When coming into a new environment and trying to learn a bit about the data my customer has; one of the tools I use is to grab histograms of key data. Now in sqlcl and sql developer you have the info+ command that will display histograms; however, if the optimizer did not generate histograms, the information provided will be of limited usefulness (in the sense of histograms)
A histogram will give you information on how the data is distributed, with text data, this is fairly easy using select column;, count(*) from t1 group by column1;
Dates and numbers, well that’s another story.
First I need to load up a table with some test data. updt_stat simply takes the order date and returns a status. This is to simulate and order entry system.
create sequence t1_seq;
create table t1 (
id number primary key,
order_date date not null,
col1 number not null,
status varchar2(25) not null);
alter table t1 modify id default t1_seq.nextval;
create or replace function updt_stat (pdate in date) return varchar2 as
value varchar2(10);
tmp number;
begin
tmp := dbms_random.value(0,10);
case
when pdate > sysdate - 50 and tmp > 9
then value := 'Canceled';
when pdate <= sysdate - 50
then value := 'Shipped';
when pdate >= sysdate - 50 and pdate < sysdate - 1
then value := 'Picked';
when pdate >= sysdate
then value := 'Ordered';
else
value := 'Back Order';
end case;
return value;
end;
/
Now populate the table with some test data.
declare
--create or replace procedure simulate_order_entry as
i number; -- an index variable
sdate date; -- the date we are going to insert.
cursor c1 is
select dbms_random.value(0,10000) col1
from dual
connect by level <=36500;
begin
-- one year of data
select sysdate-365 into sdate from dual;
i := 1;
-- start the loop, 36,500 times
-- one year of data
for rec in c1
loop
if ( i / 100 = 1 ) then
i := 1;
sdate := sdate + 1;
end if;
insert into t1 (order_date, col1, status)
values
(sdate, rec.col1, updt_stat(sdate));
i := i+1; -- i++
end loop;
commit;
end;
/
SQL> select status, count(*)
2 from t1
3* group by status;
STATUS COUNT(*)
_____________ ___________
Shipped 31284
Picked 4334
Canceled 553
Back Order 92
Ordered 237
SQL> select min(order_date),
2 max(order_date),
3 width_bucket(order_date, sysdate-365, sysdate+20, 12) bucket,
4 count(*)
5 from t1
6 group by width_bucket(order_date, sysdate-365, sysdate+20, 12)
7* order by 1;
MIN(ORDER_DATE) MAX(ORDER_DATE) BUCKET COUNT(*)
__________________ __________________ _________ ___________
16-MAY-22 16-MAY-22 0 99
17-MAY-22 17-JUN-22 1 3168
18-JUN-22 19-JUL-22 2 3168
20-JUL-22 20-AUG-22 3 3168
21-AUG-22 21-SEP-22 4 3168
22-SEP-22 23-OCT-22 5 3168
24-OCT-22 24-NOV-22 6 3168
25-NOV-22 26-DEC-22 7 3168
27-DEC-22 27-JAN-23 8 3168
28-JAN-23 28-FEB-23 9 3168
01-MAR-23 02-APR-23 10 3267
03-APR-23 04-MAY-23 11 3168
05-MAY-23 19-MAY-23 12 1454
SQL> select trunc(min(col1),4),
2 trunc(max(col1),4),
3 width_bucket(col1, 0, 10000, 10) bucket,
4 count(*)
5 from t1
6 group by width_bucket(col1, 0, 10000, 10)
7* order by 1;
TRUNC(MIN(COL1),4) TRUNC(MAX(COL1),4) BUCKET COUNT(*)
_____________________ _____________________ _________ ___________
0.1578 999.9308 1 3710
1000.0824 1999.9868 2 3543
2001.0208 2999.6238 3 3640
3000.4055 3999.8725 4 3692
4000.2374 4999.823 5 3751
5000.1659 5999.6815 6 3767
6000.3364 6999.9993 7 3660
7000.1511 7999.4147 8 3471
8000.284 8999.706 9 3651
9000.1652 9999.5417 10 3615