[20170114]12c varchar2类型直方图.txt
--我曾经提到慎用nvarchar2数据类型,链接:http://blog.itpub.net/267265/viewspace-2120925/
--我那里提到数据类型nvarchar2类型,因为1个字符占用2个字节,这样如果前面16个字符重复很多,直方图的建立就是鸡肋,
--毫无用处(因为分析仅仅对前面32个字节有效),12c 直方图支持更多类型: 高度直方图,频率直方图.混和类型(HYBRID).
--看看12c关于直方图方面的变化,通过例子说明:
1.环境:
SCOTT@test01p> @ ver1
PORT_STRING VERSION BANNER CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0 12.1.0.1.0 Oracle Database 12c Enterprise Edition Release 12.1.0.1.0 - 64bit Production 0
create table t (a varchar2(100));
insert into t select '12345678901234567890123456789012'||level||'3456789123456789123456' a from dual connect by level < 5000;
commit ;
2.建立直方图:
SCOTT@test01p> exec DBMS_STATS.GATHER_TABLE_STATS(null,'T',method_opt => 'FOR ALL COLUMNS SIZE 2049');
BEGIN DBMS_STATS.GATHER_TABLE_STATS(null,'T',method_opt => 'FOR ALL COLUMNS SIZE 2049'); END;
*
ERROR at line 1:
ORA-20000: Cannot parse for clause: FOR ALL COLUMNS SIZE 2049
ORA-06512: at "SYS.DBMS_STATS", line 33859
ORA-06512: at line 1
--2049 太大了,11g最大支持255.
SCOTT@test01p> exec DBMS_STATS.GATHER_TABLE_STATS(null,'T',method_opt => 'FOR ALL COLUMNS SIZE 2048');
PL/SQL procedure successfully completed.
--//12C可以支持2048.
SELECT COLUMN_NAME
,NUM_DISTINCT
,DENSITY
,NUM_BUCKETS
,HISTOGRAM
,LENGTH (UTL_RAW.cast_to_varchar2 (low_value)) L_length
,UTL_RAW.cast_to_varchar2 (low_value) L_A
,LENGTH (UTL_RAW.cast_to_varchar2 (HIGH_VALUE)) H_length
,UTL_RAW.cast_to_varchar2 (HIGH_VALUE) H_A
FROM USER_TAB_COL_STATISTICS
WHERE table_name = 'T';
Record View
As of: 2017/1/14 21:25:12
COLUMN_NAME: A
NUM_DISTINCT: 4999
DENSITY: 0.0002
NUM_BUCKETS: 2048
HISTOGRAM: HYBRID
L_LENGTH: 58
L_A: 1234567890123456789012345678901210003456789123456789123456
H_LENGTH: 57
H_A: 123456789012345678901234567890129993456789123456789123456
--//你可以发现保存的长度发生了变量,而且bucket的数量增加到了2048.直方图类型HYBRID.
--//也可以查询:select * from DBA_TAB_HISTOGRAMS where owner=user and table_name='T' order by endpoint_number;
--//结果太长,大家可以自行检测.
3.再来看看查询的sql语句:
SCOTT@test01p> alter session set statistics_level=all;
Session altered.
SCOTT@test01p> select * from t where a='12345678901234567890123456789012'||1001||'3456789123456789123456';
A
----------------------------------------------------------
1234567890123456789012345678901210013456789123456789123456
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID c4asjnvdvyqhn, child number 0
-------------------------------------
select * from t where a='12345678901234567890123456789012'||1001||'34567
89123456789123456'
Plan hash value: 1601196873
--------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 15 (100)| | 1 |00:00:00.01 | 54 |
|* 1 | TABLE ACCESS FULL| T | 1 | 1 | 59 | 15 (0)| 00:00:01 | 1 |00:00:00.01 | 54 |
--------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1 / T@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("A"='1234567890123456789012345678901210013456789123456789123456')
--//可以发现E-Rows与A-Rows一样.
4.也就是oracle 12c现在能很好的处理长字符串直方图问题,但是究竟支持多长,我继续测试:
create table t1 (a varchar2(1000));
insert into t1 select lpad('x',996,'x')||level a from dual connect by level < 5000;
commit ;
exec DBMS_STATS.GATHER_TABLE_STATS(null,'T1',method_opt => 'FOR ALL COLUMNS SIZE 2048');
SELECT COLUMN_NAME
,NUM_DISTINCT
,DENSITY
,NUM_BUCKETS
,HISTOGRAM
,LENGTH (UTL_RAW.cast_to_varchar2 (low_value)) L_length
,UTL_RAW.cast_to_varchar2 (low_value) L_A
,LENGTH (UTL_RAW.cast_to_varchar2 (HIGH_VALUE)) H_length
,UTL_RAW.cast_to_varchar2 (HIGH_VALUE) H_A
FROM USER_TAB_COL_STATISTICS
WHERE table_name = 'T1';
Record View
As of: 2017/1/14 21:41:33
COLUMN_NAME: A
NUM_DISTINCT: 4999
DENSITY: 0.0002
NUM_BUCKETS: 1
HISTOGRAM: HYBRID
L_LENGTH: 64
L_A: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
H_LENGTH: 64
H_A: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
--//也就是比原来增加了1倍,现在支持64字节.但是看看现在的情况NUM_BUCKETS=1,也就是仅仅1个桶.
SCOTT@test01p> select * from t1 where a=lpad('x',996,'x')||1001;
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 2cua094wf06kf, child number 0
-------------------------------------
select * from t1 where a=lpad('x',996,'x')||1001
Plan hash value: 3617692013
--------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 204 (100)| | 1 |00:00:00.01 | 753 |
|* 1 | TABLE ACCESS FULL| T1 | 1 | 4999 | 4886K| 204 (0)| 00:00:01 | 1 |00:00:00.01 | 753 |
--------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1 / T1@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("A"='xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
...
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1001')
32 rows selected.
--//现在E-Rows,A-Rows相差很远了.这样的直方图也无用.