在工作中,有些报表的展示较复杂,且需要统计的内容不断变化,开发的sql越来越长,运行越来越慢,这时优化必不可免,先来看下如下冗长的执行了2分多钟的sql,看看从中是否能找出些规律
SELECT T1.BASE_YW
,T1.BRNC_ID
,T1.OFFC_ID
,T1.MKT_MODL_ID
,T1.BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
,T1.BRNC_ID_NM
,T1.OFFC_ID_NM
,T1.MKT_MODL_DI_NM
,T1.BASC_MODL_NM
,T1.BI_CHNL_TP_02_NM
,T1.BI_CHNL_TP_03_NM
,T2.SHOP_CNT
,T1.YW_01_SHOP_CNT
,T1.YW_02_SHOP_CNT
,T1.YW_03_SHOP_CNT
,T1.YW_04_SHOP_CNT
,T1.YW_05_SHOP_CNT
,T1.YW_06_SHOP_CNT
,0 GR_BI_CHNL_TP_02
,0 GR_BI_CHNL_TP_03
,0 GR_MKT_MODL_ID --ADD
,0 GR_BASC_MODL --ADD
,0 GR_OFFC_ID
,0 GR_BRNC_ID
FROM (
SELECT MAX(T1.BASE_YW) BASE_YW
,T1.BRNC_ID
,T1.OFFC_ID
,T1.MKT_MODL_ID
,T1.BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
,MAX(T1.BRNC_ID_NM) BRNC_ID_NM
,MAX(T1.OFFC_ID_NM) OFFC_ID_NM
,MAX(T1.MKT_MODL_ID) MKT_MODL_DI_NM
,MAX(T1.BASC_MODL) BASC_MODL_NM
,MAX(T1.BI_CHNL_TP_02_NM) BI_CHNL_TP_02_NM
,MAX(T1.BI_CHNL_TP_03_NM) BI_CHNL_TP_03_NM
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 1 THEN SHOP_ID END) YW_01_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 2 THEN SHOP_ID END) YW_02_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 3 THEN SHOP_ID END) YW_03_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 4 THEN SHOP_ID END) YW_04_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 5 THEN SHOP_ID END) YW_05_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 6 THEN SHOP_ID END) YW_06_SHOP_CNT
FROM T_MART T1
WHERE NVL(T1.PSI_VAL,0) >= 1
GROUP BY T1.BRNC_ID
,T1.OFFC_ID
,T1.MKT_MODL_ID
,T1.BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
) T1
,T_SHOP_DIM T2
WHERE T1.BRNC_ID = T2.BRNC_ID
AND T1.OFFC_ID = T2.OFFC_ID
AND NVL(T1.BI_CHNL_TP_02,'-') = NVL(T2.BI_CHNL_TP_02,'-')
AND NVL(T1.BI_CHNL_TP_03,'-') = NVL(T2.BI_CHNL_TP_03,'-')
UNION ALL
SELECT DISTINCT T1.BASE_YW
,T1.BRNC_ID
,'-' OFFC_ID
,'-' MKT_MODL_ID
,'-' BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
,T1.BRNC_ID_NM
,T1.OFFC_ID_NM
,T1.MKT_MODL_DI_NM
,T1.BASC_MODL_NM
,T1.BI_CHNL_TP_02_NM
,T1.BI_CHNL_TP_03_NM
,SUM(T2.SHOP_CNT)OVER(PARTITION BY T1.BASE_YW,T1.BRNC_ID,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03)SHOP_CNT
,T1.YW_01_SHOP_CNT
,T1.YW_02_SHOP_CNT
,T1.YW_03_SHOP_CNT
,T1.YW_04_SHOP_CNT
,T1.YW_05_SHOP_CNT
,T1.YW_06_SHOP_CNT
,0 GR_BI_CHNL_TP_02
,0 GR_BI_CHNL_TP_03
,1 GR_MKT_MODL_ID --ADD
,1 GR_BASC_MODL --ADD
,1 GR_OFFC_ID
,0 GR_BRNC_ID
FROM (
SELECT MAX(T1.BASE_YW) BASE_YW
,T1.BRNC_ID
-- ,T1.OFFC_ID
--,T1.MKT_MODL_ID
--,T1.BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
,MAX(T1.BRNC_ID_NM) BRNC_ID_NM
,'TTL' OFFC_ID_NM
,'TTL' MKT_MODL_DI_NM --ADD
,'TTL' BASC_MODL_NM --ADD
,MAX(T1.BI_CHNL_TP_02_NM) BI_CHNL_TP_02_NM
,MAX(T1.BI_CHNL_TP_03_NM) BI_CHNL_TP_03_NM
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 1 THEN SHOP_ID END) YW_01_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 2 THEN SHOP_ID END) YW_02_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 3 THEN SHOP_ID END) YW_03_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 4 THEN SHOP_ID END) YW_04_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 5 THEN SHOP_ID END) YW_05_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 6 THEN SHOP_ID END) YW_06_SHOP_CNT
FROM T_MART T1
WHERE NVL(T1.PSI_VAL,0) >= 1
GROUP BY T1.BRNC_ID
--,T1.OFFC_ID
--,T1.MKT_MODL_ID
--,T1.BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
) T1
,T_SHOP_DIM T2
WHERE T1.BRNC_ID = T2.BRNC_ID
-- AND T1.OFFC_ID = T2.OFFC_ID
AND NVL(T1.BI_CHNL_TP_02,'-') = NVL(T2.BI_CHNL_TP_02,'-')
AND NVL(T1.BI_CHNL_TP_03,'-') = NVL(T2.BI_CHNL_TP_03,'-')
UNION ALL
……
有些统计报表的sql编写过于复杂,常常是如上的形式,多个union到一起,且不同字段组合在一起分组计算,工作中遇到上面这样的类似sql,由11个上面的select语句块 union到一起,统计出2000条数据要2分钟。这样的语句不在少数而且有时会手工修改满足更改的报表需求,为提高sql运行和维护sql效率,必然需要优化。
对于这类union很多的分组计算的sql常常想到用报表分析函数合并,这样类似cube的报表函数要比union all的效率高,io少,因此首要优化的想法就是用更高效的报表分析函数合并这些union语句。但看过具体实际的例子会发现很多select都带有自己的查询条件,上面这段sql每个select还和另一个查询集合关联,且每次连接的条件都不一样。这样就要找到一种通用的方式才能合并这样的sql
首先在分析sql的时候发现,sql中的select中都有distinct这样的关键字,经与开发人员分析后改distinct的存在主要是为了分析函数求出的商店总数,而对于这个商店总数在 T_SHOP_DIM中是可以计算的,于是首先在T_SHOP_DIM计算商店总数
SELECT SUM(T1.SHOP_CNT) SHOP_CNT
FROM T_SHOP_DIM T1
GROUP BY (T1.BRNC_ID,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03)
计算后在与T1表关联,于是首先去掉distinct的排中方法。
继续观察分析sql,T_SHOP_DIM的计算也是根据不同情况分组计算的,这个情况和T1集合中的分组情况是一致的,而关联条件也可以想办法根据不同的分组情况动态的设置成相应值,比如OFFC_ID在本次分组中未加入group by list中,这时就可以设置该值为’-’ ,而T1集合中的OFFC_ID也可以设置成’-’,这样在对应到本次分组连接时两个字段就能连接了。连接的形式固定了,就可以采用分组分析函数合并union all了,于是最终优化sql是
SELECT MAX(T1.BASE_YW) BASE_YW
,T1.BRNC_ID
,(CASE WHEN GROUPING(T1.OFFC_ID)=1 THEN '-' ELSE T1.OFFC_ID END) OFFC_ID--,'-' OFFC_ID-- ,T1.OFFC_ID
,(CASE WHEN GROUPING(T1.MKT_MODL_ID)=1 THEN '-' ELSE T1.MKT_MODL_ID END) MKT_MODL_ID--'-' MKT_MODL_ID --,T1.MKT_MODL_ID
,(CASE WHEN GROUPING(T1.BASC_MODL)=1 THEN '-' ELSE T1.BASC_MODL END) BASC_MODL--'-' BASC_MODL --,T1.BASC_MODL
,T1.BI_CHNL_TP_02
,T1.BI_CHNL_TP_03
,MAX(T1.BRNC_ID_NM) BRNC_ID_NM
,(CASE WHEN GROUPING(T1.OFFC_ID)=1 THEN 'TTL' ELSE MAX(T1.OFFC_ID_NM) END) OFFC_ID_NM--'TTL' OFFC_ID_NM
,(CASE WHEN GROUPING(T1.MKT_MODL_ID)=1 THEN 'TTL' ELSE MAX(T1.MKT_MODL_ID) END) MKT_MODL_DI_NM--'TTL' MKT_MODL_DI_NM --ADD
,(CASE WHEN GROUPING(T1.BASC_MODL)=1 THEN 'TTL' ELSE MAX(T1.BASC_MODL) END) BASC_MODL_NM--'TTL' BASC_MODL_NM --ADD
,MAX(T1.BI_CHNL_TP_02_NM) BI_CHNL_TP_02_NM
,MAX(T1.BI_CHNL_TP_03_NM) BI_CHNL_TP_03_NM
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 1 THEN SHOP_ID END) YW_01_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 2 THEN SHOP_ID END) YW_02_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 3 THEN SHOP_ID END) YW_03_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 4 THEN SHOP_ID END) YW_04_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 5 THEN SHOP_ID END) YW_05_SHOP_CNT
,COUNT(DISTINCT CASE WHEN T1.YW_RNK = 6 THEN SHOP_ID END) YW_06_SHOP_CNT
,GROUPING_ID(T1.BRNC_ID) C1
,CASE WHEN MAX(T1.OFFC_ID) IS NULL THEN GROUPING_ID(T1.BRNC_ID) END C2
,GROUPING_ID(T1.BRNC_ID,T1.BI_CHNL_TP_02) C3
,GROUPING_ID(T1.BRNC_ID,T1.OFFC_ID,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03) C4
,GROUPING(T1.BI_CHNL_TP_02) GR_BI_CHNL_TP_02
,GROUPING(T1.BI_CHNL_TP_03) GR_BI_CHNL_TP_03
,GROUPING(T1.MKT_MODL_ID) GR_MKT_MODL_ID --ADD
,GROUPING(T1.BASC_MODL) GR_BASC_MODL --ADD
,GROUPING(T1.OFFC_ID) GR_OFFC_ID
,GROUPING(T1.BRNC_ID) GR_BRNC_ID
FROM T_MART T1
WHERE NVL(T1.PSI_VAL,0) >= 1
GROUP BY GROUPING SETS( (T1.BRNC_ID,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03)
,(T1.BRNC_ID,T1.OFFC_ID,T1.MKT_MODL_ID,T1.BASC_MODL,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03)
)
) T1
,(SELECT (CASE WHEN GROUPING(T1.BRNC_ID) = 1 THEN '-' ELSE T1.BRNC_ID END) BRNC_ID
,(CASE WHEN GROUPING(T1.OFFC_ID) = 1 THEN '-' ELSE T1.OFFC_ID END) OFFC_ID
,(CASE WHEN GROUPING(BI_CHNL_TP_02) = 1 THEN '-' ELSE T1.BI_CHNL_TP_02 END) BI_CHNL_TP_02
,(CASE WHEN GROUPING(BI_CHNL_TP_03) = 1 THEN '-' ELSE T1.BI_CHNL_TP_03 END) BI_CHNL_TP_03
,SUM(T1.SHOP_CNT) SHOP_CNT
FROM T_SHOP_DIM T1
GROUP BY GROUPING SETS( (T1.BRNC_ID,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03)
,(T1.BRNC_ID,T1.OFFC_ID,T1.BI_CHNL_TP_02,T1.BI_CHNL_TP_03)
)
) T2
WHERE T1.BRNC_ID = T2.BRNC_ID
AND T1.OFFC_ID = T2.OFFC_ID
AND NVL(T1.BI_CHNL_TP_02,'-') = NVL(T2.BI_CHNL_TP_02,'-')
AND NVL(T1.BI_CHNL_TP_03,'-') = NVL(T2.BI_CHNL_TP_03,'-')
这样根据不同的统计情况,还可以在group by grouping set中添加其他统计情况,这样核心的sql合并后再统计的效率是47s,因此采用合适的分组分析函数对sql的效率和维护都是非常有必要的。