[20161216]关于library cache lock.txt
--这几天一直在关注这个链接,http://www.itpub.net/thread-2073170-1-1.html
--就是library cache lock导致挂死业务,一般引起这个问题编译包,而应用正好在使用执行这个包,以及11g口令大小写导致无法登录的问题,
--我自己很久以前也遇到过一些,那时的系统是10g,http://www.itpub.net/thread-1842463-1-1.html,但是只要分析某个表就出现library cache lock,
--当时我采用方式是lock表统计,第2天仔细检查,才确定是bug,我们机器是rac,我轮询重启了数据库,再分析一切ok.
--今天我仔细看了链接 http://www.bobbydurrettdba.com/2014/11/24/parsing-blocks-stats-blocks-parsing/
--他提到了 分析sql语句阻塞 统计,统计阻塞相关表的分析执行的情况,我仅仅再重复作者的测试.
1.环境:
SCOTT@book> @ &r/ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
create table t1 as select * from dba_tables;
create table t2 as select * from dba_tables;
--lz的上传附件http://www.bobbydurrettdba.com/uploads/parsingstatsparsing.zip,里面包含执行的sql语句.
2.测试1:
--session 1,单独执行:chain1.sql
explain plan into plan_table for
select
count(*)
from
t1,
t2,
t2 t3,
t2 t4,
t2 t5,
t2 t6,
t2 t7,
...
t1.owner=t96.owner and
t1.owner=t97.owner and
t1.owner=t98.owner and
t1.owner=t99.owner and
t1.owner=t100.owner
/
--分析这个执行计划,涉及表连接很多分析时间很长.
SCOTT@book> SELECT VALUE/100 "CPU in seconds"
2 FROM V$SESSION a, V$SESSTAT b
3 where a.SID = b.SID
4 and a.SID = :monitored_sid
5 and b.STATISTIC# = (SELECT STATISTIC# FROM V$STATNAME WHERE NAME='CPU used by this session');
CPU in seconds
--------------
24.2
--//我的测试使用cpu 是24.2秒.
3.测试2:
--session 1,单独执行:chain1.sql
--session 2,单独执行:chain2.sql,也就是分析表
execute dbms_stats.gather_table_stats(NULL,'T1');
--可以发现session 2出现library cache lock.
SCOTT@book> -- show library cache lock wait time in seconds
SCOTT@book>
SCOTT@book> SELECT TIME_WAITED/100 "Library cache lock in seconds"
2 FROM V$SESSION_EVENT a, V$SESSION b
3 WHERE
4 a.SID = b.SID and
5 a.SID= :monitored_sid and
6 a.event='library cache lock';
Library cache lock in seconds
-----------------------------
22.98
--//session 4,分析:
SYS@book> @ &r/wait
P1RAW P2RAW P3RAW P1 P2 P3 SID SERIAL# SEQ# EVENT STATE WAIT_TIME_MICRO SECONDS_IN_WAIT
---------------- ---------------- ---------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------------------------------- ------------------- --------------- ---------------
0000000062657100 0000000000000001 00 1650815232 1 0 56 711 74 SQL*Net message to client WAITED SHORT TIME 2 0
000000007C6DAA20 000000007C3518E0 00015A6D00010003 2087561760 2083854560 3.8090E+14 68 311 92 library cache lock WAITING 3162559 3
SYS@book> @ &r/ev_name 'library cache lock'
EVENT# EVENT_ID NAME PARAMETER1 PARAMETER2 PARAMETER3 WAIT_CLASS_ID WAIT_CLASS# WAIT_CLASS
---------- ---------- ---------------------------------------- -------------------- -------------------- -------------------- ------------- ----------- --------------------
287 916468430 library cache lock handle address lock address 100*mode+namespace 3875070507 4 Concurrency
SYS@book> @ &r/lcl
display library cache lock problem
INST_ID SADDR HANDLE MOD REQ OBJECT SQL_ID HASH_VALUE KGLNAOBJ USER_NAME C50
---------- ---------------- ---------------- ---------- ---------- -------------------- ------------- ---------- ------------------------------ -------------------- --------------------------------------------------
1 0000000085A697C0 000000007C6DAA20 2 0 T1 gsmky1za3hww0 3560469376 T1 SCOTT alter system kill session '46,569' immediate;
display wait library cache lock
SID SERIAL# SPID PID P_SERIAL# EVENT C50
---------- ---------- ------ ------- ---------- ---------------------------------------- --------------------------------------------------
68 311 55054 30 145 library cache lock alter system kill session '68,311' immediate;
SYS@book> @ &r/sharepool/shp4 000000007C6DAA20 0
TEXT KGLHDADR KGLHDPAR C40 KGLOBHD0 KGLOBHD6 KGLOBHS0 KGLOBHS6 KGLOBT16 N0_6_16 N20 KGLNAHSH KGLOBT03 KGLOBT09
-------------- ---------------- ---------------- ---------------------------------------- ---------------- ---------------- ---------- ---------- ---------- --------- ---------- ---------- ------------- ----------
父游标句柄地址 000000007C6DAA20 000000007C6DAA20 T1 000000007DF2E798 00 4688 0 0 4688 4688 3560469376 0
--我不知道这里为什么前面有1个sql_id,查询这个sql_id是找不到信息的,实际上T1,顺便问一下那位知道这个hash值如何计算的,sql语句是后面chr(0)的md5值.
SYS@book> select KGLHDADR,KGLHDPAR,KGLNAHSH,KGLNAHSV,KGLHDNSD,KGLOBT03 from x$kglob where kglhdadr='000000007C6DAA20';
KGLHDADR KGLHDPAR KGLNAHSH KGLNAHSV KGLHDNSD KGLOBT03
---------------- ---------------- ---------- -------------------------------- ---------------------------------------------------------------- -------------
000000007C6DAA20 000000007C6DAA20 3560469376 f1cd2b1c8166d406fc4e5e0fd4387380 TABLE/PROCEDURE
--一般KGLOBT03对应sql_id.KGLNAHSV=f1cd2b1c8166d406fc4e5e0fd4387380取后面8位d4387380,
SYS@book> @ &r/16to10 d4387380
16 to 10 DEC
------------
3560469376
--//可以对上HASH_VALUE.
--//KGLNAHSV=f1cd2b1c8166d406fc4e5e0fd4387380取后面16位fc4e5e0fd4387380.
SYS@book> set numw 20
SYS@book> select to_number(substr(KGLNAHSV,-16),'xxxxxxxxxxxxxxxxxxxx') sql_id10 from x$kglob where kglhdadr='000000007C6DAA20';
SQL_ID10
--------------------
18180572167819260800
--//参考链接: http://blog.itpub.net/267265/viewspace-1365382/
select replace(wmsys.wm_concat(c),',') from (
select c from (
SELECT SUBSTR ('0123456789abcdfghjkmnpqrstuvwxyz', a + 1, 1) c,rownum rn
FROM (WITH data (a, b)
AS (SELECT MOD (&1, 32) a, TRUNC (&1 / 32) b FROM DUAL
UNION ALL
SELECT MOD (b, 32) a, TRUNC (b / 32) b
FROM data
WHERE b !=0
)
SELECT a
FROM data)) order by rn desc);
REPLACE(WMSYS.WM_CONCAT(C),',')
---------------------------------
gsmky1za3hww0
--//与前面的sql_id正好对上.
4.测试4:
--session 1,单独执行:chain1.sql
--session 2,单独执行:chain2.sql,也就是分析表
execute dbms_stats.gather_table_stats(NULL,'T1');
--session 3,单独执行:chain3.sql,做一个硬解析:select /* comment to force hard parse */ count(*) from T1;
--补充 实际上只要执行sql语句涉及t1的表都会出现阻塞.
--//可以发现session 2,3都出现了library cache lock.
SYS@book> @ &r/wait
P1RAW P2RAW P3RAW P1 P2 P3 SID SERIAL# SEQ# EVENT STATE WAIT_TIME_MICRO SECONDS_IN_WAIT
---------------- ---------------- ---------------- ---------- ---------- ---------- ---------- ---------- ---------- ------------------------- ------------------- --------------- ---------------
0000000062657100 0000000000000001 00 1650815232 1 0 46 589 69 SQL*Net message to client WAITED SHORT TIME 2 0
000000007C6DAA20 000000007BBE6938 00015A6D00010003 2087561760 2076076344 3.8090E+14 68 311 158 library cache lock WAITING 9187405 9
000000007C6DAA20 000000007E570F38 00015A6D00010002 2087561760 2119634744 3.8090E+14 56 719 25 library cache lock WAITING 8126936 8
SYS@book> @ &r/lcl
display library cache lock problem
INST_ID SADDR HANDLE MOD REQ OBJECT SQL_ID HASH_VALUE KGLNAOBJ USER_NAME C50
---------- ---------------- ---------------- ---------- ---------- -------------------- ------------- ---------- ------------------------------ -------------------- --------------------------------------------------
1 0000000085ACB3C0 000000007C6DAA20 2 0 T1 gsmky1za3hww0 3560469376 T1 SCOTT alter system kill session '80,81' immediate;
display wait library cache lock
SID SERIAL# SPID PID P_SERIAL# EVENT C50
---------- ---------- ------ ------- ---------- ---------------------------------------- --------------------------------------------------
56 719 55406 29 31 library cache lock alter system kill session '56,719' immediate;
68 311 55054 30 145 library cache lock alter system kill session '68,311' immediate;
--这里也就完整演示了分析sql语句阻塞分析表,分析表也会导致阻塞相关表的sql语句.
5.测试5:
--//又重复1次做测试4,我发现即使做软分析一样会阻塞.
--但是我修改chain3.sql执行,修改查询表T2,
select /* comment to force hard parse */ count(*) from T2;
--我发现session 2 依旧会阻塞,但是session 3很快完成,这也说明分析表T1这时阻塞了session 3的执行.
6.测试6:
--我修改chain1.sql,条件一半是t1.owner=...一半是t2.owner=...
..
t1.owner=t49.owner and
t1.owner=t50.owner and
t2.owner=t51.owner and
t2.owner=t52.owner and
t2.owner=t53.owner and
...
--session 1,单独执行:chain1.sql
--session 2,单独执行:chain2.sql,也就是分析表
execute dbms_stats.gather_table_stats(NULL,'T1');
SCOTT@book> SELECT TIME_WAITED/100 "Library cache lock in seconds"
2 FROM V$SESSION_EVENT a, V$SESSION b
3 WHERE
4 a.SID = b.SID and
5 a.SID= :monitored_sid and
6 a.event='library cache lock';
Library cache lock in seconds
-----------------------------
8.26
--我仅仅发现library cache lock减少不少.
7.我仔细思考itpub网友遇到的问题,我猜测bug可能性大,但是可能与后台执行的sql tuning advisor有关.
SYS@book> select client_name, status,attributes,window_group from dba_autotask_client;
CLIENT_NAME STATUS ATTRIBUTES WINDOW_GROUP
---------------------------------------- -------- ------------------------------------------------------------ --------------
auto optimizer stats collection ENABLED ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_OS
auto space advisor ENABLED ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_SA
sql tuning advisor ENABLED ONCE PER WINDOW, ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_SQ
SYS@book> select * from (select * from DBA_AUTOTASK_SCHEDULE order by 2 ) where rownum<=10;
WINDOW_NAME START_TIME DURATION
---------------- ---------------------------------- -------------
FRIDAY_WINDOW 2016-12-16 22:00:00.300000 +08:00 +000 04:00:00
SATURDAY_WINDOW 2016-12-17 06:00:00.300000 +08:00 +000 20:00:00
SUNDAY_WINDOW 2016-12-18 06:00:00.300000 +08:00 +000 20:00:00
MONDAY_WINDOW 2016-12-19 22:00:00.300000 +08:00 +000 04:00:00
TUESDAY_WINDOW 2016-12-20 22:00:00.300000 +08:00 +000 04:00:00
WEDNESDAY_WINDOW 2016-12-21 22:00:00.300000 +08:00 +000 04:00:00
THURSDAY_WINDOW 2016-12-22 22:00:00.300000 +08:00 +000 04:00:00
FRIDAY_WINDOW 2016-12-23 22:00:00.300000 +08:00 +000 04:00:00
SATURDAY_WINDOW 2016-12-24 06:00:00.300000 +08:00 +000 20:00:00
SUNDAY_WINDOW 2016-12-25 06:00:00.300000 +08:00 +000 20:00:00
10 rows selected.
SYS@book> select * from DBA_AUTOTASK_WINDOW_CLIENTS;
WINDOW_NAME WINDOW_NEXT_TIME WINDO AUTOTASK OPTIMIZE SEGMENT_ SQL_TUNE HEALTH_M
------------------------------ ---------------------------------- ----- -------- -------- -------- -------- --------
MONDAY_WINDOW 2016-12-19 22:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
TUESDAY_WINDOW 2016-12-20 22:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
WEDNESDAY_WINDOW 2016-12-21 22:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
THURSDAY_WINDOW 2016-12-22 22:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
FRIDAY_WINDOW 2016-12-16 22:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
SATURDAY_WINDOW 2016-12-17 06:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
SUNDAY_WINDOW 2016-12-18 06:00:00.000000 +08:00 FALSE ENABLED ENABLED ENABLED ENABLED DISABLED
7 rows selected.
--//可以3项工作是集中在一起执行的,有一些分析消耗大量资源,而这个时候有做统计分析,出现某种特殊的bug导致阻塞感觉"很正常".我感觉oracle应该改进一下,错开执行时间.
--//我显示基本disbale 了,auto space advisor,sql tuning advisor.
--//停止auto space advisor,是因为遭遇了日志暴涨的情况,链接:http://blog.itpub.net/267265/viewspace-1329205/
--作者后面做了一些补充,我仅仅摘录下来:
http://www.bobbydurrettdba.com/2014/11/24/parsing-blocks-stats-blocks-parsing/
P.S. After reviewing the SR I wanted to understand what this parse blocks stats blocks parse looked like in a state
dump. The Oracle support analyst explained how the locks looked in a state dump that we uploaded but I didn't get a
chance to look at it closely until today. I found the most important information in lines with the string "
LibraryObjectLock" at the front of the line after some spaces or tabs. There were three types of lines – the holding
share lock, the waiting exclusive lock, and the many waiting share locks:
LibraryObjectLock: Address=... Handle=0x5196c8908 Mode=S ...
LibraryObjectLock: Address=... Handle=0x5196c8908 RequestMode=X ...
LibraryObjectLock: Address=... Handle=0x5196c8908 RequestMode=S ...
The "…" indicates places I edited out other details. The handle 0x5196c8908 identifies the table being locked. The
"Mode=S" string indicates a successful share lock of that table by the session with the long parse time. The
"RequestMode=X" was from the stats job trying to get exclusive access to the table. The "RequestMode=S" was all the
other sessions trying to get shared access to the table waiting for stats to first get exclusive access. Anyway, I just
wanted to translate what Oracle support told me into something that might be useful to others. Plus I want to remember
it myself!
--附上我的执行脚本:
$ cat lcl.sql
PROMPT
PROMPT display library cache lock problem
PROMPT
column object format a20
column user_name format a20
SELECT inst_id
,kgllkses saddr
,kgllkhdl handle
,kgllkmod MOD
,kgllkreq REQ
,kglnaobj object
,KGLLKSQLID sql_id
,kglnahsh hash_value
,KGLNAOBJ
,user_name
, 'alter system kill session '''
|| s.sid
|| ','
|| s.serial#
|| ''''
|| ' immediate;'
c50
--,lock_a.*
FROM x$kgllk lock_a, v$session s
WHERE s.saddr = lock_a.kgllkses and
kgllkmod > 0
AND EXISTS
(SELECT lock_b.kgllkhdl
FROM x$kgllk lock_b
WHERE kgllkses IN (SELECT saddr
FROM v$session
WHERE event like 'library cache lock') /* blocked session */
--WHERE event like 'library cache pin') /* blocked session */
AND lock_a.kgllkhdl = lock_b.kgllkhdl
AND kgllkreq > 0);
PROMPT
PROMPT display wait library cache lock
PROMPT
SELECT s.sid
,s.serial#
,p.spid
,p.pid
,p.serial# p_serial#
,s.event
, 'alter system kill session '''
|| s.sid
|| ','
|| s.serial#
|| ''''
|| ' immediate;'
c50
FROM v$session s, v$process p
WHERE s.paddr = p.addr
AND s.event= 'library cache lock';
--AND s.event= 'library cache pin';
$ cat shp4.sql
column N0_6_16 format 99999999
SELECT DECODE (kglhdadr,
kglhdpar, '父游标句柄地址',
'子游标句柄地址')
text,
kglhdadr,
kglhdpar,
substr(kglnaobj,1,40) c40,
kglobhd0,
kglobhd6,
kglobhs0,kglobhs6,kglobt16,
kglobhs0+kglobhs6+kglobt16 N0_6_16,
kglobhs0+kglobhs1+kglobhs2+kglobhs3+kglobhs4+kglobhs5+kglobhs6+kglobt16 N20,
kglnahsh,
kglobt03 ,
kglobt09
FROM x$kglob
WHERE kglobt03 = '&1' or kglhdpar='&1' or kglhdadr='&1' or KGLNAHSH= &2;
$ cat wait.sql
select p1raw,p2raw,p3raw,p1,p2,p3,sid,serial#,seq#,event,state,wait_time_micro,seconds_in_wait from v$session where wait_class<>'Idle' order by event ;