[20141111]11G redo暴涨.txt
--新上线生产系统日志出现暴涨,当天达到了400G,正常业务每天7G上下,导致dataguard,备份出现问题,磁盘空间不足
--自己做一些调查:
SYS@xxxx> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
--检查出现问题的awr报表,发现:
Segments by Physical Reads
Total Physical Reads: 40,645,541
Captured Segments account for 100.1% of Total
Owner Tablespace Name Object Name Subobject Name Obj. Type Physical Reads %Total
** MISSING ** XXX ** MISSING: 99641/99641 ** MISSING ** UNDEFINED 25,106,897 61.77
.........
** MISSING ** XXX ** MISSING: 99642/99642 ** MISSING ** UNDEFINED 7,526,514 18.52
,,,,,,,,
--奇怪这些对象消失。我检查dba_objects查询:
select * from dba_objects where object_id=99641 or data_object_id=99641
union all
select * from dba_objects where object_id=99642 or data_object_id=99642;
no rows selected
--没有返回任何结果。检查:
SQL ordered by Elapsed Time
Resources reported for PL/SQL code includes the resources used by all SQL statements called by the code.
% Total DB Time is the Elapsed Time of the SQL statement divided into the Total Database Time multiplied by 100
%Total - Elapsed Time as a percentage of Total DB time
%CPU - CPU Time as a percentage of Elapsed Time
%IO - User I/O Time as a percentage of Elapsed Time
Captured SQL account for 57.2% of Total DB Time (s): 5,584
Captured PL/SQL account for 40.9% of Total DB Time (s): 5,584
Elapsed Time (s) Executions Elapsed Time per Exec (s) %Total %CPU %IO SQL Id SQL Module SQL Text
1,912.08 1 1,912.08 34.24 54.98 48.73 6mcpb06rctk0x DBMS_SCHEDULER call dbms_space.auto_space_adv...
1,166.45 1 1,166.45 20.89 59.76 45.34 cv3gb5zb3jtff DBMS_SCHEDULER create table XXX
624.88 1 624.88 11.19 44.16 57.57 bknw4hb3zfsh0 DBMS_SCHEDULER create table XXX
--查看:
6mcpb06rctk0x call dbms_space.auto_space_advisor_job_proc ( )
cv3gb5zb3jtff create table XXX
bknw4hb3zfsh0 create table XXX
--很奇怪create table的显示不全,仅仅能看到这些信息。
$ cat dpcawr.sql
set verify off
select * from table(dbms_xplan.display_awr(NVL('&1',NULL),NULL,NULL,'ALLSTATS LAST PEEKED_BINDS &2 cost'));
SYS@xxxxx> @dpcawr bknw4hb3zfsh0 ''
PLAN_TABLE_OUTPUT
-----------------------------------------------------------------------------
SQL_ID bknw4hb3zfsh0
--------------------
create table PORTA
Plan hash value: 3127585402
----------------------------------------------------------------------
| Id | Operation | Name | E-Rows | Cost (%CPU)|
----------------------------------------------------------------------
| 0 | CREATE TABLE STATEMENT | | | 6200 (100)|
| 1 | LOAD AS SELECT | | | |
| 2 | TABLE ACCESS STORAGE SAMPLE| EMR_YZB | 24693 | 4060 (1)|
----------------------------------------------------------------------
SELECT SQL_ID,
PLAN_HASH_VALUE,
ID,
OPERATION,
OPTIONS,
OBJECT#,
OBJECT_OWNER,
OBJECT_NAME,
OBJECT_ALIAS,
OBJECT_TYPE,
OPTIMIZER,
PARENT_ID,
DEPTH,
POSITION
FROM DBA_HIST_SQL_PLAN
WHERE sql_id = 'bknw4hb3zfsh0';
SQL_ID PLAN_HASH_VALUE ID OPERATION OPTIONS OBJECT# OBJECT_OWNER OBJECT_NAME OBJECT_ALIAS OBJECT_TYPE OPTIMIZER PARENT_ID DEPTH POSITION
------------- --------------- ---- ---------------------- -------------------- ---------- -------------------- -------------------------- -------------------- -------------------- -------------------- ---------- ---------- ----------
bknw4hb3zfsh0 3127585402 0 CREATE TABLE STATEMENT ALL_ROWS 0 6200
bknw4hb3zfsh0 3127585402 1 LOAD AS SELECT 0 1 1
bknw4hb3zfsh0 3127585402 2 TABLE ACCESS STORAGE SAMPLE 93703 xxxxxxxxxx EMR_YZB MYTAB@SEL$1 TABLE 1 2 1
--检查EMR_YZB这个表58G,为什么call dbms_space.auto_space_advisor_job_proc ( )要生成这样一个表。
SYS@xxxxx> select * from dba_objects where object_id=93703 or data_object_id=93703 or object_name='EMR_YZB';
OWNER OBJECT_NAME SUBOBJECT_ OBJECT_ID DATA_OBJECT_ID OBJECT_TYPE CREATED LAST_DDL_TIME TIMESTAMP STATUS T G S NAMESPACE EDITION_NAME
-------------- -------------------------- ---------- ---------- -------------- ------------------- ------------------- ------------------- ------------------- ------- - - - ---------- ----------------
xxxxxxxxxx EMR_YZB 93703 93703 TABLE 2014-10-29 09:59:34 2014-11-03 10:15:57 2014-10-29:09:59:36 VALID N N N 1
--可以推测系统调用dbms_space.auto_space_advisor_job_proc ( )生成的一些表。
--我检查awr报表,可以发现里面还有执行这样的语句。而我查询CMP4$93703,CMP3$93703这张表并不存在,估计就是dbms_space.auto_space_advisor_job_proc生成的。
9b0akta6wcm6t select count(*) cnt from "XXX".CMP4$93703
9bnpu4v79w3wx select count(*) cnt from "XXX".CMP3$93703
Buffer Gets Executions Gets per Exec %Total Elapsed Time (s) %CPU %IO SQL Id SQL Module SQL Text
7,840,488 1 7,840,488.00 1.21 26.11 15.2 86.8 9bnpu4v79w3wx DBMS_SCHEDULER select count(*) cnt from "PORT...
7,526,550 1 7,526,550.00 1.16 23.50 16.6 85.5 9b0akta6wcm6t DBMS_SCHEDULER select count(*) cnt from "PORT...
SELECT SQL_ID,
PLAN_HASH_VALUE,
ID,
OPERATION,
OPTIONS,
OBJECT#,
OBJECT_OWNER,
OBJECT_NAME,
OBJECT_ALIAS,
OBJECT_TYPE,
OPTIMIZER,
PARENT_ID,
DEPTH,
POSITION
FROM DBA_HIST_SQL_PLAN
WHERE sql_id = '9bnpu4v79w3wx';
SQL_ID PLAN_HASH_VALUE ID OPERATION OPTIONS OBJECT# OBJECT_OWNER OBJECT_NAME OBJECT_ALIAS OBJECT_TYPE OPTIMIZER PARENT_ID DEPTH POSITION
------------- --------------- ---- -------------------- -------------------- ---------- -------------------- -------------------------- -------------------- -------------------- -------------------- ---------- ---------- ----------
9bnpu4v79w3wx 3481597830 0 SELECT STATEMENT ALL_ROWS 0 2127216
9bnpu4v79w3wx 3481597830 1 SORT AGGREGATE 0 1 1
9bnpu4v79w3wx 3481597830 2 TABLE ACCESS STORAGE FULL 99641 xxxxxxxxxx CMP3$93703 CMP3$93703@SEL$1 TABLE 1 2 1
-- OBJECT#=99641,与前面的Segments by Physical Reads显示的是一致。要建立一张如此巨大的表,又执行这样的语句,这个导致了磁盘IO很忙。
** MISSING ** XXX ** MISSING: 99641/99641 ** MISSING ** UNDEFINED 25,106,897 61.77
SYS@xxxxx> select * from DBA_HIST_SEG_STAT_OBJ where obj#=99641;
DBID TS# OBJ# DATAOBJ# OWNER OBJECT_NAME SUBOBJECT_ OBJECT_TYPE TABLESPACE_NAME PARTITIO
---------- ---------- ---------- ---------- -------------- -------------------------- ---------- ------------------ ------------------------------ --------
2417323702 7 99641 99641 ** MISSING ** ** MISSING: 99641/99641 ** MISSING UNDEFINED xxxxxxxxxx NONE
**
--可以发现这个对象也被删除了。
SELECT log_id,
job_name,
status,
error#,
ACTUAL_START_DATE,
log_date,
ADDITIONAL_INFO
FROM dba_scheduler_job_run_details
WHERE job_name like 'ORA$AT_SA_SPC_SY_%'
ORDER BY 5;
LOG_ID JOB_NAME STATUS ERROR# ACTUAL_START_DATE LOG_DATE ADDITIONAL_INFO
------ -------------------- ---------- ---------- ------------------------------ ------------------------------ ------------------------------------------------------------
420 ORA$AT_SA_SPC_SY_28 FAILED 1426 2014-10-31 22:00:02.268264 2014-10-31 22:11:01.215641 ORA-01426: numeric overflow
ORA-06512: at "SYS.DBMS_ADVISOR", line 201
ORA-06512: at "SYS.DBMS_SPACE", line 2480
ORA-06512: at "SYS.DBMS_SPACE", line 2560
.....
545 ORA$AT_SA_SPC_SY_35 FAILED 1426 2014-11-01 18:01:29.688439 2014-11-01 18:12:36.138340 ORA-01426: numeric overflow
ORA-06512: at "SYS.DBMS_ADVISOR", line 201
ORA-06512: at "SYS.DBMS_SPACE", line 2480
ORA-06512: at "SYS.DBMS_SPACE", line 2560
566 ORA$AT_SA_SPC_SY_37 STOPPED 1014 2014-11-01 22:01:46.140817 2014-11-01 22:24:37.361297 ORA-01014: ORACLE shutdown in progress
582 ORA$AT_SA_SPC_SY_51 FAILED 1426 2014-11-02 06:00:06.414419 2014-11-02 06:09:14.124466 ORA-01426: numeric overflow
ORA-06512: at "SYS.DBMS_ADVISOR", line 201
ORA-06512: at "SYS.DBMS_SPACE", line 2480
ORA-06512: at "SYS.DBMS_SPACE", line 2560
......
1484 ORA$AT_SA_SPC_SY_137 FAILED 1426 2014-11-09 22:03:53.398722 2014-11-09 22:34:33.829988 ORA-01426: numeric overflow
ORA-06512: at "SYS.DBMS_ADVISOR", line 201
ORA-06512: at "SYS.DBMS_SPACE", line 2480
ORA-06512: at "SYS.DBMS_SPACE", line 2560
1547 ORA$AT_SA_SPC_SY_114 FAILED 1426 2014-11-10 22:00:07.849244 2014-11-10 22:32:45.216366 ORA-01426: numeric overflow
ORA-06512: at "SYS.DBMS_ADVISOR", line 201
ORA-06512: at "SYS.DBMS_SPACE", line 2480
ORA-06512: at "SYS.DBMS_SPACE", line 2560
--没有1次成功!
--google找到eygle的链接http://www.eygle.com/archives/2011/02/auto_space_advisor_job_proc.html
--我们系统已经是11.2.0.4版本,难道还存在问题吗?不应该。
--error#=1426,1014
$ oerr ora 1014
01014, 00000, "ORACLE shutdown in progress"
// *Cause:
// *Action:
--好像是对上的,这个时间确实服务器重启。
$ oerr ora 1426
01426, 00000, "numeric overflow"
// *Cause: Evaluation of an value expression causes an overflow/underflow.
// *Action: Reduce the operands.
--不知道是否是这个错误。
SYS@xxxxx> select startup_time from v$instance ;
STARTUP_TIME
-------------------
2014-11-01 22:25:10
--而且我还发现服务器留下一张垃圾表,应该是调用dbms_space.auto_space_advisor_job_proc ( )生成的。建立时间也与关机的时间对上。
SYS@xxxxx> select * from dba_objects where object_name like 'CMP%$%' ;
OWNER OBJECT_NAME SUBOBJECT_ OBJECT_ID DATA_OBJECT_ID OBJECT_TYPE CREATED LAST_DDL_TIME TIMESTAMP STATUS T G S NAMESPACE EDITION_NAME
-------------- -------------------------- ---------- ---------- -------------- ------------------- ------------------- ------------------- ------------------- ------- - - - ---------- ------------------------------
xxxxxxxxxx CMP3$97173 98297 98297 TABLE 2014-11-01 22:01:50 2014-11-01 22:01:50 2014-11-01:22:01:50 VALID N N N 1
SYS@xxxxx> select * from dba_objects where object_id=97173 or object_name like 'CMP%$%' ;
OWNER OBJECT_NAME SUBOBJECT_ OBJECT_ID DATA_OBJECT_ID OBJECT_TYPE CREATED LAST_DDL_TIME TIMESTAMP STATUS T G S NAMESPACE EDITION_NAME
-------------- -------------------------- ---------- ---------- -------------- ------------------- ------------------- ------------------- ------------------- ------- - - - ---------- ------------------------------
xxxxxxxxxx CMP3$97173 98297 98297 TABLE 2014-11-01 22:01:50 2014-11-01 22:01:50 2014-11-01:22:01:50 VALID N N N 1
xxxxxxxxxx ZY_FYMX_JS 97173 97173 TABLE 2014-10-29 12:00:19 2014-11-03 10:16:12 2014-10-29:12:00:20 VALID N N N 1
-- 表CMP3$97173与ZY_FYMX_JS结构既然是一样的,大小都是大约8G。
--暂时关闭auto space advisor.
SYS@xxxx> select client_name, status,attributes,window_group from dba_autotask_client;
CLIENT_NAME STATUS ATTRIBUTES WINDOW_GROUP
---------------------------------------- -------- ------------------------------------------------------------ --------------------
auto optimizer stats collection ENABLED ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_OS
auto space advisor ENABLED ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_SA
sql tuning advisor ENABLED ONCE PER WINDOW, ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_SQ
--以sys用户执行。
BEGIN
DBMS_AUTO_TASK_ADMIN.disable(
client_name => 'auto space advisor',
operation => NULL,
window_name => NULL);
END;
/
SYS@xxxx> select client_name, status,attributes,window_group from dba_autotask_client;
CLIENT_NAME STATUS ATTRIBUTES WINDOW_GROUP
---------------------------------------- -------- ------------------------------------------------------------ -------------------------------
auto optimizer stats collection ENABLED ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_OS
auto space advisor DISABLED ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_SA
sql tuning advisor ENABLED ONCE PER WINDOW, ON BY DEFAULT, VOLATILE, SAFE TO KILL ORA$AT_WGRP_SQ
--不知道sql tuning advisor也要关闭吗?大家给一个建议。