[20170315]简单探究dg的mrp进程.txt
--//昨天上午在做测试做在线日志与备用日志大小不一样时遇到一个令我困惑的问题,链接:
http://blog.itpub.net/267265/viewspace-2135377/
--//发现mrp进程并没有打开数据文件的句柄.这样MRP进程如何应用日志的呢?简单探究看看:
1.环境:
SYS@192.168.31.8/xxxxxx> @ &r/ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.3.0 Oracle Database 11g Enterprise Edition Release 11.2.0.3.0 - 64bit Production
SYS@192.168.31.7/xxxxxxdg> @ &r/dg/dg
PROCESS PID STATUS CLIENT_P GROUP# THREAD# SEQUENCE# BLOCK# BLOCKS DELAY_MINS
--------- ------- ------------ -------- ------- ------- ---------- ---------- ---------- ----------
RFS 15714 IDLE ARCH N/A 0 0 0 0 0
ARCH 1549 CONNECTED ARCH N/A 0 0 0 0 0
RFS 15720 IDLE UNKNOWN N/A 0 0 0 0 0
RFS 15718 IDLE LGWR 1 1 5242 36608 2 0
ARCH 1551 CLOSING ARCH 4 1 5238 73728 2030 0
ARCH 1547 CLOSING ARCH 4 1 5241 73728 2048 0
ARCH 1553 CLOSING ARCH 4 1 5240 1 1 0
MRP0 1572 APPLYING_LOG N/A N/A 1 5242 36609 102400 0
8 rows selected.
--//不断执行可以发现MRP0进程的block#块数不断变化.
--//备库执行:
# ps -eLf | egrep "1572 |PID"
UID PID PPID LWP C NLWP STIME TTY TIME CMD
oracle 1572 1 1572 0 1 Feb03 ? 00:00:00 ora_mrp0_xxxxxxdg
root 7214 7174 7214 0 1 09:37 pts/2 00:00:00 egrep 1572 |PID
--//MPR0进程号10912.
2.而如果查看:
# ls -l /proc/1572/fd
total 0
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 0 -> /dev/null
l-wx------ 1 oracle oinstall 64 2017-03-16 09:38:01 1 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 10 -> /dev/zero
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 11 -> /dev/zero
lrwx------ 1 oracle oinstall 64 2017-03-16 09:38:01 12 -> /u01/app/oracle/product/11.2.0/db_1/dbs/hc_xxxxxxdg.dat
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 13 -> /u01/app/oracle/product/11.2.0/db_1/rdbms/mesg/oraus.msb
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 14 -> /proc/1572/fd
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 15 -> /dev/zero
lrwx------ 1 oracle oinstall 64 2017-03-16 09:38:01 16 -> /u01/app/oracle/product/11.2.0/db_1/dbs/hc_xxxxxxdg.dat
l-wx------ 1 oracle oinstall 64 2017-03-16 09:38:01 2 -> /dev/null
lrwx------ 1 oracle oinstall 64 2017-03-16 09:38:01 256 -> /u01/app/oracle/oradata/xxxxxx/control01.ctl
lrwx------ 1 oracle oinstall 64 2017-03-16 09:38:01 257 -> /u01/app/oracle/oradata/xxxxxx/control02.ctl
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 3 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 4 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 5 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 6 -> /dev/null
lrwx------ 1 oracle oinstall 64 2017-03-16 09:38:01 7 -> /u01/app/oracle/product/11.2.0/db_1/dbs/hc_xxxxxxdg.dat
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 8 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-16 09:38:01 9 -> /dev/null
--//可以发现这个进程并没有打开数据文件句柄.
# strace -f -c -p 1572 -tt
Process 1572 attached - interrupt to quit
Process 1572 detached
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
nan 0.000000 0 47 getrusage
nan 0.000000 0 77 77 semtimedop
------ ----------- ----------- --------- --------- ----------------
100.00 0.000000 124 77 total
--//既没有读也没有写操作相关调用.
--//查看RFS 进程号15718
# strace -f -c -p 15718 -tt
Process 15718 attached - interrupt to quit
Process 15718 detached
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
66.66 0.001997 29 69 io_submit
33.34 0.000999 14 72 poll
0.00 0.000000 0 361 145 readv
0.00 0.000000 0 53 semctl
0.00 0.000000 0 138 times
0.00 0.000000 0 69 io_getevents
------ ----------- ----------- --------- --------- ----------------
100.00 0.002996 762 145 total
--//io_submit 函数io_submit - Submit asynchronous I/O blocks for processing,可以使用man io_submit获得.
# ls -l /proc/15718/fd
total 0
lr-x------ 1 oracle oinstall 64 2017-03-15 17:25:20 0 -> /dev/null
l-wx------ 1 oracle oinstall 64 2017-03-15 17:25:20 1 -> /dev/null
l-wx------ 1 oracle oinstall 64 2017-03-15 17:25:20 10 -> /u01/app/oracle/diag/rdbms/xxxxxxdg/xxxxxxdg/trace/xxxxxxdg_rfs_15718.trm
lrwx------ 1 oracle oinstall 64 2017-03-15 17:25:20 13 -> socket:[1615732]
l-wx------ 1 oracle oinstall 64 2017-03-15 17:25:20 2 -> /dev/null
lrwx------ 1 oracle oinstall 64 2017-03-15 17:25:20 256 -> /u01/app/oracle/oradata/xxxxxx/control01.ctl
lrwx------ 1 oracle oinstall 64 2017-03-15 17:25:20 257 -> /u01/app/oracle/oradata/xxxxxx/control02.ctl
lrwx------ 1 oracle oinstall 64 2017-03-15 17:25:20 258 -> /u01/app/oracle/oradata/xxxxxx/redostb02.rbo
lr-x------ 1 oracle oinstall 64 2017-03-15 17:25:20 3 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-15 17:25:20 4 -> /dev/null
lr-x------ 1 oracle oinstall 64 2017-03-15 17:25:20 5 -> /u01/app/oracle/product/11.2.0/db_1/rdbms/mesg/oraus.msb
lr-x------ 1 oracle oinstall 64 2017-03-15 17:25:20 6 -> /proc/15718/fd
lr-x------ 1 oracle oinstall 64 2017-03-15 17:25:20 7 -> /dev/zero
lrwx------ 1 oracle oinstall 64 2017-03-15 17:25:20 8 -> /u01/app/oracle/admin/xxxxxx/adump/xxxxxxdg_ora_15718_1.aud
l-wx------ 1 oracle oinstall 64 2017-03-15 17:25:20 9 -> /u01/app/oracle/diag/rdbms/xxxxxxdg/xxxxxxdg/trace/xxxxxxdg_rfs_15718.trc
# strace -f -p 15718 -e trace=io_submit -tt
Process 15718 attached - interrupt to quit
09:43:12.375345 io_submit(47908000763904, 1, {{0x2b92736ee890, 0, 1, 0, 258}}) = 1
09:43:12.431387 io_submit(47908000763904, 1, {{0x2b9277141b38, 0, 1, 0, 258}}) = 1
09:43:12.439732 io_submit(47908000763904, 1, {{0x2b92771411a0, 0, 1, 0, 258}}) = 1
09:43:12.452173 io_submit(47908000763904, 1, {{0x2b9277140808, 0, 1, 0, 258}}) = 1
09:43:12.468799 io_submit(47908000763904, 1, {{0x2b927713fe70, 0, 1, 0, 258}}) = 1
09:43:12.510526 io_submit(47908000763904, 1, {{0x2b927713f4d8, 0, 1, 0, 258}}) = 1
09:43:12.615407 io_submit(47908000763904, 1, {{0x2b927713eb40, 0, 1, 0, 258}}) = 1
09:43:13.292328 io_submit(47908000763904, 1, {{0x2b9276bfc840, 0, 1, 0, 258}}) = 1
09:43:13.305197 io_submit(47908000763904, 1, {{0x2b9276bfbea8, 0, 1, 0, 258}}) = 1
09:43:13.323443 io_submit(47908000763904, 1, {{0x2b9276bfb510, 0, 1, 0, 258}}) = 1
09:43:13.502523 io_submit(47908000763904, 1, {{0x2b9276bfab78, 0, 1, 0, 258}}) = 1
09:43:14.504408 io_submit(47908000763904, 1, {{0x2b9276bfa1e0, 0, 1, 0, 258}}) = 1
09:43:15.531130 io_submit(47908000763904, 1, {{0x2b9276bf9848, 0, 1, 0, 258}}) = 1
09:43:16.532472 io_submit(47908000763904, 1, {{0x2b9276bf8eb0, 0, 1, 0, 258}}) = 1
09:43:17.748430 io_submit(47908000763904, 1, {{0x2b9276bf8518, 0, 1, 0, 258}}) = 1
09:43:18.604977 io_submit(47908000763904, 1, {{0x2b9276bf7b80, 0, 1, 0, 258}}) = 1
09:43:18.613206 io_submit(47908000763904, 1, {{0x2b9276bf71e8, 0, 1, 0, 258}}) = 1
09:43:18.713221 io_submit(47908000763904, 1, {{0x2b9276bf6850, 0, 1, 0, 258}}) = 1
09:43:19.238235 io_submit(47908000763904, 1, {{0x2b9276bf5eb8, 0, 1, 0, 258}}) = 1
Process 15718 detached
--//258 对应的就是258 -> /u01/app/oracle/oradata/xxxxxx/redostb02.rbo ,也就是由这个rfs进程写备用日志文件.
# strace -f -p 15718 -e trace=readv -tt
Process 15718 attached - interrupt to quit
09:44:38.968628 readv(13, [{"\0H\0\0\17\0\0\0\0\0\0\2\0\0\10$\0\0\0\2\0$\10\0\0\0\0\0\0\0\0\0"..., 72}], 1) = 72
09:44:38.968866 readv(13, [{"\1\0\0\0\0\0\0\0\1\0\0\0\361\234\0\0\4\0\0\0\0\0\0\0\310\347V`\3\0\0\0"..., 36}], 1) = 36
09:44:38.968942 readv(13, [{"\1\"\0\0\361\234\0\0z\24\0\0\20\200d\340\30\6\0\0\r\0\3\0\306\347V`\1\0\0\1"..., 2048}], 1) = 2048
09:44:38.975307 readv(13, 0x17654fd0, 1) = -1 EAGAIN (Resource temporarily unavailable)
09:44:38.975389 readv(13, 0x17654fd0, 1) = -1 EAGAIN (Resource temporarily unavailable)
09:44:38.985228 readv(13, [{"\0H\0\0\17\0\0\0\0\0\0\2\0\0\6$\0\0\0\2\0$\6\0\0\0\0\0\0\0\0\0"..., 72}], 1) = 72
09:44:38.985378 readv(13, [{"\1\0\0\0\0\0\0\0\1\0\0\0\365\234\0\0\3\0\0\0\0\0\0\0\312\347V`\3\0\0\0"..., 36}], 1) = 36
09:44:38.985549 readv(13, [{"\1\"\0\0\365\234\0\0z\24\0\0\20\200\236s|\0\0\0\5\304\3\0\307\347V`\1\0005\37"..., 1536}], 1) = 1536
09:44:38.987276 readv(13, 0x17654fd0, 1) = -1 EAGAIN (Resource temporarily unavailable)
09:44:38.987331 readv(13, 0x17654fd0, 1) = -1 EAGAIN (Resource temporarily unavailable)
09:44:38.997682 readv(13, [{"\0H\0\0\17\0\0\0\0\0\0\2\0\0\n$\0\0\0\2\0$\n\0\0\0\0\0\0\0\0\0"..., 72}], 1) = 72
09:44:38.997832 readv(13, [{"\1\0\0\0\0\0\0\0\1\0\0\0\370\234\0\0\5\0\0\0\0\0\0\0\314\347V`\3\0\0\0"..., 36}], 1) = 36
09:44:38.997976 readv(13, [{"\1\"\0\0\370\234\0\0z\24\0\0\20\200B@|\0\0\0\5\0\3\0\311\347V`\1\0\0\0"..., 2560}], 1) = 2560
09:44:38.999316 readv(13, 0x17654fd0, 1) = -1 EAGAIN (Resource temporarily unavailable)
09:44:38.999371 readv(13, 0x17654fd0, 1) = -1 EAGAIN (Resource temporarily unavailable)
Process 15718 detached
--//13 -> socket:[1615732],我估计报错,是因为缓存已经读空.
--//可以通过如下命令观察.
# strace -f -p 15718 -e trace=io_submit -e write=all -tt
# strace -f -p 15718 -e trace=readv -e read=all -tt
--//可以看出mrp0实际上干的事情很少,仅仅显示日志应该到那里.
--//真正干活的是那个进程呢? 使用iotop观察.在主备库同时启动iotop -a -u oracle并观察(观察10分钟).
--//在主库观察:
# iotop -a -u oracle
Total DISK READ: 0.00 B/s | Total DISK WRITE: 30.77 K/s
TID PRIO USER DISK READ DISK WRITE SWAPIN IO> COMMAND
3831 be/4 oracle 0.00 B 24.50 M 1.11 % 2.80 % ora_ckpt_xxxxxx
3829 be/4 oracle 0.00 B 8.38 M 0.00 % 2.48 % ora_lgwr_xxxxxx
3827 be/4 oracle 0.00 B 7.59 M 0.00 % 1.11 % ora_dbw0_xxxxxx
24598 be/4 oracle 0.00 B 0.00 B -19.60 % 0.40 % oraclexxxxxx (LOCAL=NO)
--//在备库观察:
Total DISK READ: 0.00 B/s | Total DISK WRITE: 102.41 K/s
TID PRIO USER DISK READ DISK WRITE SWAPIN IO> COMMAND
1522 be/4 oracle 4.00 K 41.76 M 0.00 % 2.10 % ora_dbw0_xxxxxxdg
15718 be/4 oracle 0.00 B 8.81 M 0.00 % 0.87 % oraclexxxxxxdg (LOCAL=NO)
1526 be/4 oracle 0.00 B 26.62 M 2.10 % 0.79 % ora_ckpt_xxxxxxdg
7159 be/4 oracle 6.56 M 0.00 B 0.01 % 0.07 % oraclexxxxxxdg (LOCAL=NO)
--//对比可以发现在备库上真正干活的是dbw0,ckpt进程.
--//而单独跟踪备库的dbw0进程:
# strace -f -p 1522 -e trace=io_submit -tt
Process 1522 attached - interrupt to quit
09:53:29.280121 io_submit(46981453172736, 12, {{0x2abab87f24c0, 0, 1, 0, 260}, {0x2abab8aa2540, 0, 1, 0, 260}, {0x2abab87f3b20, 0, 1, 0, 260}, {0x2abab8a8b9a8, 0, 1, 0, 260}, {0x2abab8a236b8, 0, 1, 0, 260}, {0x2abab88dd958, 0, 1, 0, 260}, {0x2abab88a8718, 0, 1, 0, 260}, {0x2abab8b15b30, 0, 1, 0, 260}, {0x2abab8988e48, 0, 1, 0, 264}, {0x2abab8b2b068, 0, 1, 0, 264}, {0x2abab8c1c418, 0, 1, 0, 264}, {0x2abab8c55978, 0, 1, 0, 264}}) = 12
09:53:32.317414 io_submit(46981453172736, 26, {{0x2abab88159b8, 0, 1, 0, 260}, {0x2abab8c13840, 0, 1, 0, 260}, {0x2abab8ae7e68, 0, 1, 0, 260}, {0x2abab87f24c0, 0, 1, 0, 260}, {0x2abab8b2b068, 0, 1, 0, 260}, {0x2abab8ad2ec8, 0, 1, 0, 260}, {0x2abab8c1c418, 0, 1, 0, 264}, {0x2abab8a40c98, 0, 1, 0, 264}, {0x2abab88dd958, 0, 1, 0, 264}, {0x2abab8c65ac8, 0, 1, 0, 264}, {0x2abab8a236b8, 0, 1, 0, 264}, {0x2abab8988e48, 0, 1, 0, 264}, {0x2abab8b15b30, 0, 1, 0, 264}, {0x2abab8876cc8, 0, 1, 0, 264}, {0x2abab8a4bf98, 0, 1, 0, 264}, {0x2abab8a8b9a8, 0, 1, 0, 264}, {0x2abab87f3b20, 0, 1, 0, 264}, {0x2abab8c55978, 0, 1, 0, 264}, {0x2abab87cd968, 0, 1, 0, 264}, {0x2abab88c19d8, 0, 1, 0, 264}, {0x2abab8b09768, 0, 1, 0, 264}, {0x2abab87cb7d8, 0, 1, 0, 264}, {0x2abab8c84708, 0, 1, 0, 264}, {0x2abab8aa2540, 0, 1, 0, 264}, {0x2abab88a8718, 0, 1, 0, 264}, {0x2abab8ba9958, 0, 1, 0, 264}}) = 26
09:53:35.425030 io_submit(46981453172736, 51, {{0x2abab8977100, 0, 1, 0, 260}, {0x2abab88bd120, 0, 1, 0, 260}, {0x2abab8c94df0, 0, 1, 0, 260}, {0x2abab898aa40, 0, 1, 0, 260}, {0x2abab8a7c388, 0, 1, 0, 260}, {0x2abab87a4af0, 0, 1, 0, 260}, {0x2abab8a42e28, 0, 1, 0, 260}, {0x2abab8a9aa30, 0, 1, 0, 260}, {0x2abab8a24d18, 0, 1, 0, 260}, {0x2abab88dd958, 0, 1, 0, 260}, {0x2abab881f0c0, 0, 1, 0, 260}, {0x2abab8c84708, 0, 1, 0, 260}, {0x2abab89e8560, 0, 1, 0, 260}, {0x2abab8aa2540, 0, 1, 0, 260}, {0x2abab89caf80, 0, 1, 0, 260}, {0x2abab88159b8, 0, 1, 0, 260}, {0x2abab8c55978, 0, 1, 0, 262}, {0x2abab87cd968, 0, 1, 0, 262}, {0x2abab87f3b20, 0, 1, 0, 262}, {0x2abab87a8e10, 0, 1, 0, 264}, {0x2abab88c19d8, 0, 1, 0, 264}, {0x2abab8b2b068, 0, 1, 0, 264}, {0x2abab8a8b9a8, 0, 1, 0, 264}, {0x2abab8920b58, 0, 1, 0, 264}, {0x2abab87f24c0, 0, 1, 0, 264}, {0x2abab87cb7d8, 0, 1, 0, 264}, {0x2abab8ae7e68, 0, 1, 0, 264}, {0x2abab8c13840, 0, 1, 0, 264}, {0x2abab8ba9958, 0, 1, 0, 264}, {0x2abab8ade760, 0, 1, 0, 264}, {0x2abab87a7218, 0, 1, 0, 264}, {0x2abab8a4bf98, 0, 1, 0, 264}, {0x2abab8ad2ec8, 0, 1, 0, 264}, {0x2abab8c65ac8, 0, 1, 0, 264}, {0x2abab8876cc8, 0, 1, 0, 264}, {0x2abab8bc20e8, 0, 1, 0, 264}, {0x2abab8b97678, 0, 1, 0, 264}, {0x2abab8b86f90, 0, 1, 0, 264}, {0x2abab8b15b30, 0, 1, 0, 264}, {0x2abab8988e48, 0, 1, 0, 264}, {0x2abab88a8718, 0, 1, 0, 264}, {0x2abab8a236b8, 0, 1, 0, 264}, {0x2abab8b395c0, 0, 1, 0, 264}, {0x2abab8aec188, 0, 1, 0, 264}, {0x2abab8b0f0e8, 0, 1, 0, 264}, {0x2abab8b3e410, 0, 1, 0, 264}, {0x2abab89b54b0, 0, 1, 0, 264}, {0x2abab8b09768, 0, 1, 0, 264}, {0x2abab89141f8, 0, 1, 0, 264}, {0x2abab8bd7088, 0, 1, 0, 264}, {0x2abab8819cd8, 0, 1, 0, 264}}) = 51
Process 1522 detached
--//都是批量的写入操作.
--//而单独跟踪主库的dbw0进程:
# strace -f -p 3827 -e trace=io_submit -tt
Process 3827 attached - interrupt to quit
09:56:20.831067 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 264}}) = 1
09:56:20.843466 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 264}}) = 1
09:56:20.851023 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 264}}) = 1
09:56:20.861885 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 264}}) = 1
09:56:26.848136 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 264}}) = 1
09:56:41.886321 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 264}}) = 1
09:56:41.919465 io_submit(47150328119296, 1, {{0x2ae20a596c68, 0, 1, 0, 260}}) = 1
--//可以发现备库在应用日志时存在大量的写入操作.第2个参数数量一般都很大.
# man io_submit
IO_SUBMIT(2) Linux Programmer's Manual IO_SUBMIT(2)
NAME
io_submit - Submit asynchronous I/O blocks for processing
SYNOPSIS
#include <libaio.h>
long io_submit (aio_context_t ctx_id, long nr, struct iocb **iocbpp);
DESCRIPTION
io_submit() queues nr I/O request blocks for processing in the AIO context ctx_id. iocbpp should be an array of
nr AIO request blocks, which will be submitted to context ctx_id.
RETURN VALUE
io_submit() returns the number of iocbs submitted and 0 if nr is zero.