Systemtap examples, Identifying Contended User-Space Locks

本文的例子可用于判断程序的性能问题是否由 futex 锁冲突引起.


This section describes how to identify contended user-space locks throughout the system within a specific time period. The ability to identify contended user-space locks can help you investigate poor program performance that you suspect may be caused by futex contentions.

Simply put, futex contention occurs when multiple processes are trying to access the same lock variable at the same time. This can result in poor performance because the lock serializes execution; one process obtains the lock while the other processes must wait for the lock variable to become available again.

修改成间隔输出后的脚本及注解如下 : 


[root@db-172-16-3-150 process]# cd /opt/systemtap/share/doc/systemtap/examples/process

[root@db-172-16-3-150 process]# cat futexes.stp

#! /usr/bin/env stap

# This script tries to identify contended user-space locks by hooking

# into the futex system call.

# Futex command and modifier-flag values used to decode the $op argument.

global FUTEX_WAIT = 0 /*, FUTEX_WAKE = 1 */

global FUTEX_PRIVATE_FLAG = 128 /* linux 2.6.22+ */

global FUTEX_CLOCK_REALTIME = 256 /* linux 2.6.29+ */

global lock_waits # stats on (pid, lock address) blockage elapsed time, in microseconds

global process_names # pid-to-execname mapping

probe syscall.futex.return {

  // Take the measurement first so that the bookkeeping below does not

  // add to the recorded wait time.

  elapsed = gettimeofday_us() - @entry(gettimeofday_us())

  // $op is a bit field. Clear the modifier flags to obtain the bare futex

  // command (~ is bitwise NOT, | is bitwise OR, & is bitwise AND).

  futex_cmd = $op & ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)

  // Only FUTEX_WAIT calls are recorded; for any other command the handler

  // does nothing further.

  if (futex_cmd == FUTEX_WAIT) {

    process_names[pid()] = execname()

    lock_waits[pid(), $uaddr] <<< elapsed

  }

}

// Background on the bit operations used above:

// http://blog.163.com/digoal@126/blog/static/163877040201398103956233/

// Every 5 seconds: print one line per (pid, lock) pair recorded in lock_waits,

// sorted by ascending pid, then reset both arrays so that the next report

// covers only the following interval.

probe timer.s(5) {

  foreach ([pid+, lock] in lock_waits) {

    printf ("%s[%d] lock %p contended %d times, %d avg us\n",

            process_names[pid], pid, lock, @count(lock_waits[pid,lock]),

            @avg(lock_waits[pid,lock]))

  }

  // Both arrays must be cleared. If lock_waits is left in place, old entries

  // are printed again in every later interval, and with an empty process

  // name because process_names no longer holds their pid.

  delete lock_waits

  delete process_names

}

// Output columns: process name[pid], lock address, number of contentions,

// average wait time in microseconds.

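输出如下 (注意: 下面的输出中, 之前周期的记录被重复打印且缺少进程名, 这是因为采集时 timer probe 只清空了 process_names, 没有清空 lock_waits; 两个数组都清空后, 每个周期只会输出该周期内新产生的锁冲突) : 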


[root@db-172-16-3-150 process]# stap futexes.stp

auditd[1599] lock 0x7f08ffd1f294 contended 1 times, 3291 avg us

rs:main Q:Reg[1624] lock 0x7fbbc9d87e64 contended 1 times, 9619461 avg us

[1599] lock 0x7f08ffd1f294 contended 1 times, 3291 avg us

[1624] lock 0x7fbbc9d87e64 contended 1 times, 9619461 avg us

pgbench[23916] lock 0x7f1440f07360 contended 4 times, 134 avg us

[1599] lock 0x7f08ffd1f294 contended 1 times, 3291 avg us

[1624] lock 0x7fbbc9d87e64 contended 1 times, 9619461 avg us

automount[1973] lock 0x7faeb99e7224 contended 1 times, 3596 avg us

automount[1973] lock 0x7faea00008ec contended 1 times, 2014 avg us

automount[1973] lock 0x7faea00008c0 contended 1 times, 523 avg us

[23916] lock 0x7f1440f07360 contended 4 times, 134 avg us

[1599] lock 0x7f08ffd1f294 contended 1 times, 3291 avg us

[1624] lock 0x7fbbc9d87e64 contended 1 times, 9619461 avg us

[1973] lock 0x7faeb99e7224 contended 1 times, 3596 avg us

[1973] lock 0x7faea00008ec contended 1 times, 2014 avg us

[1973] lock 0x7faea00008c0 contended 1 times, 523 avg us

[23916] lock 0x7f1440f07360 contended 4 times, 134 avg us

本文涉及的probe alias原型 : 


# futex ______________________________________________________

# long sys_futex(u32 __user *uaddr,

#           int op,

#           int val,

#           struct timespec __user *utime,

#           u32 __user *uaddr2,

#           int val3)

# long compat_sys_futex(u32 __user *uaddr, int op, u32 val,

#               struct compat_timespec __user *utime, u32 __user *uaddr2,

#               u32 val3)

#

# Entry-side alias for the futex system call: copies the target variables

# into alias variables and builds a printable argument string in argstr.

# The trailing "?" marks the probe point as optional.

probe syscall.futex = kernel.function("sys_futex").call ?

{

        name = "futex"

        # Expose the six system call arguments under alias variable names.

        futex_uaddr = $uaddr

        op = $op

        val = $val

        utime_uaddr = $utime

        uaddr2_uaddr = $uaddr2

        val3 = $val3

        # The raw $op is compared with 0, so only an op with no modifier flag

        # bits set takes the first branch, which appends a fourth field built

        # from $utime (presumably the formatted timeout -- confirm against

        # the _struct_timespec_u tapset helper).

        if ($op == 0)

                argstr = sprintf("%p, %s, %d, %s", $uaddr, _futex_op_str($op),

                        $val, _struct_timespec_u($utime, 1))

        else

                argstr = sprintf("%p, %s, %d", $uaddr, _futex_op_str($op),

                        $val)

}

# Return-side alias for the futex system call: sets name and a printable

# form of the return value in retstr. The trailing "?" marks the probe

# point as optional.

probe syscall.futex.return = kernel.function("sys_futex").return ?

{

        name = "futex"

        # NOTE(review): the first argument of return_str presumably selects

        # the output format -- confirm against the tapset definition.

        retstr = return_str(1, $return)

}

源代码 : 


[root@db-172-16-3-150 linux]# stap -L 'kernel.function("sys_futex").return'

kernel.function("sys_futex@kernel/futex.c:2692").return $return:long int $uaddr:u32* $op:int $val:u32 $utime:struct timespec* $uaddr2:u32* $val3:u32 $ts:struct timespec $t:ktime_t

/usr/src/debug/kernel-2.6.32-358.el6/linux-2.6.32-358.el6.x86_64/kernel/futex.c

/*

 * futex system call entry point. Decodes the 'utime' argument, which is

 * either a user-space timeout pointer or an integer value depending on

 * the command, and passes the result on to do_futex().

 *

 * Returns -EFAULT if the timeout cannot be copied from user space,

 * -EINVAL if the copied timespec is not valid, otherwise the value

 * returned by do_futex().

 */

SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,

                struct timespec __user *, utime, u32 __user *, uaddr2,

                u32, val3)

{

        struct timespec ts;

        ktime_t t, *tp = NULL;

        u32 val2 = 0;

        /* Masked command used for all comparisons below (presumably with the modifier flags stripped -- confirm FUTEX_CMD_MASK). */

        int cmd = op & FUTEX_CMD_MASK;

        /* For these commands a non-NULL utime is read as a timeout; tp stays NULL otherwise. */

        if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||

                      cmd == FUTEX_WAIT_BITSET ||

                      cmd == FUTEX_WAIT_REQUEUE_PI)) {

                if (copy_from_user(&ts, utime, sizeof(ts)) != 0)

                        return -EFAULT;

                if (!timespec_valid(&ts))

                        return -EINVAL;

                t = timespec_to_ktime(ts);

                /* FUTEX_WAIT only: add the current ktime_get() value (presumably turning a relative timeout into an absolute one). */

                if (cmd == FUTEX_WAIT)

                        t = ktime_add_safe(ktime_get(), t);

                tp = &t;

        }

        /*

         * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.

         * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.

         */

        if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||

            cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)

                val2 = (u32) (unsigned long) utime;

        /* The unmasked op is passed on, together with the decoded timeout and val2. */

        return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);

}

[参考]

1. man 2 futex


FUTEX(2)                   Linux Programmer’s Manual                  FUTEX(2)

NAME

       futex - Fast Userspace Locking system call

SYNOPSIS

       #include <linux/futex.h>

       #include <sys/time.h>

       int futex(int *uaddr, int op, int val, const struct timespec *timeout,

                 int *uaddr2, int val3);

DESCRIPTION

       The futex() system call provides a method for a program to wait for a value at a given address to change, and a

       method to wake up anyone waiting on a particular address (while the addresses for the same memory  in  separate

       processes  may  not  be equal, the kernel maps them internally so the same memory mapped in different locations

       will correspond for futex() calls).  It is typically used to implement the contended case of a lock  in  shared

       memory, as described in futex(7).

       When  a  futex(7)  operation  did not finish uncontended in userspace, a call needs to be made to the kernel to

       arbitrate.  Arbitration can either mean putting the calling process to sleep or, conversely, waking  a  waiting

       process.

       Callers  of  this  function are expected to adhere to the semantics as set out in futex(7).  As these semantics

       involve writing non-portable assembly instructions, this in turn probably means that most users will in fact be

       library authors and not general application developers.

       The  uaddr argument needs to point to an aligned integer which stores the counter.  The operation to execute is

       passed via the op argument, along with a value val.

       Five operations are currently defined:

       FUTEX_WAIT

              This operation atomically verifies that the futex address uaddr still contains the value val, and sleeps

              awaiting  FUTEX_WAKE  on this futex address.  If the timeout argument is non-NULL, its contents describe

              the maximum duration of the wait, which is infinite  otherwise.   The  arguments  uaddr2  and  val3  are

              ignored.

              For  futex(7),  this  call  is executed if decrementing the count gave a negative value (indicating con-

              tention), and will sleep until another process releases the futex and executes the FUTEX_WAKE operation.

       FUTEX_WAKE

              This operation wakes at most val processes waiting on this futex address (i.e., inside FUTEX_WAIT).  The

              arguments timeout, uaddr2 and val3 are ignored.

              For futex(7), this is executed if incrementing the count showed that there were waiters, once the  futex

              value has been set to 1 (indicating that it is available).

       FUTEX_FD (present up to and including Linux 2.6.25)

              To  support  asynchronous wakeups, this operation associates a file descriptor with a futex.  If another

              process executes a FUTEX_WAKE, the process will receive the signal number that was passed in  val.   The

              calling  process  must  close the returned file descriptor after use.  The arguments timeout, uaddr2 and

              val3 are ignored.

              To prevent race conditions, the caller should test if the futex has been upped after FUTEX_FD returns.

              Because it was inherently racy, FUTEX_FD has been removed from Linux 2.6.26 onwards.

       FUTEX_REQUEUE (since Linux 2.5.70)

              This operation was introduced in order to avoid a "thundering herd" effect when FUTEX_WAKE is  used  and

              all  processes  woken  up need to acquire another futex.  This call wakes up val processes, and requeues

              all other waiters on the futex at address uaddr2.  The arguments timeout and val3 are ignored.

       FUTEX_CMP_REQUEUE (since Linux 2.6.7)

              There was a race in the intended use of FUTEX_REQUEUE, so FUTEX_CMP_REQUEUE  was  introduced.   This  is

              similar to FUTEX_REQUEUE, but first checks whether the location uaddr still contains the value val3.  If

              not, the operation fails with the error EAGAIN.  The argument timeout is ignored.

RETURN VALUE

       Depending on which operation was executed, the returned value for a successful call can  have  differing  mean-

       ings.

       FUTEX_WAIT

              Returns  0  if the process was woken by a FUTEX_WAKE call.  In case of timeout, the operation fails with

              the error ETIMEDOUT.  If the futex was not equal to the expected value, the  operation  fails  with  the

              error  EWOULDBLOCK.  Signals (see signal(7)) or other spurious wakeups cause FUTEX_WAIT to fail with the

              error EINTR.

       FUTEX_WAKE

              Returns the number of processes woken up.

       FUTEX_FD

              Returns the new file descriptor associated with the futex.

       FUTEX_REQUEUE

              Returns the number of processes woken up.

       FUTEX_CMP_REQUEUE

              Returns the number of processes woken up.

       In the event of an error, all operations return -1, and set errno to indicate the error.

ERRORS

       EACCES No read access to futex memory.

       EAGAIN FUTEX_CMP_REQUEUE found an unexpected futex value.  (This  probably  indicates  a  race;  use  the  safe

              FUTEX_WAKE now.)

       EFAULT Error in getting timeout information from userspace.

       EINVAL An operation was not defined or error in page alignment.

       ENFILE The system limit on the total number of open files has been reached.

       ENOSYS Invalid operation specified in op.

VERSIONS

       Initial  futex support was merged in Linux 2.5.7 but with different semantics from what was described above.  A

       4-argument system call with the semantics given here was introduced in Linux 2.5.40.  In Linux 2.5.70 one argu-

       ment was added.  In Linux 2.6.7 a sixth argument was added — messy, especially on the s390 architecture.

CONFORMING TO

       This system call is Linux-specific.

NOTES

       To  reiterate, bare futexes are not intended as an easy-to-use abstraction for end-users.  (There is no wrapper

       function for this system call in glibc.)  Implementors are expected to be assembly literate and  to  have  read

       the sources of the futex userspace library referenced below.

SEE ALSO

       futex(7)

       Fuss,  Futexes  and Furwocks: Fast Userlevel Locking in Linux (proceedings of the Ottawa Linux Symposium 2002),

       futex example library, futex-*.tar.bz2 <URL:ftp://ftp.nl.kernel.org/pub/linux/kernel/people/rusty/>.

COLOPHON

       This page is part of release 3.22 of the Linux man-pages project.  A description of the project,  and  informa-

       tion about reporting bugs, can be found at http://www.kernel.org/doc/man-pages/.

Linux                             2008-11-27                          FUTEX(2)

2. man 7 futex


FUTEX(7)                   Linux Programmer’s Manual                  FUTEX(7)

NAME

       futex - Fast Userspace Locking

SYNOPSIS

       #include <linux/futex.h>

DESCRIPTION

       The Linux kernel provides futexes ("Fast Userspace muTexes") as a building block for fast userspace locking and

       semaphores.  Futexes are very basic and lend themselves well for building  higher  level  locking  abstractions

       such as POSIX mutexes.

       This  page does not set out to document all design decisions but restricts itself to issues relevant for appli-

       cation and library development.  Most programmers will in fact not be using futexes directly but  instead  rely

       on system libraries built on them, such as the NPTL pthreads implementation.

       A futex is identified by a piece of memory which can be shared between different processes.  In these different

       processes, it need not have identical addresses.  In its bare form, a futex has semaphore semantics;  it  is  a

       counter  that  can  be incremented and decremented atomically; processes can wait for the value to become posi-

       tive.

       Futex operation is entirely userspace for the non-contended case.  The kernel is only involved to arbitrate the

       contended  case.  As any sane design will strive for non-contention, futexes are also optimized for this situa-

       tion.

       In its bare form, a futex is an aligned integer which is only touched by atomic assembler  instructions.   Pro-

       cesses  can share this integer using mmap(2), via shared memory segments or because they share memory space, in

       which case the application is commonly called multithreaded.

   Semantics

       Any futex operation starts in userspace, but it may be necessary to communicate with the kernel using the futex(2)

       system call.

       To "up" a futex, execute the proper assembler instructions that will cause the host CPU to atomically increment

       the integer.  Afterwards, check if it has in fact changed from 0 to 1, in which case there were no waiters  and

       the operation is done.  This is the non-contended case which is fast and should be common.

       In  the  contended case, the atomic increment changed the counter from -1  (or some other negative number).  If

       this is detected, there are waiters.  Userspace should now set the counter to 1 and instruct the kernel to wake

       up any waiters using the FUTEX_WAKE operation.

       Waiting  on  a futex, to "down" it, is the reverse operation.  Atomically decrement the counter and check if it

       changed to 0, in which case the operation is done and the futex was uncontended.  In all  other  circumstances,

       the  process should set the counter to -1 and request that the kernel wait for another process to up the futex.

       This is done using the FUTEX_WAIT operation.

       The futex(2) system call can optionally be passed a timeout specifying how long the kernel should wait for  the

       futex  to  be  upped.   In this case, semantics are more complex and the programmer is referred to futex(2) for

       more details.  The same holds for asynchronous futex waiting.

VERSIONS

       Initial futex support was merged in Linux 2.5.7 but with different semantics from those described above.   Cur-

       rent semantics are available from Linux 2.5.40 onwards.

NOTES

       To  reiterate,  bare  futexes  are  not intended as an easy to use abstraction for end-users.  Implementors are

       expected to be assembly literate and to have read the sources of the futex userspace library referenced  below.

       This man page illustrates the most common use of the futex(2) primitives: it is by no means the only one.

SEE ALSO

       futex(2)

       Fuss,  Futexes  and Furwocks: Fast Userlevel Locking in Linux (proceedings of the Ottawa Linux Symposium 2002),

       futex example library, futex-*.tar.bz2 <URL:ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/>.

COLOPHON

       This page is part of release 3.22 of the Linux man-pages project.  A description of the project,  and  informa-

       tion about reporting bugs, can be found at http://www.kernel.org/doc/man-pages/.

Linux                             2002-12-31                          FUTEX(7)

3. https://sourceware.org/systemtap/SystemTap_Beginners_Guide/futexcontentionsect.html

时间: 2024-08-03 23:45:42

Systemtap examples, Identifying Contended User-Space Locks的相关文章

Linux下如何知道文件被那个进程写

本文链接地址: Linux下如何知道文件被那个进程写 一个问题:(想想能解决不?) "一个文件正在被进程写 我想查看这个进程 文件一直在增大 找不到谁在写 使用lsof也没找到" 这个问题挺有普遍性的,解决方法应该很多,这里我给大家提个比较直观的方法. linux下每个文件都会在某个块设备上存放,当然也都有相应的inode, 那么透过vfs.write我们就可以知道谁在不停的写入特定的设备上的inode. 幸运的是systemtap的安装包里带了inodewatch.stp,位于/us

MySQL进阶路:从小工到专家的必读书籍和必备工具

作者介绍 卢钧轶,DBAplus社群原创专家,目前就职于Facebook MySQL Infra Team,主要负责大规模MySQL数据库运维.在Failover.备份.监控.优化.数据库私有云等相关领域有一定经验和个人理解,曾先后就职于BesTV和大众点评网.个人博客:http://cenalulu.github.io/   一.MySQL入门书籍和方法分享 背景:各大论坛上总是有很多同学咨询想学习数据库,或者是为入行DBA做些准备.几年来作为一个MySQL DBA的成长过程有一些积累和感悟,

Linux下使用socktop来检测socket的通讯状况

所有的socket通讯都是通过socket接口来的,任何family的通讯包括unix域套接都要走的,所以只要截获了socket 读写的几个syscall 就可以了解unix域套接字的发送和接受情况.systemtap发行版本提供了个工具socktop, 位于 /usr/share/doc/systemtap/examples/network/socktop, 是个非常方便的工具, 干这个事情最合适了.安装为了部署 SystemTap,需要安装以下两个 RPM 包: 代码如下:systemtap

Dissecting the Disruptor: Why it’s so fast (part one) – Locks Are Bad

原文地址:http://mechanitis.blogspot.com/2011/07/dissecting-disruptor-why-its-so-fast.html(因前方有墙,所以我移到墙内) Martin Fowler has written a really good article describing not only the Disruptor, but also how it fits into the architecture at LMAX.  This gives so

OProfile & Systemtap

Oprofile性能损耗小,如果CPU支持硬件监控的话(现在大多数CPU已经支持).但是Oprofile不能像stap样使用timer来间断输出或累计输出统计,STAP损耗较大.Oprofile 适合做性能诊断,例如系统中最耗CPU的进程,进程中哪些函数是比较耗CPU的,函数中哪段代码是最耗CPU的...operf开启监控, opreport, opannotate可以输出调用报告,或函数.汇编指令等统计情况.Stap 适合做跟踪.例子 :  [root@digoal ~]# cd /data0

[原创]systemtap脚本分析系统中dentry SLAB占用过高问题

摘要 利用systemtap脚本分析系统中dentry SLAB占用过高问题 原创文章:来自systemtap脚本分析系统中dentry SLAB占用过高问题 背景 长时间运行着的tengine主机有内存占用75%以上的报警. 操作系统版本: 2.6.32.el6.x86_64 原因定位 收集内存使用的相关信息如下: 因内存占用率报警mem:76.28%先看一下内存的总体使用状况,从下图中可以看出used占用较高,buffers/cached占用较少(关于cached占用过高的分析处理请参见另一

[unity3d]鼠标点击地面人物自动走动(也包含按键wasd&space控制)

在漫游游戏中常用的功能就是人物在场景中行走,必要的功能就是鼠标点击地面人物就朝着那个方向行走,键盘方向键前后左右也能控制人物的行走和跳跃,在官方自带的第三人称视角中做了一点修改,官方自带的ThirdPersonController中的摄像机自动指向人物的背面,这样不能看到人物的正面或者侧面,对ThirdPersonController脚本做了修改之后,可以旋转摄像机的视角,可以摄像机跟随,类似smoothfollow的功能. 值得注意提醒的一个,就是动画系统,选择老版本的动画系统,不然会提示找不

15 Examples To Master Linux Command Line History

When you are using Linux command line frequently, using the history effectively can be a major productivity boost. In fact, once you have mastered the 15 examples that I've provided here, you'll find using command line more enjoyable and fun. 1. Disp

linux 空间不够了,怎么办?Disk Requirements:At least 11MB more space needed on the / filesystem.

Disk Requirements:At least 11MB more space needed on the / filesystem.  linux 空间不够了,怎么办? 1>  查看空间多少:df -h 2>  查看当期内核: uname -r 3>  查找内核   rpm -qa | grep kernel 4>  删除多余的内核 su -c 'yum remove kernel-devel-2.6.32-431.20.3.el6.i686' 5>  删除系统日志