本文主要介绍一下ganglia metric的概念, 为以后更好地了解metric的扩展做准备.
metric即监控项, gmond自带的核心模块中, 包含了7类监控项目, 共计40几个监控项.
使用gmond -m可以查看所有的监控项.
我们先来了解一下metric的数据结构 :
ganglia-3.6.0/lib/gm_protocol.h
/* Static description of a single metric. */
struct Ganglia_25metric {
int key;
char *name; // metric name; must be unique among the metrics of one host
int tmax; // interval at which all metrics in this metric's group are sent to the send channel; corresponds to the time_threshold setting of the collection_group
Ganglia_value_types type; // value type, e.g. int8, float ...
char *units; // unit label; used for display only, not for calculation
char *slope; // trend of the value: zero (constant), positive (only increases), negative (only decreases), both (may increase or decrease)
char *fmt; // NOTE(review): presumably a printf-style format for the value (mod_load.c passes "%.2f" here) -- confirm
int msg_size; // NOTE(review): presumably the message size budget (mod_load.c passes UDP_HEADER_SIZE+8 here) -- confirm
char *desc; // description
int *metadata;
};
然后dump一个XML来看看 :
# netstat -anp|grep gmond
tcp 0 0 0.0.0.0:8649 0.0.0.0:* LISTEN 4271/gmond
udp 0 0 172.16.3.221:63894 239.2.11.71:8649 ESTABLISHED 4271/gmond
udp 0 0 239.2.11.71:8649 0.0.0.0:* 4271/gmond
将gmond的数据dump出来.
[root@db-172-16-3-221 cpu]# telnet 127.0.0.1 8649
截取一部分metric的信息如下
<GANGLIA_XML VERSION="3.6.0" SOURCE="gmond">
<CLUSTER NAME="test" LOCALTIME="1411027577" OWNER="digoal" LATLONG="111 122" URL="http://dba.sky-mobi.com">
<HOST NAME="db-172-16-3-221.localdomain" IP="172.16.3.221" TAGS="" REPORTED="1411027564" TN="12" TMAX="20" DMAX="86400" LOCATION="1,2,3" GMOND_STARTED="1411026704">
........
<METRIC NAME="load_one" VAL="0.00" TYPE="float" UNITS=" " TN="62" TMAX="70" DMAX="0" SLOPE="both">
<EXTRA_DATA>
<EXTRA_ELEMENT NAME="GROUP" VAL="load"/>
<EXTRA_ELEMENT NAME="DESC" VAL="One minute load average"/>
<EXTRA_ELEMENT NAME="TITLE" VAL="One Minute Load Average"/>
</EXTRA_DATA>
</METRIC>
......
</HOST>
<HOST NAME="abc" IP="172.16.3.150" TAGS="" REPORTED="1411026788" TN="788" TMAX="20" DMAX="86400" LOCATION="unspecified" GMOND_STARTED="0">
<METRIC NAME="load_five" VAL="1.0" TYPE="float" UNITS="" TN="810" TMAX="60" DMAX="0" SLOPE="both">
<EXTRA_DATA>
<EXTRA_ELEMENT NAME="TITLE" VAL="load_five"/>
<EXTRA_ELEMENT NAME="DESC" VAL="load_five"/>
<EXTRA_ELEMENT NAME="GROUP" VAL="cpu"/>
<EXTRA_ELEMENT NAME="CLUSTER" VAL="test"/>
<EXTRA_ELEMENT NAME="SPOOF_HOST" VAL="172.16.3.150:abc"/>
</EXTRA_DATA>
</METRIC>
......
</HOST>
host相关XML的代码
/* Format the opening <HOST ...> tag into hostxml (size limit 1024) and keep
 * the apr_snprintf() return value in len.
 * TAGS falls back to "" when tags is NULL, LOCATION falls back to
 * "unspecified" when hostinfo->location is NULL, and REPORTED is
 * last_heard_from divided by APR_USEC_PER_SEC (presumably microseconds
 * converted to seconds). */
len = apr_snprintf(hostxml, 1024,
"<HOST NAME=\"%s\" IP=\"%s\" TAGS=\"%s\" REPORTED=\"%d\" TN=\"%d\" TMAX=\"%d\" DMAX=\"%d\" LOCATION=\"%s\" GMOND_STARTED=\"%d\">\n",
hostinfo->hostname,
hostinfo->ip,
tags ? tags : "",
(int)(hostinfo->last_heard_from / APR_USEC_PER_SEC),
tn,
host_tmax,
host_dmax,
hostinfo->location? hostinfo->location: "unspecified",
hostinfo->gmond_started);
含义参考
http://blog.163.com/digoal@126/blog/static/1638770402014812104313876/
metric相关XML的代码
/* Format the opening <METRIC ...> tag into metricxml (size limit 1024) and
 * keep the apr_snprintf() return value in len.
 * VAL comes from the value message (val) via gmetric_value_to_str(); TYPE,
 * UNITS, TMAX, DMAX and SLOPE come from the metadata message (data).
 * TN is (now - val->last_heard_from) divided by APR_USEC_PER_SEC, i.e. the
 * time since this value was last heard (presumably in seconds). */
len = apr_snprintf(metricxml, 1024,
"<METRIC NAME=\"%s\" VAL=\"%s\" TYPE=\"%s\" UNITS=\"%s\" TN=\"%d\" TMAX=\"%d\" DMAX=\"%d\" SLOPE=\"%s\">\n",
metricName,
gmetric_value_to_str(&(val->message_u.v_message)),
data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.type,
data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.units,
(int)((now - val->last_heard_from) / APR_USEC_PER_SEC),
data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.tmax,
data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.dmax,
slope_to_cstr(data->message_u.f_message.Ganglia_metadata_msg_u.gfull.metric.slope));
含义参考
http://blog.163.com/digoal@126/blog/static/1638770402014812104313876/
接下来, 我们可以看看gmond的核心模块中的其中一个load模块是如何采样的.
[root@db-172-16-3-221 cpu]# cat mod_load.c
#include <gm_metric.h>
#include <libmetrics.h>
mmodule load_module;
/*
 * Module init callback.
 *
 * Calls libmetrics_init(), then walks load_module.metrics_info until the
 * entry whose name is NULL, giving every metric a metadata store and
 * tagging it with the group "load".
 *
 * p: APR pool handed to MMETRIC_INIT_METADATA.
 * Returns 0 unconditionally.
 */
static int load_metric_init ( apr_pool_t *p )
{
    int idx = 0;

    libmetrics_init();

    /* The metric table is terminated by an entry with a NULL name. */
    while (load_module.metrics_info[idx].name != NULL) {
        /* Create the metadata storage first, then add the key/value pair;
         * MGROUP is the key of the grouping attribute. */
        MMETRIC_INIT_METADATA(&(load_module.metrics_info[idx]),p);
        MMETRIC_ADD_METADATA(&(load_module.metrics_info[idx]),MGROUP,"load");
        idx++;
    }

    return 0;
}
/* Module cleanup callback; intentionally empty, this function does nothing. */
static void load_metric_cleanup ( void )
{
}
/*
 * Sample the load metric selected by metric_index.
 *
 * metric_index is the position of the metric in the load_metric_info
 * array: 0 = load_one, 1 = load_five, 2 = load_fifteen.
 * Returns the value produced by the matching load_*_func(); for any other
 * index a value whose float member is 0 is returned.
 */
static g_val_t load_metric_handler ( int metric_index )
{
    g_val_t fallback;

    if (metric_index == 0)
        return load_one_func();
    if (metric_index == 1)
        return load_five_func();
    if (metric_index == 2)
        return load_fifteen_func();

    /* No metric is registered under this index. */
    fallback.f = 0;
    return fallback;
}
/* Metric table of the load module.
 * Each initializer fills the Ganglia_25metric fields positionally:
 * key, name, tmax, type, units, slope, fmt, msg_size, desc.
 * The position of an entry is the metric_index that load_metric_handler
 * switches on, so the order here must match its case labels.
 * The final entry with a NULL name terminates the table; load_metric_init
 * stops iterating when it reaches it. */
static Ganglia_25metric load_metric_info[] =
{
{0, "load_one", 70, GANGLIA_VALUE_FLOAT, " ", "both", "%.2f", UDP_HEADER_SIZE+8, "One minute load average"},
{0, "load_five", 325, GANGLIA_VALUE_FLOAT, " ", "both", "%.2f", UDP_HEADER_SIZE+8, "Five minute load average"},
{0, "load_fifteen", 950, GANGLIA_VALUE_FLOAT, " ", "both", "%.2f", UDP_HEADER_SIZE+8, "Fifteen minute load average"},
{0, NULL}
};
/* Module descriptor for the load module: after the STD_MMODULE_STUFF macro
 * it lists, in order, the init callback, the cleanup callback, the metric
 * table and the per-metric handler.
 * NOTE(review): the mmodule field layout is not shown here; the only field
 * visible in this file is metrics_info, which load_metric_init reads --
 * confirm the rest against gm_metric.h. */
mmodule load_module =
{
STD_MMODULE_STUFF,
load_metric_init,
load_metric_cleanup,
load_metric_info,
load_metric_handler,
};
除了metric name, 其实还看到了metric group 的信息. group方便gweb归类.
例如核心模块中的group load包含了3个metric. (load_one, load_five, load_fifteen)
[root@db-172-16-3-221 cpu]# gmond -m|grep load
load_fifteen Fifteen minute load average (module load_module)
load_one One minute load average (module load_module)
load_five Five minute load average (module load_module)
最后, 通过gmetric来了解一下metric.
gmetric是一个可以手工向udp send channel发送metric信息的工具. 它需要读gmond.conf配置文件, 从中得到udp send channel以及其他信息, 然后从命令行输入metric相关的值, 发送出去.
# man gmetric
GMETRIC(1) User Commands GMETRIC(1)
NAME
gmetric - manual page for Ganglia Custom Metric Utility
SYNOPSIS
gmetric [OPTIONS]...
DESCRIPTION
The Ganglia Metric Client (gmetric) announces a metric on the list of defined send channels defined in a con-
figuration file
-h, --help
Print help and exit
-V, --version
Print version and exit
-c, --conf=STRING //配置文件
The configuration file to use for finding send channels (default=‘/etc/ganglia/gmond.conf’)
-n, --name=STRING // metric name
Name of the metric
-v, --value=STRING // metric 值
Value of the metric
-t, --type=STRING // metric 值类型
Either string|int8|uint8|int16|uint16|int32|uint32|float|double
-u, --units=STRING // 单位
Unit of measure for the value e.g. Kilobytes, Celcius (default=‘’)
-s, --slope=STRING // 值属性
Either zero|positive|negative|both (default=‘both’)
-x, --tmax=INT // 类似gmond里配置的 collection group 消息发送间隔
The maximum time in seconds between gmetric calls (default=‘60’)
-d, --dmax=INT // metric 超时删除时间, 目前貌似没有用到这个.
The lifetime in seconds of this metric (default=‘0’)
-g, --group=STRING // metric属于哪个组, 可以参考核心模块, 例如load_five属于load组.
Group of the metric
-D, --desc=STRING // 描述
Description of the metric
-T, --title=STRING // 在gweb中显示为 title
Title of the metric
-S, --spoof=STRING // 指定metric所属的主机名和IP. (例如在A主机发送消息, 默认会将metric归到A主机, 但是如果你想从A主机发出B主机的metric, 那就需要spoof) (因为metric是按主机名归类分开存储的)
IP address and name of host/device (colon separated) we are spoofing (default=‘’)
-H, --heartbeat // 心跳消息
spoof a heartbeat message (use with spoof option)
命令行帮助 :
[root@db-172-16-3-221 cpu]# gmetric -h
gmetric 3.6.0
The Ganglia Metric Client (gmetric) announces a metric
on the list of defined send channels defined in a configuration file
Usage: gmetric [OPTIONS]...
-h, --help Print help and exit
-V, --version Print version and exit
-c, --conf=STRING The configuration file to use for finding send channels
(default=`/opt/ganglia-core-3.6.0/etc/gmond.conf')
-n, --name=STRING Name of the metric
-v, --value=STRING Value of the metric
-t, --type=STRING Either
string|int8|uint8|int16|uint16|int32|uint32|float|double
-u, --units=STRING Unit of measure for the value e.g. Kilobytes, Celcius
(default=`')
-s, --slope=STRING Either zero|positive|negative|both (default=`both')
-x, --tmax=INT The maximum time in seconds between gmetric calls
(default=`60')
-d, --dmax=INT The lifetime in seconds of this metric (default=`0')
-g, --group=STRING Group(s) of the metric (comma-separated)
-C, --cluster=STRING Cluster of the metric
-D, --desc=STRING Description of the metric
-T, --title=STRING Title of the metric
-S, --spoof=STRING IP address and name of host/device (colon separated) we
are spoofing (default=`')
-H, --heartbeat spoof a heartbeat message (use with spoof option)
分组在gweb中应用 :
[参考]
1. http://blog.163.com/digoal@126/blog/static/1638770402014810227936/
2. http://blog.163.com/digoal@126/blog/static/1638770402014812104313876/
3. ganglia-3.6.0/gmond/modules/cpu/*
4. man gmetric