User Tools

Site Tools


Sidebar

js#vista.png msort nsort

filesystems:glusterfs_diamond_metrics

GlusterFS Diamond Collector

This is the initial test version of this collector. It is a quick hack to verify that I can get the metrics into InfluxDB/Grafana/Graphite. The next version will use the XML output of the gluster utility to gather the metrics. Let me know how I can improve it.

UPDATE: version 0.2 beta now uses the XML output of the gluster command to get the information needed.

UPDATE: version 0.3 beta now has the target_brick and target_volume configuration options available (both default to all bricks and volumes) and provides all latency/hit related metrics. I may consider adding further metrics if needed.

TODO: add more metrics and enable targeted gathering per node/brick and volume.

thanks

# coding=utf-8

"""
The GlusterFSCollector collects per-brick, per-operation hit counts and latency metrics (average, minimum, maximum, total, and percentage of total latency) from the GlusterFS storage system.

version 0.3 beta

Documentation for GlusterFS profiling:
http://gluster.readthedocs.org/en/latest/Administrator%20Guide/Monitoring%20Workload/

#### Dependencies

 * glusterfs [https://www.gluster.org/]
 * Profiling enabled: gluster volume profile <VOLNAME> start

"""

import shlex
import subprocess
import sys

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

import diamond.collector

# Module-level defaults. The collector class in this file takes its
# settings from self.config and does not read these names.
metric_base = 'glusterfs.'
target_volume = target_brick = ''

class GlusterFSCollector(diamond.collector.Collector):
    """Publish per-brick, per-operation GlusterFS profiling metrics.

    For every selected volume the collector runs
    ``gluster volume profile <volume> info cumulative --xml`` and, for every
    selected brick, publishes one set of metrics per file operation (fop):
    ``pctLatency``, ``hits``, ``avgLatency``, ``totalLatency``,
    ``minLatency`` and ``maxLatency``.

    Metric names have the form ``<volume>.<brick host>.<fop>.<metric>``.

    NOTE: the brick component is only the part of ``brickName`` before the
    first ``:``, so two bricks reported with the same host part in one
    volume publish under the same metric names.
    """

    def get_default_config_help(self):
        """Return the help text for this collector's configuration keys."""
        config_help = super(GlusterFSCollector, self).get_default_config_help()
        config_help.update({
            'gluster_path': 'complete path to gluster binary.'
                            ' Defaults to /usr/sbin/gluster',
            'target_volume': 'which volume to send metrics for.'
                             ' Defaults to all',
            'target_brick': 'which node/server to send metrics for.'
                            ' Defaults to all',
        })
        return config_help

    def get_default_config(self):
        """Return the default configuration for this collector."""
        config = super(GlusterFSCollector, self).get_default_config()
        config.update({
            'path': 'glusterfs',
            'gluster_path': '/usr/sbin/gluster',
            'target_volume': '',
            'target_brick': '',
        })
        return config

    def _run_gluster(self, *args):
        """Run the gluster binary with ``args`` and return its raw stdout.

        Returns None (after logging) when the binary cannot be started or
        exits with a non-zero status.
        """
        # shlex.split keeps a multi-word 'gluster_path' (for example a
        # wrapper command followed by the binary) working now that the
        # command no longer goes through a shell.
        command = shlex.split(self.config['gluster_path']) + list(args)
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE)
            output, _ = proc.communicate()
        except OSError as err:
            self.log.error("GlusterFSCollector: cannot run %s: %s",
                           ' '.join(command), err)
            return None
        if proc.returncode != 0:
            self.log.error("GlusterFSCollector: %s exited with status %s",
                           ' '.join(command), proc.returncode)
            return None
        return output

    def collect(self):
        """Collect and publish metrics for every selected volume."""
        listing = self._run_gluster('volume', 'list')
        if not listing:
            return
        # On Python 3 the pipe yields bytes; volume names must be text to
        # be compared with the configuration and used in metric names.
        if not isinstance(listing, str):
            listing = listing.decode('utf-8', 'replace')

        target_volume = self.config['target_volume']
        target_brick = self.config['target_brick']

        for line in listing.splitlines():
            volume = line.strip()
            if not volume:
                continue
            # An empty target means "all volumes".
            if target_volume and volume != target_volume:
                continue
            self._collect_volume(volume, target_brick)

    def _collect_volume(self, volume, target_brick):
        """Fetch the cumulative profile of ``volume`` and publish its bricks."""
        raw_metrics = self._run_gluster(
            'volume', 'profile', volume, 'info', 'cumulative', '--xml')
        if not raw_metrics:
            return
        try:
            xml_metrics = ET.XML(raw_metrics)
        except ET.ParseError as err:
            self.log.error("GlusterFSCollector: unparsable profile XML "
                           "for volume %s: %s", volume, err)
            return

        profile = xml_metrics.find('volProfile')
        if profile is None:
            self.log.error("GlusterFSCollector: no profile data for volume "
                           "%s (is profiling started?)", volume)
            return

        for brick in profile.findall('brick'):
            # brickName is split on ':' and only the first part is used.
            brick_name = (brick.findtext('brickName') or '').split(':')[0]
            if not brick_name:
                continue
            # An empty target means "all bricks".
            if target_brick and brick_name != target_brick:
                continue
            fop_stats = brick.find('cumulativeStats/fopStats')
            if fop_stats is None:
                continue
            self._publish_brick(volume + "." + brick_name, fop_stats)

    def _publish_brick(self, prefix, fop_stats):
        """Publish all metrics for the fop entries under ``fop_stats``.

        ``prefix`` is the ``<volume>.<brick>`` part of the metric name.
        """
        stats = {}
        grand_total = 0.0
        for fop in fop_stats:
            name = fop.findtext('name')
            try:
                hits = int(fop.findtext('hits'))
                avg_latency = float(fop.findtext('avgLatency'))
                min_latency = float(fop.findtext('minLatency'))
                max_latency = float(fop.findtext('maxLatency'))
            except (TypeError, ValueError):
                # Missing or non-numeric field: skip this entry rather
                # than abort the whole collection run.
                continue
            if not name:
                continue
            total_latency = avg_latency * hits
            grand_total += total_latency
            stats[name] = (hits, avg_latency, total_latency,
                           min_latency, max_latency)

        for name, values in stats.items():
            hits, avg_latency, total_latency, min_latency, max_latency = values
            # Guard against a brick whose operations all report zero
            # latency or zero hits, which would otherwise divide by zero.
            if grand_total:
                pct_latency = (total_latency / grand_total) * 100
            else:
                pct_latency = 0.0
            base = prefix + "." + name
            self.publish(base + ".pctLatency", pct_latency)
            self.publish(base + ".hits", hits)
            self.publish(base + ".avgLatency", avg_latency)
            self.publish(base + ".totalLatency", total_latency)
            self.publish(base + ".minLatency", min_latency)
            self.publish(base + ".maxLatency", max_latency)
filesystems/glusterfs_diamond_metrics.txt · Last modified: 2020/02/24 11:16 (external edit)