From 7d0f09ecb9f26ce23bb702460ed5d57df08702a9 Mon Sep 17 00:00:00 2001 From: Michael Panzlaff Date: Wed, 4 Mar 2026 16:48:30 +0100 Subject: [PATCH] cc-metric-collector: update alex --- .../cc-metric-collector/alex/collectors.json | 6 +- nhr@fau/cc-metric-collector/alex/config.json | 14 ++- nhr@fau/cc-metric-collector/alex/router.json | 110 +++++++++--------- nhr@fau/cc-metric-collector/alex/sinks.json | 49 ++++---- .../cc-metric-collector/alex/sinks_debug.json | 65 ++++++----- 5 files changed, 124 insertions(+), 120 deletions(-) diff --git a/nhr@fau/cc-metric-collector/alex/collectors.json b/nhr@fau/cc-metric-collector/alex/collectors.json index 830f0b8..0cc35ed 100644 --- a/nhr@fau/cc-metric-collector/alex/collectors.json +++ b/nhr@fau/cc-metric-collector/alex/collectors.json @@ -27,15 +27,11 @@ "send_derived_values": true }, "diskstat" : {}, - "iostat" : {}, + "nfsiostat" : {}, "ibstat" : { "send_abs_values": true, "send_derived_values": true }, - "ipmistat" : { - "send_abs_values": true, - "send_derived_values": true - }, "tempstat" : { "tag_override" : { "hwmon0" : { diff --git a/nhr@fau/cc-metric-collector/alex/config.json b/nhr@fau/cc-metric-collector/alex/config.json index 308b08f..7806f75 100644 --- a/nhr@fau/cc-metric-collector/alex/config.json +++ b/nhr@fau/cc-metric-collector/alex/config.json @@ -1,8 +1,10 @@ { - "sinks": "/etc/cc-metric-collector/sinks.json", - "collectors" : "/etc/cc-metric-collector/collectors.json", - "receivers" : "/etc/cc-metric-collector/receivers.json", - "router" : "/etc/cc-metric-collector/router.json", - "interval": "60s", - "duration": "10s" + "sinks-file": "/etc/cc-metric-collector/sinks.json", + "collectors-file" : "/etc/cc-metric-collector/collectors.json", + "receivers-file" : "/etc/cc-metric-collector/receivers.json", + "router-file" : "/etc/cc-metric-collector/router.json", + "main" : { + "interval": "60s", + "duration": "10s" + } } diff --git a/nhr@fau/cc-metric-collector/alex/router.json b/nhr@fau/cc-metric-collector/alex/router.json index bb73852..7689bd8 100644 --- a/nhr@fau/cc-metric-collector/alex/router.json +++ b/nhr@fau/cc-metric-collector/alex/router.json @@ -1,58 +1,60 @@ { - "add_tags" : [ - { - "key" : "cluster", - "value" : "alex", - "if" : "*" - } - ], - "rename_metrics" : { - "load_one" : "cpu_load", - "cpu_load_core" : "cpu_load", - "net_bytes_in_bw" : "net_bytes_in", - "net_bytes_out_bw" : "net_bytes_out", - "net_pkts_in_bw" : "net_pkts_in", - "net_pkts_out_bw" : "net_pkts_out", - "ib_recv_bw" : "ib_recv", - "ib_xmit_bw" : "ib_xmit", - "ib_recv_pkts_bw": "ib_recv_pkts", - "ib_xmit_pkts_bw": "ib_xmit_pkts", - "lustre_read_bytes_diff" : "lustre_read_bytes", - "lustre_read_requests_diff" : "lustre_read_requests", - "lustre_write_bytes_diff" : "lustre_write_bytes", - "lustre_write_requests_diff" : "lustre_write_requests", - "lustre_open_diff" : "lustre_open", - "lustre_close_diff" : "lustre_close", - "lustre_setattr_diff" : "lustre_setattr", - "lustre_getattr_diff" : "lustre_getattr", - "lustre_statfs_diff": "lustre_statfs", - "lustre_inode_permission_diff" : "lustre_inode_permission", - "nv_util" : "acc_utilization", - "nv_fb_mem_used" : "acc_mem_used", - "nv_power_usage" : "acc_power" + "process_messages" : { + "rename_messages" : { + "load_one" : "cpu_load", + "cpu_load_core" : "cpu_load", + "net_bytes_in_bw" : "net_bytes_in", + "net_bytes_out_bw" : "net_bytes_out", + "net_pkts_in_bw" : "net_pkts_in", + "net_pkts_out_bw" : "net_pkts_out", + "ib_recv_bw" : "ib_recv", + "ib_xmit_bw" : "ib_xmit", + "ib_recv_pkts_bw": "ib_recv_pkts", + "ib_xmit_pkts_bw": "ib_xmit_pkts", + "lustre_read_bytes_diff" : "lustre_read_bytes", + "lustre_read_requests_diff" : "lustre_read_requests", + "lustre_write_bytes_diff" : "lustre_write_bytes", + "lustre_write_requests_diff" : "lustre_write_requests", + "lustre_open_diff" : "lustre_open", + "lustre_close_diff" : "lustre_close", + "lustre_setattr_diff" : "lustre_setattr", + "lustre_getattr_diff" : "lustre_getattr", + "lustre_statfs_diff": "lustre_statfs", + "lustre_inode_permission_diff" : "lustre_inode_permission", + "nv_util" : "acc_utilization", + "nv_fb_mem_used" : "acc_mem_used", + "nv_power_usage" : "acc_power" + }, + "add_tags_if" : [ + { + "key" : "cluster", + "value" : "alex", + "if" : "true" + } + ], + "drop_messages" : [ + "net_bytes_in", + "net_bytes_out", + "ib_recv", + "ib_xmit", + "ib_recv_pkts", + "ib_xmit_pkts", + "net_pkts_in", + "net_pkts_out", + "lustre_read_bytes", + "lustre_read_requests", + "lustre_write_bytes", + "lustre_write_requests" + ], + "change_unit_prefix": { + "name == 'mem_used'": "G", + "name == 'swap_used'": "G", + "name == 'mem_total'": "G", + "name == 'swap_total'": "G", + "name == 'cpufreq'": "M" + }, + "normalize_metrics" : true }, - "drop_metrics" : [ - "net_bytes_in", - "net_bytes_out", - "ib_recv", - "ib_xmit", - "ib_recv_pkts", - "ib_xmit_pkts", - "net_pkts_in", - "net_pkts_out", - "lustre_read_bytes", - "lustre_read_requests", - "lustre_write_bytes", - "lustre_write_requests" - ], "interval_timestamp" : false, - "num_cache_intervals" : 0, - "change_unit_prefix": { - "mem_used": "G", - "swap_used": "G", - "mem_total": "G", - "swap_total": "G", - "cpufreq": "M" - }, - "normalize_metrics" : true + "num_cache_intervals" : 0 } diff --git a/nhr@fau/cc-metric-collector/alex/sinks.json b/nhr@fau/cc-metric-collector/alex/sinks.json index 75e092a..fdd3830 100644 --- a/nhr@fau/cc-metric-collector/alex/sinks.json +++ b/nhr@fau/cc-metric-collector/alex/sinks.json @@ -1,26 +1,27 @@ { - "nhrinflux": { - "type": "influxasync", - "host": "monitoring-test.nhr.uni-erlangen.de", - "port": "8086", - "organization": "ClusterCockpit", - "database": "alex", - "password": "XYZ", - "ssl": true, - "meta_as_tags": [ - "unit" - ] - }, - "alexstore": { - "type": "http", - "url": "http://monitoring.nhr.fau.de:8082/api/write?cluster=alex", - "jwt": "XYZ", - "meta_as_tags": [ - "unit" - ], - "idle_connection_timeout": "60s", - "flush_delay": "2s", - "max_retries": 1, - "timeout": "10s" - } + "nhrinflux" : { + "type" : "influxasync", + "host": "monitoring-test.nhr.uni-erlangen.de", + "port": "8086", + "organization" : "ClusterCockpit", + "database" : "alex", + "password": "XYX", + "ssl": true, + "meta_as_tags" : [ + "unit" + ] + }, + "alexstore" : { + "type" : "http", + "url" : "http://monitoring.nhr.fau.de:8082/api/write?cluster=alex", + "jwt": "XYZ", + "meta_as_tags" : [ + "unit" + ], + "idle_connection_timeout" : "60s", + "flush_delay" : "2s", + "max_retries" : 1, + "timeout" : "10s", + "precision": "s" + } } diff --git a/nhr@fau/cc-metric-collector/alex/sinks_debug.json b/nhr@fau/cc-metric-collector/alex/sinks_debug.json index 5f7215f..f459ee2 100644 --- a/nhr@fau/cc-metric-collector/alex/sinks_debug.json +++ b/nhr@fau/cc-metric-collector/alex/sinks_debug.json @@ -1,33 +1,36 @@ { - "fritzganglia": { - "type": "libganglia", - "gmond_config": "/etc/ganglia/gmond.conf", - "libganglia_path": "libganglia.so.0", - "add_ganglia_group": true - }, - "nhrinflux": { - "type": "influxasync", - "host": "monitoring-test.nhr.uni-erlangen.de", - "port": "8086", - "organization": "ClusterCockpit", - "database": "fritz_neu", - "password": "XZY", - "ssl": true, - "meta_as_tags": [ - "unit" - ] - }, - "fritzstore": { - "type": "http", - "url": "http://monitoring.nhr.fau.de:8082/api/write?cluster=fritz", - "jwt": "XZY", - "meta_as_tags": [ - "unit" - ], - "idle_connection_timeout": "60s" - }, - "debugstdout": { - "type": "stdout", - "output_file": "/tmp/debug.log" - } + "fritzganglia" : { + "type" : "libganglia", + "gmond_config" : "/etc/ganglia/gmond.conf", + "libganglia_path": "libganglia.so.0", + "add_ganglia_group": true + }, + "nhrinflux" : { + "type" : "influxasync", + "host": "monitoring-test.nhr.uni-erlangen.de", + "port": "8086", + "organization" : "ClusterCockpit", + "database" : "alex", + "password": "XYZ", + "ssl": true, + "meta_as_tags" : [ + "unit" + ] + }, + "alexstore" : { + "type" : "http", + "url" : "http://monitoring.nhr.fau.de:8082/api/write?cluster=alex", + "jwt": "XYZ", + "meta_as_tags" : [ + "unit" + ], + "idle_connection_timeout" : "60s", + "flush_delay" : "2s", + "max_retries" : 1, + "timeout" : "10s" + }, + "debugstdout" : { + "type": "stdout", + "output_file" : "/tmp/debug.log" + } }