FAU config

This commit is contained in:
2023-06-13 07:26:59 +02:00
parent 19ad3e6953
commit 39c152b71f
58 changed files with 5946 additions and 2 deletions

View File

@@ -0,0 +1,145 @@
{
"nfs4stat" : {},
"memstat" : {
"numa_stats": true,
"node_stats": true
},
"cpustat" : {},
"loadavg" : {},
"schedstat": {},
"nvidia" : {
"use_pci_info_as_type_id": true
},
"lustrestat" : {
"send_all_metrics" : true,
"use_sudo": false,
"send_diff_values": true,
"send_derived_values": true,
"send_abs_values": false
},
"netstat" : {
"include_devices" : [
"enp1s0",
"enp70s0f0",
"enp195s0f0"
],
"send_abs_values": true,
"send_derived_values": true
},
"diskstat" : {},
"iostat" : {},
"ibstat" : {
"send_abs_values": true,
"send_derived_values": true
},
"ipmistat" : {
"send_abs_values": true,
"send_derived_values": true
},
"tempstat" : {
"tag_override" : {
"hwmon0" : {
"type" : "socket",
"type-id" : "0"
},
"hwmon1" : {
"type" : "socket",
"type-id" : "1"
}
}
},
"likwid": {
"force_overwrite" : true,
"invalid_to_zero" : true,
"access_mode" : "accessdaemon",
"accessdaemon_path" : "/apps/likwid/system/sbin",
"liblikwid_path": "/apps/likwid/system/lib/liblikwid.so",
"eventsets": [
{
"events": {
"FIXC1": "ACTUAL_CPU_CLOCK",
"FIXC2": "MAX_CPU_CLOCK",
"PMC0": "RETIRED_INSTRUCTIONS",
"PMC1": "CPU_CLOCKS_UNHALTED",
"PMC2": "RETIRED_SSE_AVX_FLOPS_ALL",
"PMC3": "MERGE",
"DFC0": "DRAM_CHANNEL_0",
"DFC1": "DRAM_CHANNEL_1",
"DFC2": "DRAM_CHANNEL_2",
"DFC3": "DRAM_CHANNEL_3"
},
"metrics": [
{
"name": "ipc",
"calc": "PMC0/PMC1",
"type": "hwthread",
"publish": true
},
{
"name": "flops_any",
"calc": "1E-9*PMC2/time",
"unit": "GFlops/s",
"type": "hwthread",
"publish": true
},
{
"name": "clock",
"calc": "1E-6*(FIXC1/FIXC2)/inverseClock",
"type": "hwthread",
"unit": "MHz",
"publish": true
},
{
"name": "mem1",
"calc": "1E-9*(DFC0+DFC1+DFC2+DFC3)*64.0/time",
"unit": "Gbyte/s",
"type": "socket",
"publish": false
}
]
},
{
"events": {
"DFC0": "DRAM_CHANNEL_4",
"DFC1": "DRAM_CHANNEL_5",
"DFC2": "DRAM_CHANNEL_6",
"DFC3": "DRAM_CHANNEL_7",
"PWR0": "RAPL_CORE_ENERGY",
"PWR1": "RAPL_PKG_ENERGY"
},
"metrics": [
{
"name": "core_power",
"calc": "PWR0/time",
"unit": "Watt",
"type": "hwthread",
"publish": true
},
{
"name": "cpu_power",
"calc": "PWR1/time",
"type": "socket",
"unit": "Watt",
"publish": true
},
{
"name": "mem2",
"calc": "1E-9*(DFC0+DFC1+DFC2+DFC3)*64.0/time",
"unit": "Gbyte/s",
"type": "socket",
"publish": false
}
]
}
],
"globalmetrics": [
{
"name": "mem_bw",
"calc": "mem1+mem2",
"type": "socket",
"unit": "Gbyte/s",
"publish": true
}
]
}
}

View File

@@ -0,0 +1,8 @@
{
"sinks": "/etc/cc-metric-collector/sinks.json",
"collectors" : "/etc/cc-metric-collector/collectors.json",
"receivers" : "/etc/cc-metric-collector/receivers.json",
"router" : "/etc/cc-metric-collector/router.json",
"interval": "60s",
"duration": "10s"
}

View File

@@ -0,0 +1 @@
{}

View File

@@ -0,0 +1,58 @@
{
"add_tags" : [
{
"key" : "cluster",
"value" : "alex",
"if" : "*"
}
],
"rename_metrics" : {
"load_one" : "cpu_load",
"cpu_load_core" : "cpu_load",
"net_bytes_in_bw" : "net_bytes_in",
"net_bytes_out_bw" : "net_bytes_out",
"net_pkts_in_bw" : "net_pkts_in",
"net_pkts_out_bw" : "net_pkts_out",
"ib_recv_bw" : "ib_recv",
"ib_xmit_bw" : "ib_xmit",
"ib_recv_pkts_bw": "ib_recv_pkts",
"ib_xmit_pkts_bw": "ib_xmit_pkts",
"lustre_read_bytes_diff" : "lustre_read_bytes",
"lustre_read_requests_diff" : "lustre_read_requests",
"lustre_write_bytes_diff" : "lustre_write_bytes",
"lustre_write_requests_diff" : "lustre_write_requests",
"lustre_open_diff" : "lustre_open",
"lustre_close_diff" : "lustre_close",
"lustre_setattr_diff" : "lustre_setattr",
"lustre_getattr_diff" : "lustre_getattr",
"lustre_statfs_diff": "lustre_statfs",
"lustre_inode_permission_diff" : "lustre_inode_permission",
"nv_util" : "acc_utilization",
"nv_fb_mem_used" : "acc_mem_used",
"nv_power_usage" : "acc_power"
},
"drop_metrics" : [
"net_bytes_in",
"net_bytes_out",
"ib_recv",
"ib_xmit",
"ib_recv_pkts",
"ib_xmit_pkts",
"net_pkts_in",
"net_pkts_out",
"lustre_read_bytes",
"lustre_read_requests",
"lustre_write_bytes",
"lustre_write_requests"
],
"interval_timestamp" : false,
"num_cache_intervals" : 0,
"change_unit_prefix": {
"mem_used": "G",
"swap_used": "G",
"mem_total": "G",
"swap_total": "G",
"cpufreq": "M"
},
"normalize_metrics" : true
}

View File

@@ -0,0 +1,26 @@
{
"nhrinflux": {
"type": "influxasync",
"host": "monitoring-test.nhr.uni-erlangen.de",
"port": "8086",
"organization": "ClusterCockpit",
"database": "alex",
"password": "XYZ",
"ssl": true,
"meta_as_tags": [
"unit"
]
},
"alexstore": {
"type": "http",
"url": "http://monitoring.nhr.fau.de:8082/api/write?cluster=alex",
"jwt": "XYZ",
"meta_as_tags": [
"unit"
],
"idle_connection_timeout": "60s",
"flush_delay": "2s",
"max_retries": 1,
"timeout": "10s"
}
}

View File

@@ -0,0 +1,33 @@
{
"fritzganglia": {
"type": "libganglia",
"gmond_config": "/etc/ganglia/gmond.conf",
"libganglia_path": "libganglia.so.0",
"add_ganglia_group": true
},
"nhrinflux": {
"type": "influxasync",
"host": "monitoring-test.nhr.uni-erlangen.de",
"port": "8086",
"organization": "ClusterCockpit",
"database": "fritz_neu",
"password": "XZY",
"ssl": true,
"meta_as_tags": [
"unit"
]
},
"fritzstore": {
"type": "http",
"url": "http://monitoring.nhr.fau.de:8082/api/write?cluster=fritz",
"jwt": "XZY",
"meta_as_tags": [
"unit"
],
"idle_connection_timeout": "60s"
},
"debugstdout": {
"type": "stdout",
"output_file": "/tmp/debug.log"
}
}