// Source file: cc-metric-collector/collectors/likwidMetric.go

package collectors
2021-03-25 17:47:08 +01:00
2021-03-25 14:47:10 +01:00
/*
#cgo CFLAGS: -I./likwid
#cgo LDFLAGS: -L./likwid -llikwid -llikwid-hwloc -lm
#include <stdlib.h>
#include <likwid.h>
*/
import "C"
import (
"errors"
2021-03-25 17:47:08 +01:00
"fmt"
"log"
2021-03-25 14:47:10 +01:00
"strings"
"time"
"unsafe"
)
type LikwidCollector struct {
MetricCollector
2021-03-25 17:47:08 +01:00
cpulist []C.int
2021-03-25 14:47:10 +01:00
sock2tid map[int]int
2021-03-25 17:47:08 +01:00
metrics map[C.int]map[string]int
groups map[string]C.int
init bool
2021-03-25 14:47:10 +01:00
}
// LikwidMetric describes one derived metric that is read from a LIKWID
// performance group.
type LikwidMetric struct {
	// name is the key under which the measured value is stored.
	name string
	// search is a substring that is matched against the metric names of the
	// LIKWID group to locate the metric.
	search string
	// socket_scope selects where the value is stored: true stores one value
	// per socket, false stores one value per CPU.
	socket_scope bool
	// group_idx is the index of the metric inside its LIKWID group. It is
	// filled in by LikwidCollector.Init and is zero until then.
	group_idx int
}
// GROUPPATH is the directory handed to LIKWID's config_setGroupPath in
// LikwidCollector.Init.
// NOTE(review): this is an absolute path below a user's home directory, so it
// presumably only resolves on the original development machine — confirm and
// consider making it configurable.
const GROUPPATH = `/home/unrz139/Work/cc-metric-collector/collectors/likwid/groups`
var likwid_metrics = map[string][]LikwidMetric{
2021-03-25 17:47:08 +01:00
"MEM_DP": {LikwidMetric{name: "mem_bw", search: "Memory bandwidth [MBytes/s]", socket_scope: true},
LikwidMetric{name: "pwr1", search: "Power [W]", socket_scope: true},
LikwidMetric{name: "pwr2", search: "Power DRAM [W]", socket_scope: true},
LikwidMetric{name: "flops_dp", search: "DP [MFLOP/s]", socket_scope: false}},
"FLOPS_SP": {LikwidMetric{name: "clock", search: "Clock [MHz]", socket_scope: false},
LikwidMetric{name: "cpi", search: "CPI", socket_scope: false},
LikwidMetric{name: "flops_sp", search: "SP [MFLOP/s]", socket_scope: false}},
2021-03-25 14:47:10 +01:00
}
func getMetricId(group C.int, search string) (int, error) {
2021-03-25 17:47:08 +01:00
for i := 0; i < int(C.perfmon_getNumberOfMetrics(group)); i++ {
mname := C.perfmon_getMetricName(group, C.int(i))
go_mname := C.GoString(mname)
if strings.Contains(go_mname, search) {
return i, nil
}
2021-03-25 14:47:10 +01:00
2021-03-25 17:47:08 +01:00
}
return -1, errors.New(fmt.Sprintf("Cannot find metric for search string '%s' in group %d", search, int(group)))
2021-03-25 14:47:10 +01:00
}
func getSocketCpus() map[C.int]int {
2021-03-25 17:47:08 +01:00
slist := SocketList()
var cpu C.int
outmap := make(map[C.int]int)
for _, s := range slist {
t := C.CString(fmt.Sprintf("S%d", s))
clen := C.cpustr_to_cpulist(t, &cpu, 1)
if int(clen) == 1 {
outmap[cpu] = s
}
}
return outmap
2021-03-25 14:47:10 +01:00
}
func (m *LikwidCollector) Init() error {
var ret C.int
2021-03-25 17:47:08 +01:00
m.name = "LikwidCollector"
2021-03-25 14:47:10 +01:00
m.setup()
cpulist := CpuList()
m.cpulist = make([]C.int, len(cpulist))
slist := getSocketCpus()
2021-03-25 17:47:08 +01:00
2021-03-25 14:47:10 +01:00
m.sock2tid = make(map[int]int)
for i, c := range cpulist {
2021-03-25 17:47:08 +01:00
m.cpulist[i] = C.int(c)
if sid, found := slist[m.cpulist[i]]; found {
m.sock2tid[sid] = i
}
2021-03-25 14:47:10 +01:00
}
m.metrics = make(map[C.int]map[string]int)
m.groups = make(map[string]C.int)
ret = C.topology_init()
if ret != 0 {
return errors.New("Failed to initialize LIKWID topology")
}
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
if ret != 0 {
C.topology_finalize()
return errors.New("Failed to initialize LIKWID topology")
}
2021-03-25 14:47:10 +01:00
gpath := C.CString(GROUPPATH)
C.config_setGroupPath(gpath)
2021-03-25 17:47:08 +01:00
C.free(unsafe.Pointer(gpath))
2021-03-25 14:47:10 +01:00
for g, metrics := range likwid_metrics {
2021-03-25 17:47:08 +01:00
cstr := C.CString(g)
gid := C.perfmon_addEventSet(cstr)
if gid >= 0 {
gmetrics := 0
2021-03-25 17:47:08 +01:00
for i, metric := range metrics {
idx, err := getMetricId(gid, metric.search)
if err != nil {
log.Print(err)
} else {
likwid_metrics[g][i].group_idx = idx
gmetrics++
2021-03-25 17:47:08 +01:00
}
}
if gmetrics > 0 {
m.groups[g] = gid
}
2021-03-25 17:47:08 +01:00
} else {
log.Print("Failed to add events set ", g)
}
C.free(unsafe.Pointer(cstr))
}
if len(m.groups) == 0 {
C.perfmon_finalize()
C.topology_finalize()
return errors.New("No LIKWID performance group initialized")
}
m.init = true
return nil
2021-03-25 14:47:10 +01:00
}
2021-03-25 17:47:08 +01:00
func (m *LikwidCollector) Read(interval time.Duration) {
if m.init {
var ret C.int
2021-03-25 17:47:08 +01:00
for gname, gid := range m.groups {
ret = C.perfmon_setupCounters(gid)
if ret != 0 {
log.Print("Failed to setup performance group ", gname)
continue
}
ret = C.perfmon_startCounters()
if ret != 0 {
log.Print("Failed to start performance group ", gname)
continue
}
2021-03-25 17:47:08 +01:00
time.Sleep(interval)
ret = C.perfmon_stopCounters()
if ret != 0 {
log.Print("Failed to stop performance group ", gname)
continue
}
2021-03-25 17:47:08 +01:00
for _, lmetric := range likwid_metrics[gname] {
if lmetric.socket_scope {
for sid, tid := range m.sock2tid {
res := C.perfmon_getLastMetric(gid, C.int(lmetric.group_idx), C.int(tid))
m.sockets[int(sid)][lmetric.name] = float64(res)
// log.Print("Metric '", lmetric.name,"' on Socket ",int(sid)," returns ", m.sockets[int(sid)][lmetric.name])
2021-03-25 17:47:08 +01:00
}
} else {
for tid, cpu := range m.cpulist {
res := C.perfmon_getLastMetric(gid, C.int(lmetric.group_idx), C.int(tid))
m.cpus[int(cpu)][lmetric.name] = float64(res)
// log.Print("Metric '", lmetric.name,"' on CPU ",int(cpu)," returns ", m.cpus[int(cpu)][lmetric.name])
2021-03-25 17:47:08 +01:00
}
}
}
for cpu := range m.cpus {
if flops_dp, found := m.cpus[cpu]["flops_dp"]; found {
if flops_sp, found := m.cpus[cpu]["flops_sp"]; found {
m.cpus[cpu]["flops_any"] = (2 * flops_dp.(float64)) + flops_sp.(float64)
2021-03-25 17:47:08 +01:00
}
}
}
for sid := range m.sockets {
if pwr1, found := m.sockets[int(sid)]["pwr1"]; found {
if pwr2, found := m.sockets[int(sid)]["pwr2"]; found {
sum := pwr1.(float64) + pwr2.(float64)
if sum > 0 {
m.sockets[int(sid)]["power"] = sum
}
delete(m.sockets[int(sid)], "pwr2")
}
delete(m.sockets[int(sid)], "pwr1")
}
}
}
2021-03-25 14:47:10 +01:00
}
}
func (m *LikwidCollector) Close() {
if m.init {
C.perfmon_finalize()
C.topology_finalize()
m.init = false
}
2021-03-25 17:47:08 +01:00
return
2021-03-25 14:47:10 +01:00
}