mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-11-10 04:27:25 +01:00
Use MetricAggregator to calculate metrics in LIKWID collector.
This commit is contained in:
parent
4633c8f58d
commit
ed62e952ce
@ -24,7 +24,7 @@ import (
|
|||||||
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-metric-collector/internal/ccLogger"
|
||||||
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
lp "github.com/ClusterCockpit/cc-metric-collector/internal/ccMetric"
|
||||||
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
topo "github.com/ClusterCockpit/cc-metric-collector/internal/ccTopology"
|
||||||
"github.com/PaesslerAG/gval"
|
mr "github.com/ClusterCockpit/cc-metric-collector/internal/metricRouter"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MetricScope string
|
type MetricScope string
|
||||||
@ -43,16 +43,32 @@ func (ms MetricScope) String() string {
|
|||||||
return string(ms)
|
return string(ms)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ms MetricScope) Likwid() string {
|
||||||
|
LikwidDomains := map[string]string{
|
||||||
|
"hwthread": "",
|
||||||
|
"core": "",
|
||||||
|
"llc": "C",
|
||||||
|
"numadomain": "M",
|
||||||
|
"die": "D",
|
||||||
|
"socket": "S",
|
||||||
|
"node": "N",
|
||||||
|
}
|
||||||
|
return LikwidDomains[string(ms)]
|
||||||
|
}
|
||||||
|
|
||||||
func (ms MetricScope) Granularity() int {
|
func (ms MetricScope) Granularity() int {
|
||||||
grans := []string{"hwthread", "core", "llc", "numadomain", "die", "socket", "node"}
|
for i, g := range GetAllMetricScopes() {
|
||||||
for i, g := range grans {
|
if ms == g {
|
||||||
if ms.String() == g {
|
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetAllMetricScopes() []MetricScope {
|
||||||
|
return []MetricScope{"hwthread" /*, "core", "llc", "numadomain", "die",*/, "socket", "node"}
|
||||||
|
}
|
||||||
|
|
||||||
type LikwidCollectorMetricConfig struct {
|
type LikwidCollectorMetricConfig struct {
|
||||||
Name string `json:"name"` // Name of the metric
|
Name string `json:"name"` // Name of the metric
|
||||||
Calc string `json:"calc"` // Calculation for the metric using
|
Calc string `json:"calc"` // Calculation for the metric using
|
||||||
@ -77,16 +93,18 @@ type LikwidCollectorConfig struct {
|
|||||||
|
|
||||||
type LikwidCollector struct {
|
type LikwidCollector struct {
|
||||||
metricCollector
|
metricCollector
|
||||||
cpulist []C.int
|
cpulist []C.int
|
||||||
sock2tid map[int]int
|
cpu2tid map[int]int
|
||||||
metrics map[C.int]map[string]int
|
sock2tid map[int]int
|
||||||
groups []C.int
|
scopeRespTids map[MetricScope]map[int]int
|
||||||
config LikwidCollectorConfig
|
metrics map[C.int]map[string]int
|
||||||
results map[int]map[int]map[string]interface{}
|
groups []C.int
|
||||||
mresults map[int]map[int]map[string]float64
|
config LikwidCollectorConfig
|
||||||
gmresults map[int]map[string]float64
|
results map[int]map[int]map[string]interface{}
|
||||||
basefreq float64
|
mresults map[int]map[int]map[string]float64
|
||||||
running bool
|
gmresults map[int]map[string]float64
|
||||||
|
basefreq float64
|
||||||
|
running bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type LikwidMetric struct {
|
type LikwidMetric struct {
|
||||||
@ -138,28 +156,8 @@ func getBaseFreq() float64 {
|
|||||||
return freq
|
return freq
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSocketCpus() map[C.int]int {
|
|
||||||
slist := SocketList()
|
|
||||||
var cpu C.int
|
|
||||||
outmap := make(map[C.int]int)
|
|
||||||
for _, s := range slist {
|
|
||||||
t := C.CString(fmt.Sprintf("S%d", s))
|
|
||||||
clen := C.cpustr_to_cpulist(t, &cpu, 1)
|
|
||||||
if int(clen) == 1 {
|
|
||||||
outmap[cpu] = s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return outmap
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LikwidCollector) CatchGvalPanic() {
|
|
||||||
if rerr := recover(); rerr != nil {
|
|
||||||
cclog.ComponentError(m.name, "Gval failed to calculate a metric", rerr)
|
|
||||||
m.init = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LikwidCollector) initGranularity() {
|
func (m *LikwidCollector) initGranularity() {
|
||||||
|
splitRegex := regexp.MustCompile("[+-/*()]")
|
||||||
for _, evset := range m.config.Eventsets {
|
for _, evset := range m.config.Eventsets {
|
||||||
evset.granulatity = make(map[string]MetricScope)
|
evset.granulatity = make(map[string]MetricScope)
|
||||||
for counter, event := range evset.Events {
|
for counter, event := range evset.Events {
|
||||||
@ -169,7 +167,7 @@ func (m *LikwidCollector) initGranularity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for i, metric := range evset.Metrics {
|
for i, metric := range evset.Metrics {
|
||||||
s := regexp.MustCompile("[+-/*()]").Split(metric.Calc, -1)
|
s := splitRegex.Split(metric.Calc, -1)
|
||||||
gran := MetricScope("hwthread")
|
gran := MetricScope("hwthread")
|
||||||
evset.Metrics[i].granulatity = gran
|
evset.Metrics[i].granulatity = gran
|
||||||
for _, x := range s {
|
for _, x := range s {
|
||||||
@ -183,7 +181,7 @@ func (m *LikwidCollector) initGranularity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for i, metric := range m.config.Metrics {
|
for i, metric := range m.config.Metrics {
|
||||||
s := regexp.MustCompile("[+-/*()]").Split(metric.Calc, -1)
|
s := splitRegex.Split(metric.Calc, -1)
|
||||||
gran := MetricScope("hwthread")
|
gran := MetricScope("hwthread")
|
||||||
m.config.Metrics[i].granulatity = gran
|
m.config.Metrics[i].granulatity = gran
|
||||||
for _, x := range s {
|
for _, x := range s {
|
||||||
@ -199,6 +197,59 @@ func (m *LikwidCollector) initGranularity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TopoResolveFunc func(cpuid int) int
|
||||||
|
|
||||||
|
func (m *LikwidCollector) getResponsiblities() map[MetricScope]map[int]int {
|
||||||
|
get_cpus := func(scope MetricScope) map[int]int {
|
||||||
|
var slist []int
|
||||||
|
var cpu C.int
|
||||||
|
var input func(index int) string
|
||||||
|
switch scope {
|
||||||
|
case "node":
|
||||||
|
slist = []int{0}
|
||||||
|
input = func(index int) string { return "N:0" }
|
||||||
|
case "socket":
|
||||||
|
input = func(index int) string { return fmt.Sprintf("%s%d:0", scope.Likwid(), index) }
|
||||||
|
slist = topo.SocketList()
|
||||||
|
// case "numadomain":
|
||||||
|
// input = func(index int) string { return fmt.Sprintf("%s%d:0", scope.Likwid(), index) }
|
||||||
|
// slist = topo.NumaNodeList()
|
||||||
|
// cclog.Debug(scope, " ", input(0), " ", slist)
|
||||||
|
// case "die":
|
||||||
|
// input = func(index int) string { return fmt.Sprintf("%s%d:0", scope.Likwid(), index) }
|
||||||
|
// slist = topo.DieList()
|
||||||
|
// case "llc":
|
||||||
|
// input = fmt.Sprintf("%s%d:0", scope.Likwid(), s)
|
||||||
|
// slist = topo.LLCacheList()
|
||||||
|
case "hwthread":
|
||||||
|
input = func(index int) string { return fmt.Sprintf("%d", index) }
|
||||||
|
slist = topo.CpuList()
|
||||||
|
}
|
||||||
|
outmap := make(map[int]int)
|
||||||
|
for _, s := range slist {
|
||||||
|
t := C.CString(input(s))
|
||||||
|
clen := C.cpustr_to_cpulist(t, &cpu, 1)
|
||||||
|
if int(clen) == 1 {
|
||||||
|
outmap[s] = m.cpu2tid[int(cpu)]
|
||||||
|
} else {
|
||||||
|
cclog.Error(fmt.Sprintf("Cannot determine responsible CPU for %s", input(s)))
|
||||||
|
outmap[s] = -1
|
||||||
|
}
|
||||||
|
C.free(unsafe.Pointer(t))
|
||||||
|
}
|
||||||
|
return outmap
|
||||||
|
}
|
||||||
|
|
||||||
|
scopes := GetAllMetricScopes()
|
||||||
|
complete := make(map[MetricScope]map[int]int)
|
||||||
|
for _, s := range scopes {
|
||||||
|
cclog.Debug("Start ", s)
|
||||||
|
complete[s] = get_cpus(s)
|
||||||
|
cclog.Debug("End ", s)
|
||||||
|
}
|
||||||
|
return complete
|
||||||
|
}
|
||||||
|
|
||||||
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
func (m *LikwidCollector) Init(config json.RawMessage) error {
|
||||||
var ret C.int
|
var ret C.int
|
||||||
m.name = "LikwidCollector"
|
m.name = "LikwidCollector"
|
||||||
@ -208,40 +259,39 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.initGranularity()
|
|
||||||
if m.config.ForceOverwrite {
|
if m.config.ForceOverwrite {
|
||||||
|
cclog.ComponentDebug(m.name, "Set LIKWID_FORCE=1")
|
||||||
os.Setenv("LIKWID_FORCE", "1")
|
os.Setenv("LIKWID_FORCE", "1")
|
||||||
}
|
}
|
||||||
m.setup()
|
m.setup()
|
||||||
// in some cases, gval causes a panic. We catch it with the handler and deactivate
|
|
||||||
// the collector (m.init = false).
|
|
||||||
defer m.CatchGvalPanic()
|
|
||||||
|
|
||||||
m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}
|
m.meta = map[string]string{"source": m.name, "group": "PerfCounter"}
|
||||||
|
cclog.ComponentDebug(m.name, "Get cpulist and init maps and lists")
|
||||||
cpulist := topo.CpuList()
|
cpulist := topo.CpuList()
|
||||||
m.cpulist = make([]C.int, len(cpulist))
|
m.cpulist = make([]C.int, len(cpulist))
|
||||||
|
m.cpu2tid = make(map[int]int)
|
||||||
cclog.ComponentDebug(m.name, "Create maps for socket, numa, core and die metrics")
|
|
||||||
m.sock2tid = make(map[int]int)
|
|
||||||
// m.numa2tid = make(map[int]int)
|
|
||||||
// m.core2tid = make(map[int]int)
|
|
||||||
// m.die2tid = make(map[int]int)
|
|
||||||
for i, c := range cpulist {
|
for i, c := range cpulist {
|
||||||
m.cpulist[i] = C.int(c)
|
m.cpulist[i] = C.int(c)
|
||||||
m.sock2tid[topo.GetCpuSocket(c)] = i
|
m.cpu2tid[c] = i
|
||||||
// m.numa2tid[topo.GetCpuNumaDomain(c)] = i
|
|
||||||
// m.core2tid[topo.GetCpuCore(c)] = i
|
|
||||||
// m.die2tid[topo.GetCpuDie(c)] = i
|
|
||||||
}
|
}
|
||||||
m.results = make(map[int]map[int]map[string]interface{})
|
m.results = make(map[int]map[int]map[string]interface{})
|
||||||
m.mresults = make(map[int]map[int]map[string]float64)
|
m.mresults = make(map[int]map[int]map[string]float64)
|
||||||
m.gmresults = make(map[int]map[string]float64)
|
m.gmresults = make(map[int]map[string]float64)
|
||||||
|
cclog.ComponentDebug(m.name, "initialize LIKWID topology")
|
||||||
ret = C.topology_init()
|
ret = C.topology_init()
|
||||||
if ret != 0 {
|
if ret != 0 {
|
||||||
err := errors.New("failed to initialize LIKWID topology")
|
err := errors.New("failed to initialize LIKWID topology")
|
||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine which counter works at which level. PMC*: hwthread, *BOX*: socket, ...
|
||||||
|
m.initGranularity()
|
||||||
|
// Generate map for MetricScope -> scope_id (like socket id) -> responsible id (offset in cpulist)
|
||||||
|
m.scopeRespTids = m.getResponsiblities()
|
||||||
|
|
||||||
|
cclog.ComponentDebug(m.name, "initialize LIKWID perfmon module")
|
||||||
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
|
ret = C.perfmon_init(C.int(len(m.cpulist)), &m.cpulist[0])
|
||||||
if ret != 0 {
|
if ret != 0 {
|
||||||
C.topology_finalize()
|
C.topology_finalize()
|
||||||
@ -250,28 +300,33 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is for the global metrics computation test
|
||||||
globalParams := make(map[string]interface{})
|
globalParams := make(map[string]interface{})
|
||||||
globalParams["time"] = float64(1.0)
|
globalParams["time"] = float64(1.0)
|
||||||
globalParams["inverseClock"] = float64(1.0)
|
globalParams["inverseClock"] = float64(1.0)
|
||||||
|
// While adding the events, we test the metrics whether they can be computed at all
|
||||||
for i, evset := range m.config.Eventsets {
|
for i, evset := range m.config.Eventsets {
|
||||||
estr := eventsToEventStr(evset.Events)
|
estr := eventsToEventStr(evset.Events)
|
||||||
|
// Generate parameter list for the metric computing test
|
||||||
params := make(map[string]interface{})
|
params := make(map[string]interface{})
|
||||||
params["time"] = float64(1.0)
|
params["time"] = float64(1.0)
|
||||||
params["inverseClock"] = float64(1.0)
|
params["inverseClock"] = float64(1.0)
|
||||||
for counter, _ := range evset.Events {
|
for counter := range evset.Events {
|
||||||
params[counter] = float64(1.0)
|
params[counter] = float64(1.0)
|
||||||
}
|
}
|
||||||
for _, metric := range evset.Metrics {
|
for _, metric := range evset.Metrics {
|
||||||
_, err := gval.Evaluate(metric.Calc, params, gval.Full())
|
// Try to evaluate the metric
|
||||||
|
_, err := mr.EvalFloat64Condition(metric.Calc, params)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// If the metric is not in the parameter list for the global metrics, add it
|
||||||
if _, ok := globalParams[metric.Name]; !ok {
|
if _, ok := globalParams[metric.Name]; !ok {
|
||||||
globalParams[metric.Name] = float64(1.0)
|
globalParams[metric.Name] = float64(1.0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Now we add the list of events to likwid
|
||||||
cstr := C.CString(estr)
|
cstr := C.CString(estr)
|
||||||
gid := C.perfmon_addEventSet(cstr)
|
gid := C.perfmon_addEventSet(cstr)
|
||||||
if gid >= 0 {
|
if gid >= 0 {
|
||||||
@ -283,17 +338,21 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
for tid := range m.cpulist {
|
for tid := range m.cpulist {
|
||||||
m.results[i][tid] = make(map[string]interface{})
|
m.results[i][tid] = make(map[string]interface{})
|
||||||
m.mresults[i][tid] = make(map[string]float64)
|
m.mresults[i][tid] = make(map[string]float64)
|
||||||
m.gmresults[tid] = make(map[string]float64)
|
if i == 0 {
|
||||||
|
m.gmresults[tid] = make(map[string]float64)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, metric := range m.config.Metrics {
|
for _, metric := range m.config.Metrics {
|
||||||
_, err := gval.Evaluate(metric.Calc, globalParams, gval.Full())
|
// Try to evaluate the global metric
|
||||||
|
_, err := mr.EvalFloat64Condition(metric.Calc, globalParams)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If no event set could be added, shut down LikwidCollector
|
||||||
if len(m.groups) == 0 {
|
if len(m.groups) == 0 {
|
||||||
C.perfmon_finalize()
|
C.perfmon_finalize()
|
||||||
C.topology_finalize()
|
C.topology_finalize()
|
||||||
@ -306,6 +365,7 @@ func (m *LikwidCollector) Init(config json.RawMessage) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// take a measurement for 'interval' seconds of event set index 'group'
|
||||||
func (m *LikwidCollector) takeMeasurement(group int, interval time.Duration) error {
|
func (m *LikwidCollector) takeMeasurement(group int, interval time.Duration) error {
|
||||||
var ret C.int
|
var ret C.int
|
||||||
gid := m.groups[group]
|
gid := m.groups[group]
|
||||||
@ -336,101 +396,104 @@ func (m *LikwidCollector) takeMeasurement(group int, interval time.Duration) err
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LikwidCollector) calcEventsetMetrics(group int, interval time.Duration) error {
|
// Get all measurement results for an event set, derive the metric values out of the measurement results and send it
|
||||||
|
func (m *LikwidCollector) calcEventsetMetrics(group int, interval time.Duration, output chan lp.CCMetric) error {
|
||||||
var eidx C.int
|
var eidx C.int
|
||||||
evset := m.config.Eventsets[group]
|
evset := m.config.Eventsets[group]
|
||||||
gid := m.groups[group]
|
gid := m.groups[group]
|
||||||
for tid := range m.cpulist {
|
|
||||||
for eidx = 0; int(eidx) < len(evset.Events); eidx++ {
|
// Go over events and get the results
|
||||||
ctr := C.perfmon_getCounterName(gid, eidx)
|
for eidx = 0; int(eidx) < len(evset.Events); eidx++ {
|
||||||
gctr := C.GoString(ctr)
|
ctr := C.perfmon_getCounterName(gid, eidx)
|
||||||
res := C.perfmon_getLastResult(gid, eidx, C.int(tid))
|
ev := C.perfmon_getEventName(gid, eidx)
|
||||||
m.results[group][tid][gctr] = float64(res)
|
gctr := C.GoString(ctr)
|
||||||
if m.results[group][tid][gctr] == 0 {
|
gev := C.GoString(ev)
|
||||||
m.results[group][tid][gctr] = 1.0
|
// MetricScope for the counter (and if needed the event)
|
||||||
|
scope := getGranularity(gctr, gev)
|
||||||
|
// Get the map scope-id -> tids
|
||||||
|
// This way we read less counters like only the responsible hardware thread for a socket
|
||||||
|
scopemap := m.scopeRespTids[scope]
|
||||||
|
for _, tid := range scopemap {
|
||||||
|
if tid >= 0 {
|
||||||
|
m.results[group][tid]["time"] = interval.Seconds()
|
||||||
|
m.results[group][tid]["inverseClock"] = float64(1.0 / m.basefreq)
|
||||||
|
res := C.perfmon_getLastResult(gid, eidx, C.int(tid))
|
||||||
|
m.results[group][tid][gctr] = float64(res)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.results[group][tid]["time"] = interval.Seconds()
|
|
||||||
m.results[group][tid]["inverseClock"] = float64(1.0 / m.basefreq)
|
|
||||||
for _, metric := range evset.Metrics {
|
|
||||||
value, err := gval.Evaluate(metric.Calc, m.results[group][tid], gval.Full())
|
|
||||||
if err != nil {
|
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
m.mresults[group][tid][metric.Name] = value.(float64)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration) error {
|
// Go over the event set metrics, derive the value out of the event:counter values and send it
|
||||||
for _, metric := range m.config.Metrics {
|
for _, metric := range evset.Metrics {
|
||||||
for tid := range m.cpulist {
|
// The metric scope is determined in the Init() function
|
||||||
params := make(map[string]interface{})
|
// Get the map scope-id -> tids
|
||||||
for j := range m.groups {
|
scopemap := m.scopeRespTids[metric.Scope]
|
||||||
for mname, mres := range m.mresults[j][tid] {
|
for domain, tid := range scopemap {
|
||||||
params[mname] = mres
|
if tid >= 0 {
|
||||||
|
value, err := mr.EvalFloat64Condition(metric.Calc, m.results[group][tid])
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m.mresults[group][tid][metric.Name] = value
|
||||||
|
// Now we have the result, send it with the proper tags
|
||||||
|
tags := map[string]string{"type": metric.Scope.String()}
|
||||||
|
if metric.Scope != "node" {
|
||||||
|
tags["type-id"] = fmt.Sprintf("%d", domain)
|
||||||
|
}
|
||||||
|
fields := map[string]interface{}{"value": value}
|
||||||
|
y, err := lp.New(metric.Name, tags, m.meta, fields, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
value, err := gval.Evaluate(metric.Calc, params, gval.Full())
|
}
|
||||||
if err != nil {
|
}
|
||||||
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
|
||||||
continue
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Go over the global metrics, derive the value out of the event sets' metric values and send it
|
||||||
|
func (m *LikwidCollector) calcGlobalMetrics(interval time.Duration, output chan lp.CCMetric) error {
|
||||||
|
for _, metric := range m.config.Metrics {
|
||||||
|
scopemap := m.scopeRespTids[metric.Scope]
|
||||||
|
for domain, tid := range scopemap {
|
||||||
|
if tid >= 0 {
|
||||||
|
// Here we generate parameter list
|
||||||
|
params := make(map[string]interface{})
|
||||||
|
for j := range m.groups {
|
||||||
|
for mname, mres := range m.mresults[j][tid] {
|
||||||
|
params[mname] = mres
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Evaluate the metric
|
||||||
|
value, err := mr.EvalFloat64Condition(metric.Calc, params)
|
||||||
|
if err != nil {
|
||||||
|
cclog.ComponentError(m.name, "Calculation for metric", metric.Name, "failed:", err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m.gmresults[tid][metric.Name] = value
|
||||||
|
// Now we have the result, send it with the proper tags
|
||||||
|
tags := map[string]string{"type": metric.Scope.String()}
|
||||||
|
if metric.Scope != "node" {
|
||||||
|
tags["type-id"] = fmt.Sprintf("%d", domain)
|
||||||
|
}
|
||||||
|
fields := map[string]interface{}{"value": value}
|
||||||
|
y, err := lp.New(metric.Name, tags, m.meta, fields, time.Now())
|
||||||
|
if err == nil {
|
||||||
|
output <- y
|
||||||
|
}
|
||||||
}
|
}
|
||||||
m.gmresults[tid][metric.Name] = value.(float64)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// func (m *LikwidCollector) calcResultMetrics(interval time.Duration) ([]lp.CCMetric, error) {
|
// main read function taking multiple measurement rounds, each 'interval' seconds long
|
||||||
// var err error = nil
|
|
||||||
// metrics := make([]lp.CCMetric, 0)
|
|
||||||
// for i := range m.groups {
|
|
||||||
// evset := m.config.Eventsets[i]
|
|
||||||
// for _, metric := range evset.Metrics {
|
|
||||||
// log.Print(metric.Name, " ", metric.Scope, " ", metric.granulatity)
|
|
||||||
// if metric.Scope.Granularity() > metric.granulatity.Granularity() {
|
|
||||||
// log.Print("Different granularity wanted for ", metric.Name, ": ", metric.Scope, " vs ", metric.granulatity)
|
|
||||||
// var idlist []int
|
|
||||||
// idfunc := func(cpuid int) int { return cpuid }
|
|
||||||
// switch metric.Scope {
|
|
||||||
// case "socket":
|
|
||||||
// idlist = topo.SocketList()
|
|
||||||
// idfunc = topo.GetCpuSocket
|
|
||||||
// case "numa":
|
|
||||||
// idlist = topo.NumaNodeList()
|
|
||||||
// idfunc = topo.GetCpuNumaDomain
|
|
||||||
// case "core":
|
|
||||||
// idlist = topo.CoreList()
|
|
||||||
// idfunc = topo.GetCpuCore
|
|
||||||
// case "die":
|
|
||||||
// idlist = topo.DieList()
|
|
||||||
// idfunc = topo.GetCpuDie
|
|
||||||
// case "node":
|
|
||||||
// idlist = topo.CpuList()
|
|
||||||
// }
|
|
||||||
// for i := 0; i < num_results; i++ {
|
|
||||||
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// for _, metric := range m.config.Metrics {
|
|
||||||
// log.Print(metric.Name, " ", metric.Scope, " ", metric.granulatity)
|
|
||||||
// if metric.Scope.Granularity() > metric.granulatity.Granularity() {
|
|
||||||
// log.Print("Different granularity wanted for ", metric.Name, ": ", metric.Scope, " vs ", metric.granulatity)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return metrics, err
|
|
||||||
// }
|
|
||||||
|
|
||||||
func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric) {
|
||||||
if !m.init {
|
if !m.init {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer m.CatchGvalPanic()
|
|
||||||
|
|
||||||
for i, _ := range m.groups {
|
for i, _ := range m.groups {
|
||||||
// measure event set 'i' for 'interval' seconds
|
// measure event set 'i' for 'interval' seconds
|
||||||
@ -439,77 +502,11 @@ func (m *LikwidCollector) Read(interval time.Duration, output chan lp.CCMetric)
|
|||||||
cclog.ComponentError(m.name, err.Error())
|
cclog.ComponentError(m.name, err.Error())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
m.calcEventsetMetrics(i, interval)
|
// read measurements and derive event set metrics
|
||||||
}
|
m.calcEventsetMetrics(i, interval, output)
|
||||||
|
|
||||||
m.calcGlobalMetrics(interval)
|
|
||||||
|
|
||||||
//metrics, err = m.calcResultMetrics(interval)
|
|
||||||
|
|
||||||
for i := range m.groups {
|
|
||||||
evset := m.config.Eventsets[i]
|
|
||||||
for _, metric := range evset.Metrics {
|
|
||||||
|
|
||||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name)
|
|
||||||
if metric.Publish && !skip {
|
|
||||||
if metric.Scope == "socket" {
|
|
||||||
for sid, tid := range m.sock2tid {
|
|
||||||
y, err := lp.New(metric.Name,
|
|
||||||
map[string]string{"type": "socket",
|
|
||||||
"type-id": fmt.Sprintf("%d", int(sid))},
|
|
||||||
m.meta,
|
|
||||||
map[string]interface{}{"value": m.mresults[i][tid][metric.Name]},
|
|
||||||
time.Now())
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if metric.Scope == "hwthread" {
|
|
||||||
for tid, cpu := range m.cpulist {
|
|
||||||
y, err := lp.New(metric.Name,
|
|
||||||
map[string]string{"type": "cpu",
|
|
||||||
"type-id": fmt.Sprintf("%d", int(cpu))},
|
|
||||||
m.meta,
|
|
||||||
map[string]interface{}{"value": m.mresults[i][tid][metric.Name]},
|
|
||||||
time.Now())
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, metric := range m.config.Metrics {
|
|
||||||
_, skip := stringArrayContains(m.config.ExcludeMetrics, metric.Name)
|
|
||||||
if metric.Publish && !skip {
|
|
||||||
if metric.Scope == "socket" {
|
|
||||||
for sid, tid := range m.sock2tid {
|
|
||||||
y, err := lp.New(metric.Name,
|
|
||||||
map[string]string{"type": "socket",
|
|
||||||
"type-id": fmt.Sprintf("%d", int(sid))},
|
|
||||||
m.meta,
|
|
||||||
map[string]interface{}{"value": m.gmresults[tid][metric.Name]},
|
|
||||||
time.Now())
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if metric.Scope == "hwthread" {
|
|
||||||
for tid, cpu := range m.cpulist {
|
|
||||||
y, err := lp.New(metric.Name,
|
|
||||||
map[string]string{"type": "cpu",
|
|
||||||
"type-id": fmt.Sprintf("%d", int(cpu))},
|
|
||||||
m.meta,
|
|
||||||
map[string]interface{}{"value": m.gmresults[tid][metric.Name]},
|
|
||||||
time.Now())
|
|
||||||
if err == nil {
|
|
||||||
output <- y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// use the event set metrics to derive the global metrics
|
||||||
|
m.calcGlobalMetrics(interval, output)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *LikwidCollector) Close() {
|
func (m *LikwidCollector) Close() {
|
||||||
|
Loading…
Reference in New Issue
Block a user