add more information to existing error logs and panics

- prefix messages with '$ROOT/$FILE' for better localization in the code
- add text where none was given
- fix unnecessary sprintf nesting in influxv2 and prometheus metricrepo logging
Christoph Kluge
2023-01-19 16:59:14 +01:00
parent 5abd3641b2
commit 24a4244f19
31 changed files with 254 additions and 253 deletions
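To illustrate the pattern applied throughout the diff below: log and error messages gain a 'COMPONENT/FILE > ' prefix that points at the originating package and file, and printf-style loggers are called directly instead of wrapping fmt.Sprintf. A minimal, self-contained sketch with the standard library logger (the commit itself uses cc-backend's pkg/log with Errorf/Warnf/Debugf; the function name and prefix below are made up for illustration):

	package main

	import (
		"fmt"
		"log"
	)

	// loadExample is a hypothetical helper, not part of cc-backend; it only
	// demonstrates the logging convention introduced by this commit.
	func loadExample(query string) error {
		if query == "" {
			err := fmt.Errorf("empty query")
			// Before: log.Printf(fmt.Sprintf("query error: %v", err)) with no prefix.
			// After: format directly in the logger and prepend a '$ROOT/$FILE >' tag.
			log.Printf("METRICDATA/EXAMPLE > query error: %v", err)
			return fmt.Errorf("METRICDATA/EXAMPLE > query failed: %w", err)
		}
		return nil
	}

	func main() {
		if err := loadExample(""); err != nil {
			log.Println(err)
		}
	}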

View File

@@ -202,7 +202,7 @@ func (ccms *CCMetricStore) LoadData(
for _, res := range row {
if res.Error != nil {
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
errors = append(errors, fmt.Sprintf("METRICDATA/CCMS > failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
continue
}
@@ -245,7 +245,7 @@ func (ccms *CCMetricStore) LoadData(
}
if len(errors) != 0 {
return jobData, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return jobData, nil
@@ -272,8 +272,8 @@ func (ccms *CCMetricStore) buildQueries(
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
continue
}
@@ -483,7 +483,7 @@ func (ccms *CCMetricStore) buildQueries(
continue
}
return nil, nil, fmt.Errorf("TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
}
}
}
@@ -521,7 +521,7 @@ func (ccms *CCMetricStore) LoadStats(
metric := ccms.toLocalName(query.Metric)
data := res[0]
if data.Error != nil {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
}
metricdata, ok := stats[metric]
@@ -531,7 +531,7 @@ func (ccms *CCMetricStore) LoadStats(
}
if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
}
metricdata[query.Hostname] = schema.MetricStatistics{
@@ -593,11 +593,11 @@ func (ccms *CCMetricStore) LoadNodeData(
metric := ccms.toLocalName(query.Metric)
qdata := res[0]
if qdata.Error != nil {
errors = append(errors, fmt.Sprintf("fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error))
errors = append(errors, fmt.Sprintf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *qdata.Error))
}
if qdata.Avg.IsNaN() || qdata.Min.IsNaN() || qdata.Max.IsNaN() {
// return nil, fmt.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
qdata.Avg, qdata.Min, qdata.Max = 0., 0., 0.
}
@@ -627,7 +627,7 @@ func (ccms *CCMetricStore) LoadNodeData(
}
if len(errors) != 0 {
return data, fmt.Errorf("cc-metric-store: %s", strings.Join(errors, ", "))
return data, fmt.Errorf("METRICDATA/CCMS > errors: %s", strings.Join(errors, ", "))
}
return data, nil

View File

@@ -10,12 +10,12 @@ import (
"encoding/json"
"errors"
"fmt"
"log"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
)
@@ -71,7 +71,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
}
@@ -84,7 +84,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
switch scope {
case "node":
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
// log.Println("Note: Scope 'node' requested. ")
// log.Info("METRICDATA/INFLUXV2 > Scope 'node' requested. ")
query = fmt.Sprintf(`
from(bucket: "%s")
|> range(start: %s, stop: %s)
@@ -97,10 +97,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
measurementsCond, hostsCond)
case "socket":
log.Println("Note: Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
log.Info("METRICDATA/INFLUXV2 > Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
continue
case "core":
log.Println("Note: Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
log.Info("METRICDATA/INFLUXV2 > Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
continue
// Get Finest Granularity only, Set NULL to 0.0
// query = fmt.Sprintf(`
@@ -114,9 +114,9 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
// measurementsCond, hostsCond)
default:
log.Println("Note: Unknown Scope requested: Will return 'node' scope. ")
log.Info("METRICDATA/INFLUXV2 > Unknown Scope requested: Will return 'node' scope. ")
continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node'")
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
}
rows, err := idb.queryClient.Query(ctx, query)
@@ -208,15 +208,15 @@ func (idb *InfluxDBv2DataRepository) LoadData(
for _, scope := range scopes {
if scope == "node" { // No 'socket/core' support yet
for metric, nodes := range stats {
// log.Println(fmt.Sprintf("<< Add Stats for : Field %s >>", metric))
// log.Debugf("<< Add Stats for : Field %s >>", metric)
for node, stats := range nodes {
// log.Println(fmt.Sprintf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg ))
// log.Debugf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )
for index, _ := range jobData[metric][scope].Series {
// log.Println(fmt.Sprintf("<< Try to add Stats to Series in Position %d >>", index))
// log.Debugf("<< Try to add Stats to Series in Position %d >>", index)
if jobData[metric][scope].Series[index].Hostname == node {
// log.Println(fmt.Sprintf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname))
// log.Debugf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)
jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
// log.Println(fmt.Sprintf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg))
// log.Debugf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)
}
}
}
@@ -228,9 +228,9 @@ func (idb *InfluxDBv2DataRepository) LoadData(
// for _, scope := range scopes {
// for _, met := range metrics {
// for _, series := range jobData[met][scope].Series {
// log.Println(fmt.Sprintf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// log.Debugf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// len(series.Data), met, series.Hostname, scope,
- // series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg))
+ // series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)
// }
// }
// }
@@ -249,7 +249,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO
return nil, errors.New("the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
}
@@ -258,7 +258,7 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
// lenMet := len(metrics)
for _, metric := range metrics {
// log.Println(fmt.Sprintf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet))
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
query := fmt.Sprintf(`
data = from(bucket: "%s")
@@ -285,17 +285,17 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
avg, avgok := row.ValueByKey("avg").(float64)
if !avgok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg))
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
avg = 0.0
}
min, minok := row.ValueByKey("min").(float64)
if !minok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min))
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
min = 0.0
}
max, maxok := row.ValueByKey("max").(float64)
if !maxok {
// log.Println(fmt.Sprintf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max))
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
max = 0.0
}
@@ -319,7 +319,7 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
// TODO : Implement to be used in Analysis- und System/Node-View
log.Println(fmt.Sprintf("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes))
log.Infof("METRICDATA/INFLUXV2 > LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)
return nil, errors.New("unimplemented for InfluxDBv2DataRepository")
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
}

View File

@@ -60,7 +60,7 @@ func Init(disableArchive bool) error {
case "test":
mdr = &TestMetricDataRepository{}
default:
return fmt.Errorf("unkown metric data repository '%s' for cluster '%s'", kind.Kind, cluster.Name)
return fmt.Errorf("METRICDATA/METRICDATA > unkown metric data repository '%s' for cluster '%s'", kind.Kind, cluster.Name)
}
if err := mdr.Init(cluster.MetricDataRepository); err != nil {
@@ -90,7 +90,7 @@ func LoadData(job *schema.Job,
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
@@ -107,7 +107,7 @@ func LoadData(job *schema.Job,
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Errorf("partial error: %s", err.Error())
log.Errorf("METRICDATA/METRICDATA > partial error: %s", err.Error())
} else {
return err, 0, 0
}
@@ -182,7 +182,7 @@ func LoadAverages(
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx)
@@ -217,7 +217,7 @@ func LoadNodeData(
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
@@ -229,14 +229,14 @@ func LoadNodeData(
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Errorf("partial error: %s", err.Error())
log.Errorf("METRICDATA/METRICDATA > partial error: %s", err.Error())
} else {
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", cluster)
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil

View File

@@ -163,7 +163,7 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
} else {
if config.Username != "" {
return errors.New("Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
}
}
// init client
@@ -184,9 +184,9 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
for metric, templ := range config.Templates {
pdb.templates[metric], err = template.New(metric).Parse(templ)
if err == nil {
log.Debugf("Added PromQL template for %s: %s", metric, templ)
log.Debugf("METRICDATA/PROMETHEUS > Added PromQL template for %s: %s", metric, templ)
} else {
log.Errorf("Failed to parse PromQL template %s for metric %s", templ, metric)
log.Errorf("METRICDATA/PROMETHEUS > Failed to parse PromQL template %s for metric %s", templ, metric)
}
}
return nil
@@ -213,14 +213,14 @@ func (pdb *PrometheusDataRepository) FormatQuery(
if templ, ok := pdb.templates[metric]; ok {
err := templ.Execute(buf, args)
if err != nil {
return "", errors.New(fmt.Sprintf("Error compiling template %v", templ))
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
} else {
query := buf.String()
log.Debugf(fmt.Sprintf("PromQL: %s", query))
log.Debugf("METRICDATA/PROMETHEUS > PromQL: %s", query)
return query, nil
}
} else {
return "", errors.New(fmt.Sprintf("No PromQL for metric %s configured.", metric))
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > No PromQL for metric %s configured.", metric))
}
}
@@ -283,16 +283,15 @@ func (pdb *PrometheusDataRepository) LoadData(
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))})
logOnce.Do(func(){log.Infof("METRICDATA/PROMETHEUS > Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(job.Cluster, metric)
if metricConfig == nil {
log.Errorf(fmt.Sprintf("Error in LoadData: Metric %s for cluster %s not configured",
metric, job.Cluster))
return nil, errors.New("Prometheus querry error")
log.Errorf("METRICDATA/PROMETHEUS > Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
return nil, errors.New("METRICDATA/PROMETHEUS > Prometheus query error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
if err != nil {
@@ -308,11 +307,11 @@ func (pdb *PrometheusDataRepository) LoadData(
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf(fmt.Sprintf("Prometheus query error in LoadData: %v\nQuery: %s", err, query))
return nil, errors.New("Prometheus querry error")
log.Errorf("METRICDATA/PROMETHEUS > Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("METRICDATA/PROMETHEUS > Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings))
log.Warnf("Warnings: %v\n", warnings)
}
// init data structures
@@ -390,15 +389,14 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
}
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func(){log.Infof(fmt.Sprintf("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope))})
logOnce.Do(func(){log.Infof("METRICDATA/PROMETHEUS > Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(cluster, metric)
if metricConfig == nil {
log.Errorf(fmt.Sprintf("Error in LoadNodeData: Metric %s for cluster %s not configured",
metric, cluster))
return nil, errors.New("Prometheus querry error")
log.Errorf("METRICDATA/PROMETHEUS > Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
return nil, errors.New("METRICDATA/PROMETHEUS > Prometheus querry error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
if err != nil {
@@ -414,11 +412,11 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf(fmt.Sprintf("Prometheus query error in LoadNodeData: %v\n", err))
return nil, errors.New("Prometheus querry error")
log.Errorf("METRICDATA/PROMETHEUS > Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("METRICDATA/PROMETHEUS > Prometheus querry error")
}
if len(warnings) > 0 {
log.Warnf(fmt.Sprintf("Warnings: %v\n", warnings))
log.Warnf("METRICDATA/PROMETHEUS > Warnings: %v\n", warnings)
}
step := int64(metricConfig.Timestep)
@@ -444,6 +442,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
}
}
t1 := time.Since(t0)
log.Debugf(fmt.Sprintf("LoadNodeData of %v nodes took %s", len(data), t1))
log.Debugf("METRICDATA/PROMETHEUS > LoadNodeData of %v nodes took %s", len(data), t1)
return data, nil
}