2021-12-08 10:14:45 +01:00
package metricdata
import (
"context"
2021-12-16 13:17:48 +01:00
"errors"
2021-12-08 10:14:45 +01:00
"fmt"
2021-12-09 16:26:59 +01:00
"log"
2021-12-08 10:14:45 +01:00
"strings"
"time"
2022-03-15 18:35:27 +01:00
"crypto/tls"
2022-03-17 16:15:35 +01:00
"encoding/json"
2021-12-08 10:14:45 +01:00
2022-01-27 09:40:59 +01:00
"github.com/ClusterCockpit/cc-backend/config"
"github.com/ClusterCockpit/cc-backend/schema"
2021-12-08 10:14:45 +01:00
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
)
2022-03-17 16:15:35 +01:00
// InfluxDBv2DataRepositoryConfig is the JSON configuration that Init decodes
// in order to set up an InfluxDBv2DataRepository.
type InfluxDBv2DataRepositoryConfig struct {
	// Url is the server URL handed to the InfluxDB client.
	Url string `json:"url"`
	// Token is the authentication token handed to the InfluxDB client.
	Token string `json:"token"`
	// Bucket is the bucket name inserted into every Flux query.
	Bucket string `json:"bucket"`
	// Org is the organization the query API is created for.
	Org string `json:"org"`
	// SkipTls is copied into tls.Config.InsecureSkipVerify; when true the
	// server certificate is not verified.
	SkipTls bool `json:"skiptls"`
}
2021-12-08 10:14:45 +01:00
// InfluxDBv2DataRepository is a metric data repository that reads job metric
// data from an InfluxDB v2 server. Its fields are populated by Init.
type InfluxDBv2DataRepository struct {
	// client is the InfluxDB client created in Init.
	client influxdb2.Client
	// queryClient is the query API obtained from client for the configured
	// organization; all Flux queries go through it.
	queryClient influxdb2Api.QueryAPI
	// bucket is the bucket name inserted into every Flux query.
	bucket string
	// measurement is not referenced by any method visible in this file.
	measurement string
}
2022-03-17 16:15:35 +01:00
func ( idb * InfluxDBv2DataRepository ) Init ( rawConfig json . RawMessage ) error {
var config InfluxDBv2DataRepositoryConfig
if err := json . Unmarshal ( rawConfig , & config ) ; err != nil {
return err
}
2021-12-08 10:14:45 +01:00
2022-03-18 11:40:42 +01:00
idb . client = influxdb2 . NewClientWithOptions ( config . Url , config . Token , influxdb2 . DefaultOptions ( ) . SetTLSConfig ( & tls . Config { InsecureSkipVerify : config . SkipTls , } ) )
idb . queryClient = idb . client . QueryAPI ( config . Org )
idb . bucket = config . Bucket
2022-03-15 18:35:27 +01:00
2021-12-08 10:14:45 +01:00
return nil
}
2022-03-16 11:58:57 +01:00
func ( idb * InfluxDBv2DataRepository ) formatTime ( t time . Time ) string {
return t . Format ( time . RFC3339 ) // Like “2006-01-02T15:04:05Z07:00”
2021-12-08 10:14:45 +01:00
}
2022-03-15 18:35:27 +01:00
// epochToTime converts a Unix timestamp given in whole seconds into a
// time.Time with a zero nanosecond component.
func (idb *InfluxDBv2DataRepository) epochToTime(epoch int64) time.Time {
	const nanoseconds int64 = 0
	converted := time.Unix(epoch, nanoseconds)
	return converted
}
func ( idb * InfluxDBv2DataRepository ) LoadData ( job * schema . Job , metrics [ ] string , scopes [ ] schema . MetricScope , ctx context . Context ) ( schema . JobData , error ) {
2022-03-21 10:10:47 +01:00
measurementsConds := make ( [ ] string , 0 , len ( metrics ) )
2021-12-08 10:14:45 +01:00
for _ , m := range metrics {
2022-03-21 10:10:47 +01:00
measurementsConds = append ( measurementsConds , fmt . Sprintf ( ` r["_measurement"] == "%s" ` , m ) )
2021-12-08 10:14:45 +01:00
}
2022-03-21 10:10:47 +01:00
measurementsCond := strings . Join ( measurementsConds , " or " )
2021-12-08 10:14:45 +01:00
2021-12-16 13:17:48 +01:00
hostsConds := make ( [ ] string , 0 , len ( job . Resources ) )
for _ , h := range job . Resources {
if h . HWThreads != nil || h . Accelerators != nil {
2022-03-18 12:57:57 +01:00
// TODO
return nil , errors . New ( "the InfluxDB metric data repository does not yet support HWThreads or Accelerators" )
2021-12-16 13:17:48 +01:00
}
2022-03-21 10:10:47 +01:00
hostsConds = append ( hostsConds , fmt . Sprintf ( ` r["hostname"] == "%s" ` , h . Hostname ) )
2021-12-08 10:14:45 +01:00
}
hostsCond := strings . Join ( hostsConds , " or " )
2022-03-15 18:35:27 +01:00
jobData := make ( schema . JobData ) // Empty Schema: map[<string>FIELD]map[<MetricScope>SCOPE]<*JobMetric>METRIC
2022-03-21 10:10:47 +01:00
// Requested Scopes
for _ , scope := range scopes {
query := ""
switch scope {
case "node" :
2022-03-22 10:59:24 +01:00
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
2022-03-21 16:08:16 +01:00
// log.Println("Note: Scope 'node' requested. ")
2022-03-21 10:10:47 +01:00
query = fmt . Sprintf ( `
from ( bucket : "%s" )
| > range ( start : % s , stop : % s )
2022-03-22 10:59:24 +01:00
| > filter ( fn : ( r ) = > ( % s ) and ( % s ) )
2022-03-21 10:10:47 +01:00
| > drop ( columns : [ "_start" , "_stop" ] )
| > group ( columns : [ "hostname" , "_measurement" ] )
| > aggregateWindow ( every : 60 s , fn : mean )
2022-03-22 10:59:24 +01:00
| > drop ( columns : [ "_time" ] ) ` ,
2022-03-21 10:10:47 +01:00
idb . bucket ,
idb . formatTime ( job . StartTime ) , idb . formatTime ( idb . epochToTime ( job . StartTimeUnix + int64 ( job . Duration ) + int64 ( 1 ) ) ) ,
measurementsCond , hostsCond )
2022-03-21 13:36:19 +01:00
case "socket" :
log . Println ( "Note: Scope 'socket' requested, but not yet supported: Will return 'node' scope only. " )
continue
case "core" :
2022-03-21 16:08:16 +01:00
log . Println ( "Note: Scope 'core' requested, but not yet supported: Will return 'node' scope only. " )
2022-03-21 13:36:19 +01:00
continue
2022-03-21 16:08:16 +01:00
// Get Finest Granularity only, Set NULL to 0.0
2022-03-21 13:36:19 +01:00
// query = fmt.Sprintf(`
2022-03-21 16:08:16 +01:00
// from(bucket: "%s")
// |> range(start: %s, stop: %s)
// |> filter(fn: (r) => %s )
// |> filter(fn: (r) => %s )
// |> drop(columns: ["_start", "_stop", "cluster"])
// |> map(fn: (r) => (if exists r._value then {r with _value: r._value} else {r with _value: 0.0}))`,
// idb.bucket,
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
// measurementsCond, hostsCond)
2022-03-21 10:10:47 +01:00
default :
2022-03-21 13:36:19 +01:00
log . Println ( "Note: Unknown Scope requested: Will return 'node' scope. " )
continue
2022-03-21 11:18:31 +01:00
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node'")
2022-03-21 10:10:47 +01:00
}
2021-12-08 10:14:45 +01:00
2022-03-21 10:10:47 +01:00
rows , err := idb . queryClient . Query ( ctx , query )
if err != nil {
return nil , err
}
2022-03-15 18:35:27 +01:00
2022-03-22 10:59:24 +01:00
// Init Metrics: Only Node level now -> TODO: Matching /check on scope level ...
2022-03-21 10:10:47 +01:00
for _ , metric := range metrics {
jobMetric , ok := jobData [ metric ]
if ! ok {
mc := config . GetMetricConfig ( job . Cluster , metric )
jobMetric = map [ schema . MetricScope ] * schema . JobMetric {
scope : { // uses scope var from above!
Unit : mc . Unit ,
Scope : scope ,
Timestep : mc . Timestep ,
Series : make ( [ ] schema . Series , 0 , len ( job . Resources ) ) ,
StatisticsSeries : nil , // Should be: &schema.StatsSeries{},
} ,
}
}
jobData [ metric ] = jobMetric
}
2021-12-08 10:14:45 +01:00
2022-03-21 10:10:47 +01:00
// Process Result: Time-Data
2022-03-21 16:08:16 +01:00
field , host , hostSeries := "" , "" , schema . Series { }
// typeId := 0
switch scope {
case "node" :
for rows . Next ( ) {
row := rows . Record ( )
if ( host == "" || host != row . ValueByKey ( "hostname" ) . ( string ) || rows . TableChanged ( ) ) {
if ( host != "" ) {
// Append Series before reset
jobData [ field ] [ scope ] . Series = append ( jobData [ field ] [ scope ] . Series , hostSeries )
}
field , host = row . Measurement ( ) , row . ValueByKey ( "hostname" ) . ( string )
hostSeries = schema . Series {
Hostname : host ,
Statistics : nil ,
Data : make ( [ ] schema . Float , 0 ) ,
}
}
2022-03-22 10:59:24 +01:00
val , ok := row . Value ( ) . ( float64 )
if ok {
hostSeries . Data = append ( hostSeries . Data , schema . Float ( val ) )
} else {
hostSeries . Data = append ( hostSeries . Data , schema . Float ( 0 ) )
}
2022-03-21 16:08:16 +01:00
}
case "socket" :
continue
case "core" :
continue
// Include Series.Id in hostSeries
// for rows.Next() {
// row := rows.Record()
// if ( host == "" || host != row.ValueByKey("hostname").(string) || typeId != row.ValueByKey("type-id").(int) || rows.TableChanged() ) {
// if ( host != "" ) {
// // Append Series before reset
// jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
// }
// field, host, typeId = row.Measurement(), row.ValueByKey("hostname").(string), row.ValueByKey("type-id").(int)
// hostSeries = schema.Series{
// Hostname: host,
// Id: &typeId,
// Statistics: nil,
// Data: make([]schema.Float, 0),
// }
// }
// val := row.Value().(float64)
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
// }
default :
continue
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
2022-03-21 10:10:47 +01:00
}
// Append last Series
jobData [ field ] [ scope ] . Series = append ( jobData [ field ] [ scope ] . Series , hostSeries )
2021-12-08 10:14:45 +01:00
}
2022-03-21 10:10:47 +01:00
// Get Stats
2021-12-08 11:50:16 +01:00
stats , err := idb . LoadStats ( job , metrics , ctx )
if err != nil {
return nil , err
}
2022-03-15 18:35:27 +01:00
2022-03-21 10:10:47 +01:00
for _ , scope := range scopes {
2022-03-21 16:08:16 +01:00
if scope == "node" { // No 'socket/core' support yet
2022-03-21 13:36:19 +01:00
for metric , nodes := range stats {
// log.Println(fmt.Sprintf("<< Add Stats for : Field %s >>", metric))
for node , stats := range nodes {
// log.Println(fmt.Sprintf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg ))
for index , _ := range jobData [ metric ] [ scope ] . Series {
// log.Println(fmt.Sprintf("<< Try to add Stats to Series in Position %d >>", index))
if jobData [ metric ] [ scope ] . Series [ index ] . Hostname == node {
// log.Println(fmt.Sprintf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname))
jobData [ metric ] [ scope ] . Series [ index ] . Statistics = & schema . MetricStatistics { Avg : stats . Avg , Min : stats . Min , Max : stats . Max }
// log.Println(fmt.Sprintf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg))
}
2022-03-21 10:10:47 +01:00
}
2021-12-08 11:50:16 +01:00
}
}
}
}
2022-03-16 11:58:57 +01:00
// DEBUG:
2022-03-21 10:10:47 +01:00
// for _, scope := range scopes {
// for _, met := range metrics {
// for _, series := range jobData[met][scope].Series {
// log.Println(fmt.Sprintf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
// len(series.Data), met, series.Hostname, scope,
// series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg))
// }
// }
// }
2022-03-15 18:35:27 +01:00
2021-12-08 11:50:16 +01:00
return jobData , nil
2021-12-08 10:14:45 +01:00
}
2022-03-15 18:35:27 +01:00
func ( idb * InfluxDBv2DataRepository ) LoadStats ( job * schema . Job , metrics [ ] string , ctx context . Context ) ( map [ string ] map [ string ] schema . MetricStatistics , error ) {
2022-03-16 17:28:05 +01:00
2021-12-08 11:50:16 +01:00
stats := map [ string ] map [ string ] schema . MetricStatistics { }
2021-12-16 13:17:48 +01:00
hostsConds := make ( [ ] string , 0 , len ( job . Resources ) )
for _ , h := range job . Resources {
2022-03-15 18:35:27 +01:00
if h . HWThreads != nil || h . Accelerators != nil {
2022-03-21 10:10:47 +01:00
// TODO
return nil , errors . New ( "the InfluxDB metric data repository does not yet support HWThreads or Accelerators" )
2022-03-15 18:35:27 +01:00
}
2022-03-21 10:10:47 +01:00
hostsConds = append ( hostsConds , fmt . Sprintf ( ` r["hostname"] == "%s" ` , h . Hostname ) )
2021-12-08 11:50:16 +01:00
}
hostsCond := strings . Join ( hostsConds , " or " )
2022-03-21 13:36:19 +01:00
// lenMet := len(metrics)
2022-03-21 14:14:48 +01:00
for _ , metric := range metrics {
2022-03-21 13:36:19 +01:00
// log.Println(fmt.Sprintf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet))
2022-03-15 18:35:27 +01:00
query := fmt . Sprintf ( `
2022-03-18 12:57:57 +01:00
data = from ( bucket : "%s" )
| > range ( start : % s , stop : % s )
2022-03-21 10:10:47 +01:00
| > filter ( fn : ( r ) = > r . _measurement == "%s" and r . _field == "value" and ( % s ) )
2022-03-18 12:57:57 +01:00
union ( tables : [ data | > mean ( column : "_value" ) | > set ( key : "_field" , value : "avg" ) ,
data | > min ( column : "_value" ) | > set ( key : "_field" , value : "min" ) ,
data | > max ( column : "_value" ) | > set ( key : "_field" , value : "max" ) ] )
2022-03-21 10:10:47 +01:00
| > pivot ( rowKey : [ "hostname" ] , columnKey : [ "_field" ] , valueColumn : "_value" )
2022-03-18 12:57:57 +01:00
| > group ( ) ` ,
2022-03-15 18:35:27 +01:00
idb . bucket ,
idb . formatTime ( job . StartTime ) , idb . formatTime ( idb . epochToTime ( job . StartTimeUnix + int64 ( job . Duration ) + int64 ( 1 ) ) ) ,
2022-03-21 10:10:47 +01:00
metric , hostsCond )
2021-12-08 10:14:45 +01:00
2022-03-15 18:35:27 +01:00
rows , err := idb . queryClient . Query ( ctx , query )
if err != nil {
return nil , err
2021-12-08 10:14:45 +01:00
}
2022-03-15 18:35:27 +01:00
nodes := map [ string ] schema . MetricStatistics { }
for rows . Next ( ) {
2022-03-21 13:36:19 +01:00
row := rows . Record ( )
2022-03-21 10:10:47 +01:00
host := row . ValueByKey ( "hostname" ) . ( string )
2022-03-21 13:36:19 +01:00
avg , avgok := row . ValueByKey ( "avg" ) . ( float64 )
if ! avgok { log . Println ( fmt . Sprintf ( ">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v" , metric , avg ) ) }
min , minok := row . ValueByKey ( "min" ) . ( float64 )
if ! minok { log . Println ( fmt . Sprintf ( ">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v" , metric , min ) ) }
max , maxok := row . ValueByKey ( "max" ) . ( float64 )
if ! maxok { log . Println ( fmt . Sprintf ( ">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v" , metric , max ) ) }
2022-03-15 18:35:27 +01:00
nodes [ host ] = schema . MetricStatistics {
Avg : avg ,
Min : min ,
Max : max ,
}
}
stats [ metric ] = nodes
2021-12-08 10:14:45 +01:00
}
2021-12-08 11:50:16 +01:00
return stats , nil
}
2022-03-15 18:35:27 +01:00
func ( idb * InfluxDBv2DataRepository ) LoadNodeData ( cluster , partition string , metrics , nodes [ ] string , scopes [ ] schema . MetricScope , from , to time . Time , ctx context . Context ) ( map [ string ] map [ string ] [ ] * schema . JobMetric , error ) {
// TODO : Implement to be used in Analysis- und System/Node-View
2022-03-16 17:28:05 +01:00
log . Println ( fmt . Sprintf ( "LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, partition %s, metrics %v, nodes %v, scopes %v" , cluster , partition , metrics , nodes , scopes ) )
2022-03-15 18:35:27 +01:00
2022-03-16 17:28:05 +01:00
return nil , errors . New ( "unimplemented for InfluxDBv2DataRepository" )
2021-12-08 10:14:45 +01:00
}