BC: new schemas for basically everything

This commit is contained in:
Lou Knauer
2021-12-16 13:17:48 +01:00
parent 7fcc39a144
commit 89333666b3
14 changed files with 1631 additions and 549 deletions

View File

@@ -11,7 +11,6 @@ import (
"path"
"path/filepath"
"strconv"
"strings"
"github.com/ClusterCockpit/cc-jobarchive/config"
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
@@ -21,19 +20,14 @@ import (
// For a given job, return the path of the `data.json`/`meta.json` file.
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
func getPath(job *model.Job, file string, checkLegacy bool) (string, error) {
id, err := strconv.Atoi(strings.Split(job.JobID, ".")[0])
if err != nil {
return "", err
}
lvl1, lvl2 := fmt.Sprintf("%d", id/1000), fmt.Sprintf("%03d", id%1000)
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
if !checkLegacy {
return filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
}
legacyPath := filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, file)
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
return filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
}
return legacyPath, nil
@@ -87,13 +81,13 @@ func UpdateTags(job *model.Job, tags []*model.JobTag) error {
f.Close()
metaFile.Tags = make([]struct {
Name string "json:\"name\""
Type string "json:\"type\""
Name string "json:\"Name\""
Type string "json:\"Type\""
}, 0)
for _, tag := range tags {
metaFile.Tags = append(metaFile.Tags, struct {
Name string "json:\"name\""
Type string "json:\"type\""
Name string "json:\"Name\""
Type string "json:\"Type\""
}{
Name: tag.TagName,
Type: tag.TagType,
@@ -143,7 +137,7 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
}
allMetrics := make([]string, 0)
metricConfigs := config.GetClusterConfig(job.ClusterID).MetricConfig
metricConfigs := config.GetClusterConfig(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
@@ -153,13 +147,13 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
}
tags := []struct {
Name string `json:"name"`
Type string `json:"type"`
Name string `json:"Name"`
Type string `json:"Type"`
}{}
for _, tag := range job.Tags {
tags = append(tags, struct {
Name string `json:"name"`
Type string `json:"type"`
Name string `json:"Name"`
Type string `json:"Type"`
}{
Name: tag.TagName,
Type: tag.TagType,
@@ -167,16 +161,25 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
}
metaData := &schema.JobMeta{
JobId: job.JobID,
UserId: job.UserID,
ClusterId: job.ClusterID,
NumNodes: job.NumNodes,
JobState: job.State.String(),
StartTime: job.StartTime.Unix(),
Duration: int64(job.Duration),
Nodes: job.Nodes,
Tags: tags,
Statistics: make(map[string]*schema.JobMetaStatistics),
JobId: int64(job.JobID),
User: job.User,
Project: job.Project,
Cluster: job.Cluster,
NumNodes: job.NumNodes,
NumHWThreads: job.NumHWThreads,
NumAcc: job.NumAcc,
Exclusive: int8(job.Exclusive),
MonitoringStatus: int8(job.MonitoringStatus),
SMT: int8(job.Smt),
Partition: job.Partition,
ArrayJobId: job.ArrayJobID,
JobState: string(job.State),
StartTime: job.StartTime.Unix(),
Duration: int64(job.Duration),
Resources: job.Resources,
MetaData: "", // TODO/FIXME: Handle `meta_data`!
Tags: tags,
Statistics: make(map[string]*schema.JobMetaStatistics),
}
for metric, data := range jobData {
@@ -188,7 +191,7 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
}
metaData.Statistics[metric] = &schema.JobMetaStatistics{
Unit: config.GetMetricConfig(job.ClusterID, metric).Unit,
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,

View File

@@ -61,8 +61,13 @@ func (ccms *CCMetricStore) doRequest(job *model.Job, suffix string, metrics []st
from, to := job.StartTime.Unix(), job.StartTime.Add(time.Duration(job.Duration)*time.Second).Unix()
reqBody := ApiRequestBody{}
reqBody.Metrics = metrics
for _, node := range job.Nodes {
reqBody.Selectors = append(reqBody.Selectors, []string{job.ClusterID, node})
for _, node := range job.Resources {
if node.Accelerators != nil || node.HWThreads != nil {
// TODO/FIXME: Accelerator/HWThreads resources are not supported yet; only hostname-level selectors are built.
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
}
reqBody.Selectors = append(reqBody.Selectors, []string{job.Cluster, node.Hostname})
}
reqBodyBytes, err := json.Marshal(reqBody)
@@ -86,33 +91,38 @@ func (ccms *CCMetricStore) LoadData(job *model.Job, metrics []string, ctx contex
return nil, err
}
resdata := make([]map[string]ApiMetricData, 0, len(job.Nodes))
resdata := make([]map[string]ApiMetricData, 0, len(job.Resources))
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
return nil, err
}
var jobData schema.JobData = make(schema.JobData)
for _, metric := range metrics {
mc := config.GetMetricConfig(job.ClusterID, metric)
mc := config.GetMetricConfig(job.Cluster, metric)
metricData := &schema.JobMetric{
Scope: "node", // TODO: FIXME: Whatever...
Unit: mc.Unit,
Timestep: mc.Sampletime,
Series: make([]*schema.MetricSeries, 0, len(job.Nodes)),
Timestep: mc.Timestep,
Series: make([]*schema.MetricSeries, 0, len(job.Resources)),
}
for i, node := range job.Nodes {
for i, node := range job.Resources {
if node.Accelerators != nil || node.HWThreads != nil {
// TODO/FIXME: Accelerator/HWThreads resources are not supported yet; only one series per hostname is produced.
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
}
data := resdata[i][metric]
if data.Error != nil {
return nil, errors.New(*data.Error)
}
if data.Avg == nil || data.Min == nil || data.Max == nil {
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node, metric)
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node.Hostname, metric)
}
metricData.Series = append(metricData.Series, &schema.MetricSeries{
NodeID: node,
Data: data.Data,
Hostname: node.Hostname,
Data: data.Data,
Statistics: &schema.MetricStatistics{
Avg: *data.Avg,
Min: *data.Min,
@@ -132,7 +142,7 @@ func (ccms *CCMetricStore) LoadStats(job *model.Job, metrics []string, ctx conte
return nil, err
}
resdata := make([]map[string]ApiStatsData, 0, len(job.Nodes))
resdata := make([]map[string]ApiStatsData, 0, len(job.Resources))
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
return nil, err
}
@@ -140,17 +150,22 @@ func (ccms *CCMetricStore) LoadStats(job *model.Job, metrics []string, ctx conte
stats := map[string]map[string]schema.MetricStatistics{}
for _, metric := range metrics {
nodestats := map[string]schema.MetricStatistics{}
for i, node := range job.Nodes {
for i, node := range job.Resources {
if node.Accelerators != nil || node.HWThreads != nil {
// TODO/FIXME: Accelerator/HWThreads resources are not supported yet; statistics are keyed by hostname only.
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
}
data := resdata[i][metric]
if data.Error != nil {
return nil, errors.New(*data.Error)
}
if data.Samples == 0 {
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node, metric)
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node.Hostname, metric)
}
nodestats[node] = schema.MetricStatistics{
nodestats[node.Hostname] = schema.MetricStatistics{
Avg: float64(data.Avg),
Min: float64(data.Min),
Max: float64(data.Max),

View File

@@ -2,6 +2,7 @@ package metricdata
import (
"context"
"errors"
"fmt"
"log"
"os"
@@ -46,9 +47,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
}
fieldsCond := strings.Join(fieldsConds, " or ")
hostsConds := make([]string, 0, len(job.Nodes))
for _, h := range job.Nodes {
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h))
hostsConds := make([]string, 0, len(job.Resources))
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO/FIXME: HWThreads/Accelerators are not supported yet; the query only filters on r.host.
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
}
hostsCond := strings.Join(hostsConds, " or ")
@@ -72,18 +78,18 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
field, host := row.Field(), row.ValueByKey("host").(string)
jobMetric, ok := jobData[field]
if !ok {
mc := config.GetMetricConfig(job.ClusterID, field)
mc := config.GetMetricConfig(job.Cluster, field)
jobMetric = &schema.JobMetric{
Scope: "node", // TODO: FIXME: Whatever...
Unit: mc.Unit,
Timestep: mc.Sampletime,
Series: make([]*schema.MetricSeries, 0, len(job.Nodes)),
Timestep: mc.Timestep,
Series: make([]*schema.MetricSeries, 0, len(job.Resources)),
}
jobData[field] = jobMetric
}
currentSeries = &schema.MetricSeries{
NodeID: host,
Hostname: host,
Statistics: nil,
Data: make([]schema.Float, 0),
}
@@ -102,7 +108,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
jobMetric := jobData[metric]
for node, stats := range nodes {
for _, series := range jobMetric.Series {
if series.NodeID == node {
if series.Hostname == node {
series.Statistics = &stats
}
}
@@ -115,9 +121,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
func (idb *InfluxDBv2DataRepository) LoadStats(job *model.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
stats := map[string]map[string]schema.MetricStatistics{}
hostsConds := make([]string, 0, len(job.Nodes))
for _, h := range job.Nodes {
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h))
hostsConds := make([]string, 0, len(job.Resources))
for _, h := range job.Resources {
if h.HWThreads != nil || h.Accelerators != nil {
// TODO/FIXME: HWThreads/Accelerators are not supported yet; the query only filters on r.host.
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
}
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
}
hostsCond := strings.Join(hostsConds, " or ")

View File

@@ -59,9 +59,9 @@ func Init(jobArchivePath string, disableArchive bool) error {
// Fetches the metric data for a job.
func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
if job.State == model.JobStateRunning || !useArchive {
repo, ok := metricDataRepos[job.ClusterID]
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.ClusterID)
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
}
return repo.LoadData(job, metrics, ctx)
@@ -90,9 +90,9 @@ func LoadAverages(job *model.Job, metrics []string, data [][]schema.Float, ctx c
return loadAveragesFromArchive(job, metrics, data)
}
repo, ok := metricDataRepos[job.ClusterID]
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.ClusterID)
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx)