mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-23 12:51:40 +02:00
BC: new schemas for basically everything
This commit is contained in:
@@ -11,7 +11,6 @@ import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-jobarchive/config"
|
||||
"github.com/ClusterCockpit/cc-jobarchive/graph/model"
|
||||
@@ -21,19 +20,14 @@ import (
|
||||
// For a given job, return the path of the `data.json`/`meta.json` file.
|
||||
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
|
||||
func getPath(job *model.Job, file string, checkLegacy bool) (string, error) {
|
||||
id, err := strconv.Atoi(strings.Split(job.JobID, ".")[0])
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
lvl1, lvl2 := fmt.Sprintf("%d", id/1000), fmt.Sprintf("%03d", id%1000)
|
||||
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
|
||||
if !checkLegacy {
|
||||
return filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
}
|
||||
|
||||
legacyPath := filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, file)
|
||||
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
|
||||
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
|
||||
return filepath.Join(JobArchivePath, job.ClusterID, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
}
|
||||
|
||||
return legacyPath, nil
|
||||
@@ -87,13 +81,13 @@ func UpdateTags(job *model.Job, tags []*model.JobTag) error {
|
||||
f.Close()
|
||||
|
||||
metaFile.Tags = make([]struct {
|
||||
Name string "json:\"name\""
|
||||
Type string "json:\"type\""
|
||||
Name string "json:\"Name\""
|
||||
Type string "json:\"Type\""
|
||||
}, 0)
|
||||
for _, tag := range tags {
|
||||
metaFile.Tags = append(metaFile.Tags, struct {
|
||||
Name string "json:\"name\""
|
||||
Type string "json:\"type\""
|
||||
Name string "json:\"Name\""
|
||||
Type string "json:\"Type\""
|
||||
}{
|
||||
Name: tag.TagName,
|
||||
Type: tag.TagType,
|
||||
@@ -143,7 +137,7 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
}
|
||||
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := config.GetClusterConfig(job.ClusterID).MetricConfig
|
||||
metricConfigs := config.GetClusterConfig(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
@@ -153,13 +147,13 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
}
|
||||
|
||||
tags := []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Name string `json:"Name"`
|
||||
Type string `json:"Type"`
|
||||
}{}
|
||||
for _, tag := range job.Tags {
|
||||
tags = append(tags, struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Name string `json:"Name"`
|
||||
Type string `json:"Type"`
|
||||
}{
|
||||
Name: tag.TagName,
|
||||
Type: tag.TagType,
|
||||
@@ -167,16 +161,25 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
}
|
||||
|
||||
metaData := &schema.JobMeta{
|
||||
JobId: job.JobID,
|
||||
UserId: job.UserID,
|
||||
ClusterId: job.ClusterID,
|
||||
NumNodes: job.NumNodes,
|
||||
JobState: job.State.String(),
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Duration: int64(job.Duration),
|
||||
Nodes: job.Nodes,
|
||||
Tags: tags,
|
||||
Statistics: make(map[string]*schema.JobMetaStatistics),
|
||||
JobId: int64(job.JobID),
|
||||
User: job.User,
|
||||
Project: job.Project,
|
||||
Cluster: job.Cluster,
|
||||
NumNodes: job.NumNodes,
|
||||
NumHWThreads: job.NumHWThreads,
|
||||
NumAcc: job.NumAcc,
|
||||
Exclusive: int8(job.Exclusive),
|
||||
MonitoringStatus: int8(job.MonitoringStatus),
|
||||
SMT: int8(job.Smt),
|
||||
Partition: job.Partition,
|
||||
ArrayJobId: job.ArrayJobID,
|
||||
JobState: string(job.State),
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Duration: int64(job.Duration),
|
||||
Resources: job.Resources,
|
||||
MetaData: "", // TODO/FIXME: Handle `meta_data`!
|
||||
Tags: tags,
|
||||
Statistics: make(map[string]*schema.JobMetaStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
@@ -188,7 +191,7 @@ func ArchiveJob(job *model.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
}
|
||||
|
||||
metaData.Statistics[metric] = &schema.JobMetaStatistics{
|
||||
Unit: config.GetMetricConfig(job.ClusterID, metric).Unit,
|
||||
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
|
@@ -61,8 +61,13 @@ func (ccms *CCMetricStore) doRequest(job *model.Job, suffix string, metrics []st
|
||||
from, to := job.StartTime.Unix(), job.StartTime.Add(time.Duration(job.Duration)*time.Second).Unix()
|
||||
reqBody := ApiRequestBody{}
|
||||
reqBody.Metrics = metrics
|
||||
for _, node := range job.Nodes {
|
||||
reqBody.Selectors = append(reqBody.Selectors, []string{job.ClusterID, node})
|
||||
for _, node := range job.Resources {
|
||||
if node.Accelerators != nil || node.HWThreads != nil {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||
}
|
||||
|
||||
reqBody.Selectors = append(reqBody.Selectors, []string{job.Cluster, node.Hostname})
|
||||
}
|
||||
|
||||
reqBodyBytes, err := json.Marshal(reqBody)
|
||||
@@ -86,33 +91,38 @@ func (ccms *CCMetricStore) LoadData(job *model.Job, metrics []string, ctx contex
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resdata := make([]map[string]ApiMetricData, 0, len(job.Nodes))
|
||||
resdata := make([]map[string]ApiMetricData, 0, len(job.Resources))
|
||||
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var jobData schema.JobData = make(schema.JobData)
|
||||
for _, metric := range metrics {
|
||||
mc := config.GetMetricConfig(job.ClusterID, metric)
|
||||
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||
metricData := &schema.JobMetric{
|
||||
Scope: "node", // TODO: FIXME: Whatever...
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Sampletime,
|
||||
Series: make([]*schema.MetricSeries, 0, len(job.Nodes)),
|
||||
Timestep: mc.Timestep,
|
||||
Series: make([]*schema.MetricSeries, 0, len(job.Resources)),
|
||||
}
|
||||
for i, node := range job.Nodes {
|
||||
for i, node := range job.Resources {
|
||||
if node.Accelerators != nil || node.HWThreads != nil {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||
}
|
||||
|
||||
data := resdata[i][metric]
|
||||
if data.Error != nil {
|
||||
return nil, errors.New(*data.Error)
|
||||
}
|
||||
|
||||
if data.Avg == nil || data.Min == nil || data.Max == nil {
|
||||
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node, metric)
|
||||
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node.Hostname, metric)
|
||||
}
|
||||
|
||||
metricData.Series = append(metricData.Series, &schema.MetricSeries{
|
||||
NodeID: node,
|
||||
Data: data.Data,
|
||||
Hostname: node.Hostname,
|
||||
Data: data.Data,
|
||||
Statistics: &schema.MetricStatistics{
|
||||
Avg: *data.Avg,
|
||||
Min: *data.Min,
|
||||
@@ -132,7 +142,7 @@ func (ccms *CCMetricStore) LoadStats(job *model.Job, metrics []string, ctx conte
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resdata := make([]map[string]ApiStatsData, 0, len(job.Nodes))
|
||||
resdata := make([]map[string]ApiStatsData, 0, len(job.Resources))
|
||||
if err := json.NewDecoder(res.Body).Decode(&resdata); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -140,17 +150,22 @@ func (ccms *CCMetricStore) LoadStats(job *model.Job, metrics []string, ctx conte
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
for _, metric := range metrics {
|
||||
nodestats := map[string]schema.MetricStatistics{}
|
||||
for i, node := range job.Nodes {
|
||||
for i, node := range job.Resources {
|
||||
if node.Accelerators != nil || node.HWThreads != nil {
|
||||
// TODO/FIXME:
|
||||
return nil, errors.New("todo: cc-metric-store resources: Accelerator/HWThreads")
|
||||
}
|
||||
|
||||
data := resdata[i][metric]
|
||||
if data.Error != nil {
|
||||
return nil, errors.New(*data.Error)
|
||||
}
|
||||
|
||||
if data.Samples == 0 {
|
||||
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node, metric)
|
||||
return nil, fmt.Errorf("no data for node '%s' and metric '%s'", node.Hostname, metric)
|
||||
}
|
||||
|
||||
nodestats[node] = schema.MetricStatistics{
|
||||
nodestats[node.Hostname] = schema.MetricStatistics{
|
||||
Avg: float64(data.Avg),
|
||||
Min: float64(data.Min),
|
||||
Max: float64(data.Max),
|
||||
|
@@ -2,6 +2,7 @@ package metricdata
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
@@ -46,9 +47,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
|
||||
}
|
||||
fieldsCond := strings.Join(fieldsConds, " or ")
|
||||
|
||||
hostsConds := make([]string, 0, len(job.Nodes))
|
||||
for _, h := range job.Nodes {
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h))
|
||||
hostsConds := make([]string, 0, len(job.Resources))
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO/FIXME...
|
||||
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
|
||||
}
|
||||
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
@@ -72,18 +78,18 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
|
||||
field, host := row.Field(), row.ValueByKey("host").(string)
|
||||
jobMetric, ok := jobData[field]
|
||||
if !ok {
|
||||
mc := config.GetMetricConfig(job.ClusterID, field)
|
||||
mc := config.GetMetricConfig(job.Cluster, field)
|
||||
jobMetric = &schema.JobMetric{
|
||||
Scope: "node", // TODO: FIXME: Whatever...
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Sampletime,
|
||||
Series: make([]*schema.MetricSeries, 0, len(job.Nodes)),
|
||||
Timestep: mc.Timestep,
|
||||
Series: make([]*schema.MetricSeries, 0, len(job.Resources)),
|
||||
}
|
||||
jobData[field] = jobMetric
|
||||
}
|
||||
|
||||
currentSeries = &schema.MetricSeries{
|
||||
NodeID: host,
|
||||
Hostname: host,
|
||||
Statistics: nil,
|
||||
Data: make([]schema.Float, 0),
|
||||
}
|
||||
@@ -102,7 +108,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
|
||||
jobMetric := jobData[metric]
|
||||
for node, stats := range nodes {
|
||||
for _, series := range jobMetric.Series {
|
||||
if series.NodeID == node {
|
||||
if series.Hostname == node {
|
||||
series.Statistics = &stats
|
||||
}
|
||||
}
|
||||
@@ -115,9 +121,14 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *model.Job, metrics []string,
|
||||
func (idb *InfluxDBv2DataRepository) LoadStats(job *model.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
|
||||
hostsConds := make([]string, 0, len(job.Nodes))
|
||||
for _, h := range job.Nodes {
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h))
|
||||
hostsConds := make([]string, 0, len(job.Resources))
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO/FIXME...
|
||||
return nil, errors.New("the InfluxDB metric data repository does not support HWThreads or Accelerators")
|
||||
}
|
||||
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r.host == "%s"`, h.Hostname))
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
|
@@ -59,9 +59,9 @@ func Init(jobArchivePath string, disableArchive bool) error {
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *model.Job, metrics []string, ctx context.Context) (schema.JobData, error) {
|
||||
if job.State == model.JobStateRunning || !useArchive {
|
||||
repo, ok := metricDataRepos[job.ClusterID]
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.ClusterID)
|
||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
return repo.LoadData(job, metrics, ctx)
|
||||
@@ -90,9 +90,9 @@ func LoadAverages(job *model.Job, metrics []string, data [][]schema.Float, ctx c
|
||||
return loadAveragesFromArchive(job, metrics, data)
|
||||
}
|
||||
|
||||
repo, ok := metricDataRepos[job.ClusterID]
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.ClusterID)
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx)
|
||||
|
Reference in New Issue
Block a user