Mirror of https://github.com/ClusterCockpit/cc-backend, synced 2025-10-31 16:05:06 +01:00

Commit: Merge branch 'master' into 37-provide-a-s3-compatible-storage-backend-for-the-job-archive
		| @@ -1,26 +1,20 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| package archive | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"bytes" | ||||
| 	"compress/gzip" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"sync" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/internal/config" | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/log" | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/lrucache" | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/schema" | ||||
| ) | ||||
|  | ||||
| const Version uint64 = 1 | ||||
| const Version uint64 = 2 | ||||
|  | ||||
| type ArchiveBackend interface { | ||||
| 	Init(rawConfig json.RawMessage) (uint64, error) | ||||
| @@ -33,6 +27,8 @@ type ArchiveBackend interface { | ||||
|  | ||||
| 	LoadJobData(job *schema.Job) (schema.JobData, error) | ||||
|  | ||||
| 	LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) | ||||
|  | ||||
| 	LoadClusterCfg(name string) (*schema.Cluster, error) | ||||
|  | ||||
| 	StoreJobMeta(jobMeta *schema.JobMeta) error | ||||
| @@ -60,105 +56,55 @@ type JobContainer struct { | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	initOnce   sync.Once | ||||
| 	cache      *lrucache.Cache = lrucache.New(128 * 1024 * 1024) | ||||
| 	ar         ArchiveBackend | ||||
| 	useArchive bool | ||||
| ) | ||||
|  | ||||
| func getPath( | ||||
| 	job *schema.Job, | ||||
| 	rootPath string, | ||||
| 	file string, | ||||
| ) string { | ||||
| 	return filepath.Join( | ||||
| 		getDirectory(job, rootPath), file) | ||||
| } | ||||
|  | ||||
| func getDirectory( | ||||
| 	job *schema.Job, | ||||
| 	rootPath string, | ||||
| ) string { | ||||
| 	lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000) | ||||
|  | ||||
| 	return filepath.Join( | ||||
| 		rootPath, | ||||
| 		job.Cluster, | ||||
| 		lvl1, lvl2, | ||||
| 		strconv.FormatInt(job.StartTime.Unix(), 10)) | ||||
| } | ||||
|  | ||||
| func loadJobMeta(b []byte) (*schema.JobMeta, error) { | ||||
| 	if config.Keys.Validate { | ||||
| 		if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil { | ||||
| 			return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return DecodeJobMeta(bytes.NewReader(b)) | ||||
| } | ||||
|  | ||||
| func loadJobData(f io.Reader, key string, isCompressed bool) (schema.JobData, error) { | ||||
| 	if isCompressed { | ||||
| 		r, err := gzip.NewReader(f) | ||||
| 		if err != nil { | ||||
| 			log.Errorf(" %v", err) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer r.Close() | ||||
|  | ||||
| 		if config.Keys.Validate { | ||||
| 			if err := schema.Validate(schema.Data, r); err != nil { | ||||
| 				return schema.JobData{}, fmt.Errorf("validate job data: %v", err) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		return DecodeJobData(r, key) | ||||
| 	} else { | ||||
| 		if config.Keys.Validate { | ||||
| 			if err := schema.Validate(schema.Data, bufio.NewReader(f)); err != nil { | ||||
| 				return schema.JobData{}, fmt.Errorf("validate job data: %v", err) | ||||
| 			} | ||||
| 		} | ||||
| 		return DecodeJobData(bufio.NewReader(f), key) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func Init(rawConfig json.RawMessage, disableArchive bool) error { | ||||
| 	useArchive = !disableArchive | ||||
| 	var err error | ||||
|  | ||||
| 	var cfg struct { | ||||
| 		Kind string `json:"kind"` | ||||
| 	} | ||||
| 	initOnce.Do(func() { | ||||
| 		useArchive = !disableArchive | ||||
|  | ||||
| 	if err := json.Unmarshal(rawConfig, &cfg); err != nil { | ||||
| 		log.Warn("Error while unmarshaling raw config json") | ||||
| 		return err | ||||
| 	} | ||||
| 		var cfg struct { | ||||
| 			Kind string `json:"kind"` | ||||
| 		} | ||||
|  | ||||
| 	switch cfg.Kind { | ||||
| 	case "file": | ||||
| 		ar = &FsArchive{} | ||||
| 		// case "s3": | ||||
| 		// 	ar = &S3Archive{} | ||||
| 	default: | ||||
| 		return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind) | ||||
| 	} | ||||
| 		if err = json.Unmarshal(rawConfig, &cfg); err != nil { | ||||
| 			log.Warn("Error while unmarshaling raw config json") | ||||
| 			return | ||||
| 		} | ||||
|  | ||||
| 	version, err := ar.Init(rawConfig) | ||||
| 	if err != nil { | ||||
| 		log.Error("Error while initializing archiveBackend") | ||||
| 		return err | ||||
| 	} | ||||
| 	log.Infof("Load archive version %d", version) | ||||
| 		switch cfg.Kind { | ||||
| 		case "file": | ||||
| 			ar = &FsArchive{} | ||||
| 			// case "s3": | ||||
| 			// 	ar = &S3Archive{} | ||||
| 		default: | ||||
| 			err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind) | ||||
| 		} | ||||
|  | ||||
| 	return initClusterConfig() | ||||
| 		var version uint64 | ||||
| 		version, err = ar.Init(rawConfig) | ||||
| 		if err != nil { | ||||
| 			log.Errorf("Error while initializing archiveBackend: %s", err.Error()) | ||||
| 			return | ||||
| 		} | ||||
| 		log.Infof("Load archive version %d", version) | ||||
|  | ||||
| 		err = initClusterConfig() | ||||
| 	}) | ||||
|  | ||||
| 	return err | ||||
| } | ||||
|  | ||||
| func GetHandle() ArchiveBackend { | ||||
| 	return ar | ||||
| } | ||||
|  | ||||
| // Helper to metricdata.LoadAverages(). | ||||
| // Helper to metricdataloader.LoadAverages(). | ||||
| func LoadAveragesFromArchive( | ||||
| 	job *schema.Job, | ||||
| 	metrics []string, | ||||
| @@ -166,7 +112,7 @@ func LoadAveragesFromArchive( | ||||
| ) error { | ||||
| 	metaFile, err := ar.LoadJobMeta(job) | ||||
| 	if err != nil { | ||||
| 		log.Warn("Error while loading job metadata from archiveBackend") | ||||
| 		log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error()) | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| @@ -181,16 +127,80 @@ func LoadAveragesFromArchive( | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Helper to metricdataloader.LoadJobStats(). | ||||
| func LoadStatsFromArchive( | ||||
| 	job *schema.Job, | ||||
| 	metrics []string, | ||||
| ) (map[string]schema.MetricStatistics, error) { | ||||
| 	data := make(map[string]schema.MetricStatistics, len(metrics)) | ||||
| 	metaFile, err := ar.LoadJobMeta(job) | ||||
| 	if err != nil { | ||||
| 		log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error()) | ||||
| 		return data, err | ||||
| 	} | ||||
|  | ||||
| 	for _, m := range metrics { | ||||
| 		stat, ok := metaFile.Statistics[m] | ||||
| 		if !ok { | ||||
| 			data[m] = schema.MetricStatistics{Min: 0.0, Avg: 0.0, Max: 0.0} | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		data[m] = schema.MetricStatistics{ | ||||
| 			Avg: stat.Avg, | ||||
| 			Min: stat.Min, | ||||
| 			Max: stat.Max, | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return data, nil | ||||
| } | ||||
|  | ||||
| // Helper to metricdataloader.LoadScopedJobStats(). | ||||
| func LoadScopedStatsFromArchive( | ||||
| 	job *schema.Job, | ||||
| 	metrics []string, | ||||
| 	scopes []schema.MetricScope, | ||||
| ) (schema.ScopedJobStats, error) { | ||||
| 	data, err := ar.LoadJobStats(job) | ||||
| 	if err != nil { | ||||
| 		log.Errorf("Error while loading job stats from archiveBackend: %s", err.Error()) | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return data, nil | ||||
| } | ||||
|  | ||||
| func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) { | ||||
| 	metaFile, err := ar.LoadJobMeta(job) | ||||
| 	if err != nil { | ||||
| 		log.Warn("Error while loading job metadata from archiveBackend") | ||||
| 		log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error()) | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return metaFile.Statistics, nil | ||||
| } | ||||
|  | ||||
| // If the job is archived, find its `meta.json` file and override the Metadata | ||||
| // in that JSON file. If the job is not archived, nothing is done. | ||||
| func UpdateMetadata(job *schema.Job, metadata map[string]string) error { | ||||
| 	if job.State == schema.JobStateRunning || !useArchive { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	jobMeta, err := ar.LoadJobMeta(job) | ||||
| 	if err != nil { | ||||
| 		log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error()) | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	for k, v := range metadata { | ||||
| 		jobMeta.MetaData[k] = v | ||||
| 	} | ||||
|  | ||||
| 	return ar.StoreJobMeta(jobMeta) | ||||
| } | ||||
|  | ||||
| // If the job is archived, find its `meta.json` file and override the tags list | ||||
| // in that JSON file. If the job is not archived, nothing is done. | ||||
| func UpdateTags(job *schema.Job, tags []*schema.Tag) error { | ||||
| @@ -200,15 +210,16 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error { | ||||
|  | ||||
| 	jobMeta, err := ar.LoadJobMeta(job) | ||||
| 	if err != nil { | ||||
| 		log.Warn("Error while loading job metadata from archiveBackend") | ||||
| 		log.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error()) | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	jobMeta.Tags = make([]*schema.Tag, 0) | ||||
| 	for _, tag := range tags { | ||||
| 		jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{ | ||||
| 			Name: tag.Name, | ||||
| 			Type: tag.Type, | ||||
| 			Name:  tag.Name, | ||||
| 			Type:  tag.Type, | ||||
| 			Scope: tag.Scope, | ||||
| 		}) | ||||
| 	} | ||||
|  | ||||
|   | ||||
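With this change the ArchiveBackend interface gains LoadJobStats, Init is guarded by sync.Once, and the new LoadStatsFromArchive helper returns a min/avg/max triple per requested metric (zeroes when a metric has no recorded statistics). A minimal caller sketch; the package name, archive path, metric names, and the job value are illustrative assumptions, not part of this commit:

package archiveexample // illustrative helper package

import (
	"encoding/json"
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// PrintJobStats assumes job was loaded elsewhere (e.g. from the job repository).
func PrintJobStats(job *schema.Job) error {
	rawCfg := json.RawMessage(`{"kind": "file", "path": "./var/job-archive"}`)
	// Safe to call more than once: Init is wrapped in sync.Once.
	if err := archive.Init(rawCfg, false); err != nil {
		return err
	}

	stats, err := archive.LoadStatsFromArchive(job, []string{"flops_any", "mem_bw"})
	if err != nil {
		return err
	}
	for name, s := range stats {
		fmt.Printf("%s: min=%.2f avg=%.2f max=%.2f\n", name, s.Min, s.Avg, s.Max)
	}
	return nil
}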
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -12,13 +12,16 @@ import ( | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/schema" | ||||
| ) | ||||
|  | ||||
| var Clusters []*schema.Cluster | ||||
| var nodeLists map[string]map[string]NodeList | ||||
| var ( | ||||
| 	Clusters         []*schema.Cluster | ||||
| 	GlobalMetricList []*schema.GlobalMetricListItem | ||||
| 	NodeLists        map[string]map[string]NodeList | ||||
| ) | ||||
|  | ||||
| func initClusterConfig() error { | ||||
|  | ||||
| 	Clusters = []*schema.Cluster{} | ||||
| 	nodeLists = map[string]map[string]NodeList{} | ||||
| 	NodeLists = map[string]map[string]NodeList{} | ||||
| 	metricLookup := make(map[string]schema.GlobalMetricListItem) | ||||
|  | ||||
| 	for _, c := range ar.GetClusters() { | ||||
|  | ||||
| @@ -49,11 +52,79 @@ func initClusterConfig() error { | ||||
| 			if !mc.Scope.Valid() { | ||||
| 				return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)") | ||||
| 			} | ||||
|  | ||||
| 			ml, ok := metricLookup[mc.Name] | ||||
| 			if !ok { | ||||
| 				metricLookup[mc.Name] = schema.GlobalMetricListItem{ | ||||
| 					Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint, | ||||
| 				} | ||||
| 				ml = metricLookup[mc.Name] | ||||
| 			} | ||||
| 			availability := schema.ClusterSupport{Cluster: cluster.Name} | ||||
| 			scLookup := make(map[string]*schema.SubClusterConfig) | ||||
|  | ||||
| 			for _, scc := range mc.SubClusters { | ||||
| 				scLookup[scc.Name] = scc | ||||
| 			} | ||||
|  | ||||
| 			for _, sc := range cluster.SubClusters { | ||||
| 				newMetric := &schema.MetricConfig{ | ||||
| 					Unit:          mc.Unit, | ||||
| 					Energy:        mc.Energy, | ||||
| 					Name:          mc.Name, | ||||
| 					Scope:         mc.Scope, | ||||
| 					Aggregation:   mc.Aggregation, | ||||
| 					Peak:          mc.Peak, | ||||
| 					Caution:       mc.Caution, | ||||
| 					Alert:         mc.Alert, | ||||
| 					Timestep:      mc.Timestep, | ||||
| 					Normal:        mc.Normal, | ||||
| 					LowerIsBetter: mc.LowerIsBetter, | ||||
| 				} | ||||
|  | ||||
| 				if mc.Footprint != "" { | ||||
| 					newMetric.Footprint = mc.Footprint | ||||
| 				} | ||||
|  | ||||
| 				if cfg, ok := scLookup[sc.Name]; ok { | ||||
| 					if !cfg.Remove { | ||||
| 						availability.SubClusters = append(availability.SubClusters, sc.Name) | ||||
| 						newMetric.Peak = cfg.Peak | ||||
| 						newMetric.Normal = cfg.Normal | ||||
| 						newMetric.Caution = cfg.Caution | ||||
| 						newMetric.Alert = cfg.Alert | ||||
| 						newMetric.Footprint = cfg.Footprint | ||||
| 						newMetric.Energy = cfg.Energy | ||||
| 						newMetric.LowerIsBetter = cfg.LowerIsBetter | ||||
| 						sc.MetricConfig = append(sc.MetricConfig, *newMetric) | ||||
|  | ||||
| 						if newMetric.Footprint != "" { | ||||
| 							sc.Footprint = append(sc.Footprint, newMetric.Name) | ||||
| 							ml.Footprint = newMetric.Footprint | ||||
| 						} | ||||
| 						if newMetric.Energy != "" { | ||||
| 							sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name) | ||||
| 						} | ||||
| 					} | ||||
| 				} else { | ||||
| 					availability.SubClusters = append(availability.SubClusters, sc.Name) | ||||
| 					sc.MetricConfig = append(sc.MetricConfig, *newMetric) | ||||
|  | ||||
| 					if newMetric.Footprint != "" { | ||||
| 						sc.Footprint = append(sc.Footprint, newMetric.Name) | ||||
| 					} | ||||
| 					if newMetric.Energy != "" { | ||||
| 						sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name) | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 			ml.Availability = append(metricLookup[mc.Name].Availability, availability) | ||||
| 			metricLookup[mc.Name] = ml | ||||
| 		} | ||||
|  | ||||
| 		Clusters = append(Clusters, cluster) | ||||
|  | ||||
| 		nodeLists[cluster.Name] = make(map[string]NodeList) | ||||
| 		NodeLists[cluster.Name] = make(map[string]NodeList) | ||||
| 		for _, sc := range cluster.SubClusters { | ||||
| 			if sc.Nodes == "*" { | ||||
| 				continue | ||||
| @@ -63,15 +134,18 @@ func initClusterConfig() error { | ||||
| 			if err != nil { | ||||
| 				return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err) | ||||
| 			} | ||||
| 			nodeLists[cluster.Name][sc.Name] = nl | ||||
| 			NodeLists[cluster.Name][sc.Name] = nl | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	for _, ml := range metricLookup { | ||||
| 		GlobalMetricList = append(GlobalMetricList, &ml) | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func GetCluster(cluster string) *schema.Cluster { | ||||
|  | ||||
| 	for _, c := range Clusters { | ||||
| 		if c.Name == cluster { | ||||
| 			return c | ||||
| @@ -90,11 +164,10 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) { | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	return nil, fmt.Errorf("Subcluster '%v' not found for cluster '%v', or cluster '%v' not configured!", subcluster, cluster, cluster) | ||||
| 	return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster) | ||||
| } | ||||
|  | ||||
| func GetMetricConfig(cluster, metric string) *schema.MetricConfig { | ||||
|  | ||||
| 	for _, c := range Clusters { | ||||
| 		if c.Name == cluster { | ||||
| 			for _, m := range c.MetricConfig { | ||||
| @@ -110,7 +183,6 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig { | ||||
| // AssignSubCluster sets the `job.subcluster` property of the job based | ||||
| // on its cluster and resources. | ||||
| func AssignSubCluster(job *schema.BaseJob) error { | ||||
|  | ||||
| 	cluster := GetCluster(job.Cluster) | ||||
| 	if cluster == nil { | ||||
| 		return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster) | ||||
| @@ -130,7 +202,7 @@ func AssignSubCluster(job *schema.BaseJob) error { | ||||
| 	} | ||||
|  | ||||
| 	host0 := job.Resources[0].Hostname | ||||
| 	for sc, nl := range nodeLists[job.Cluster] { | ||||
| 	for sc, nl := range NodeLists[job.Cluster] { | ||||
| 		if nl != nil && nl.Contains(host0) { | ||||
| 			job.SubCluster = sc | ||||
| 			return nil | ||||
| @@ -146,8 +218,7 @@ func AssignSubCluster(job *schema.BaseJob) error { | ||||
| } | ||||
|  | ||||
| func GetSubClusterByNode(cluster, hostname string) (string, error) { | ||||
|  | ||||
| 	for sc, nl := range nodeLists[cluster] { | ||||
| 	for sc, nl := range NodeLists[cluster] { | ||||
| 		if nl != nil && nl.Contains(hostname) { | ||||
| 			return sc, nil | ||||
| 		} | ||||
| @@ -164,3 +235,13 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) { | ||||
|  | ||||
| 	return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname) | ||||
| } | ||||
|  | ||||
| func MetricIndex(mc []schema.MetricConfig, name string) (int, error) { | ||||
| 	for i, m := range mc { | ||||
| 		if m.Name == name { | ||||
| 			return i, nil | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return 0, fmt.Errorf("unknown metric name %s", name) | ||||
| } | ||||
|   | ||||
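initClusterConfig now also resolves per-subcluster metric configs and fills the exported GlobalMetricList, and MetricIndex is a new exported lookup helper. A short consumer sketch, assuming archive.Init has already run; the cluster, subcluster, and metric names are borrowed from the test below and stand in for real configuration:

package archiveexample // illustrative, not part of the commit

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
)

// ListGlobalMetrics prints every known metric and where it is available.
func ListGlobalMetrics() {
	for _, m := range archive.GlobalMetricList {
		fmt.Printf("%s:", m.Name)
		for _, a := range m.Availability {
			fmt.Printf(" %s%v", a.Cluster, a.SubClusters)
		}
		fmt.Println()
	}
}

// MemUsedPeak looks up the resolved peak value of mem_used on one subcluster.
func MemUsedPeak() (float64, error) {
	sc, err := archive.GetSubCluster("fritz", "spr1tb")
	if err != nil {
		return 0, err
	}
	idx, err := archive.MetricIndex(sc.MetricConfig, "mem_used")
	if err != nil {
		return 0, err
	}
	return sc.MetricConfig[idx].Peak, nil
}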
							
								
								
									
39 pkg/archive/clusterConfig_test.go (new file)
							| @@ -0,0 +1,39 @@ | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| package archive_test | ||||
|  | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/archive" | ||||
| ) | ||||
|  | ||||
| func TestClusterConfig(t *testing.T) { | ||||
| 	if err := archive.Init(json.RawMessage("{\"kind\": \"file\",\"path\": \"testdata/archive\"}"), false); err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
|  | ||||
| 	sc, err := archive.GetSubCluster("fritz", "spr1tb") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	// spew.Dump(sc.MetricConfig) | ||||
| 	if len(sc.Footprint) != 3 { | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 	if len(sc.MetricConfig) != 15 { | ||||
| 		t.Fail() | ||||
| 	} | ||||
|  | ||||
| 	for _, metric := range sc.MetricConfig { | ||||
| 		if metric.LowerIsBetter && metric.Name != "mem_used" { | ||||
| 			t.Fail() | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// spew.Dump(archive.GlobalMetricList) | ||||
| 	// t.Fail() | ||||
| } | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -40,6 +40,109 @@ type clusterInfo struct { | ||||
| 	diskSize  float64 | ||||
| } | ||||
|  | ||||
| func getDirectory( | ||||
| 	job *schema.Job, | ||||
| 	rootPath string, | ||||
| ) string { | ||||
| 	lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000) | ||||
|  | ||||
| 	return filepath.Join( | ||||
| 		rootPath, | ||||
| 		job.Cluster, | ||||
| 		lvl1, lvl2, | ||||
| 		strconv.FormatInt(job.StartTime.Unix(), 10)) | ||||
| } | ||||
|  | ||||
| func getPath( | ||||
| 	job *schema.Job, | ||||
| 	rootPath string, | ||||
| 	file string, | ||||
| ) string { | ||||
| 	return filepath.Join( | ||||
| 		getDirectory(job, rootPath), file) | ||||
| } | ||||
|  | ||||
| func loadJobMeta(filename string) (*schema.JobMeta, error) { | ||||
| 	b, err := os.ReadFile(filename) | ||||
| 	if err != nil { | ||||
| 		log.Errorf("loadJobMeta() > open file error: %v", err) | ||||
| 		return &schema.JobMeta{}, err | ||||
| 	} | ||||
| 	if config.Keys.Validate { | ||||
| 		if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil { | ||||
| 			return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return DecodeJobMeta(bytes.NewReader(b)) | ||||
| } | ||||
|  | ||||
| func loadJobData(filename string, isCompressed bool) (schema.JobData, error) { | ||||
| 	f, err := os.Open(filename) | ||||
| 	if err != nil { | ||||
| 		log.Errorf("fsBackend LoadJobData()- %v", err) | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
|  | ||||
| 	if isCompressed { | ||||
| 		r, err := gzip.NewReader(f) | ||||
| 		if err != nil { | ||||
| 			log.Errorf(" %v", err) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer r.Close() | ||||
|  | ||||
| 		if config.Keys.Validate { | ||||
| 			if err := schema.Validate(schema.Data, r); err != nil { | ||||
| 				return schema.JobData{}, fmt.Errorf("validate job data: %v", err) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		return DecodeJobData(r, filename) | ||||
| 	} else { | ||||
| 		if config.Keys.Validate { | ||||
| 			if err := schema.Validate(schema.Data, bufio.NewReader(f)); err != nil { | ||||
| 				return schema.JobData{}, fmt.Errorf("validate job data: %v", err) | ||||
| 			} | ||||
| 		} | ||||
| 		return DecodeJobData(bufio.NewReader(f), filename) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, error) { | ||||
| 	f, err := os.Open(filename) | ||||
| 	if err != nil { | ||||
| 		log.Errorf("fsBackend LoadJobStats()- %v", err) | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer f.Close() | ||||
|  | ||||
| 	if isCompressed { | ||||
| 		r, err := gzip.NewReader(f) | ||||
| 		if err != nil { | ||||
| 			log.Errorf(" %v", err) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer r.Close() | ||||
|  | ||||
| 		if config.Keys.Validate { | ||||
| 			if err := schema.Validate(schema.Data, r); err != nil { | ||||
| 				return nil, fmt.Errorf("validate job data: %v", err) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		return DecodeJobStats(r, filename) | ||||
| 	} else { | ||||
| 		if config.Keys.Validate { | ||||
| 			if err := schema.Validate(schema.Data, bufio.NewReader(f)); err != nil { | ||||
| 				return nil, fmt.Errorf("validate job data: %v", err) | ||||
| 			} | ||||
| 		} | ||||
| 		return DecodeJobStats(bufio.NewReader(f), filename) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) { | ||||
| 	var config FsArchiveConfig | ||||
| 	if err := json.Unmarshal(rawConfig, &config); err != nil { | ||||
| @@ -317,6 +420,18 @@ func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) { | ||||
| 	return loadJobData(f, filename, isCompressed) | ||||
| } | ||||
|  | ||||
| func (fsa *FsArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) { | ||||
| 	var isCompressed bool = true | ||||
| 	filename := getPath(job, fsa.path, "data.json.gz") | ||||
|  | ||||
| 	if !util.CheckFileExists(filename) { | ||||
| 		filename = getPath(job, fsa.path, "data.json") | ||||
| 		isCompressed = false | ||||
| 	} | ||||
|  | ||||
| 	return loadJobStats(filename, isCompressed) | ||||
| } | ||||
|  | ||||
| func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) { | ||||
| 	filename := getPath(job, fsa.path, "meta.json") | ||||
| 	b, err := os.ReadFile(filename) | ||||
|   | ||||
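getDirectory and getPath, moved into the filesystem backend by this commit, encode the on-disk layout <root>/<cluster>/<jobID/1000>/<jobID%1000, zero-padded to three digits>/<startTime as Unix seconds>. For an illustrative job 1235522 on cluster emmy started at Unix time 1609459200, the new LoadJobStats would therefore look for, in order:

	<root>/emmy/1235/522/1609459200/data.json.gz   (compressed, preferred)
	<root>/emmy/1235/522/1609459200/data.json      (uncompressed fallback)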
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -49,10 +49,10 @@ func TestInit(t *testing.T) { | ||||
| 	if fsa.path != "testdata/archive" { | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 	if version != 1 { | ||||
| 	if version != 2 { | ||||
| 		t.Fail() | ||||
| 	} | ||||
| 	if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" { | ||||
| 	if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" { | ||||
| 		t.Fail() | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -9,8 +9,8 @@ import ( | ||||
| 	"io" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/schema" | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/log" | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/schema" | ||||
| ) | ||||
|  | ||||
| func DecodeJobData(r io.Reader, k string) (schema.JobData, error) { | ||||
| @@ -32,6 +32,43 @@ func DecodeJobData(r io.Reader, k string) (schema.JobData, error) { | ||||
| 	return data.(schema.JobData), nil | ||||
| } | ||||
|  | ||||
| func DecodeJobStats(r io.Reader, k string) (schema.ScopedJobStats, error) { | ||||
| 	jobData, err := DecodeJobData(r, k) | ||||
| 	// Convert schema.JobData to schema.ScopedJobStats | ||||
| 	if jobData != nil { | ||||
| 		scopedJobStats := make(schema.ScopedJobStats) | ||||
| 		for metric, metricData := range jobData { | ||||
| 			if _, ok := scopedJobStats[metric]; !ok { | ||||
| 				scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats) | ||||
| 			} | ||||
|  | ||||
| 			for scope, jobMetric := range metricData { | ||||
| 				if _, ok := scopedJobStats[metric][scope]; !ok { | ||||
| 					scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0) | ||||
| 				} | ||||
|  | ||||
| 				for _, series := range jobMetric.Series { | ||||
| 					scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{ | ||||
| 						Hostname: series.Hostname, | ||||
| 						Id:       series.Id, | ||||
| 						Data:     &series.Statistics, | ||||
| 					}) | ||||
| 				} | ||||
|  | ||||
| 				// So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty | ||||
| 				if len(scopedJobStats[metric][scope]) == 0 { | ||||
| 					delete(scopedJobStats[metric], scope) | ||||
| 					if len(scopedJobStats[metric]) == 0 { | ||||
| 						delete(scopedJobStats, metric) | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		return scopedJobStats, nil | ||||
| 	} | ||||
| 	return nil, err | ||||
| } | ||||
|  | ||||
| func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) { | ||||
| 	var d schema.JobMeta | ||||
| 	if err := json.NewDecoder(r).Decode(&d); err != nil { | ||||
|   | ||||
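DecodeJobStats reuses DecodeJobData and flattens each series into per-scope statistics keyed as stats[metric][scope], dropping empty scopes and metrics. A consumer sketch; the metric name is a placeholder and stats is assumed to come from a backend's LoadJobStats:

package archiveexample // illustrative, not part of the commit

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// PrintMetricStats walks all scopes recorded for one metric of an archived job.
func PrintMetricStats(stats schema.ScopedJobStats) {
	for scope, series := range stats["flops_any"] {
		for _, s := range series {
			fmt.Printf("scope %v, host %s: avg=%.2f max=%.2f\n", scope, s.Hostname, s.Data.Avg, s.Data.Max)
		}
	}
}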
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
							
								
								
									
2772 pkg/archive/testdata/archive/alex/cluster.json (vendored, new file; diff suppressed because it is too large)
1601 pkg/archive/testdata/archive/fritz/cluster.json (vendored; diff suppressed because it is too large)
								
								
									
										2
									
								
								pkg/archive/testdata/archive/version.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								pkg/archive/testdata/archive/version.txt
									
									
									
									
										vendored
									
									
								
							| @@ -1 +1 @@ | ||||
| 1 | ||||
| 2 | ||||
|   | ||||
							
								
								
									
238 pkg/log/log.go
							| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -46,12 +46,12 @@ var loglevel string = "info" | ||||
| /* CONFIG */ | ||||
|  | ||||
| func Init(lvl string, logdate bool) { | ||||
|  | ||||
| 	// Discard I/O for all writers below selected loglevel; <CRITICAL> is always written. | ||||
| 	switch lvl { | ||||
| 	case "crit": | ||||
| 		ErrWriter = io.Discard | ||||
| 		fallthrough | ||||
| 	case "err", "fatal": | ||||
| 	case "err": | ||||
| 		WarnWriter = io.Discard | ||||
| 		fallthrough | ||||
| 	case "warn": | ||||
| @@ -63,8 +63,7 @@ func Init(lvl string, logdate bool) { | ||||
| 		// Nothing to do... | ||||
| 		break | ||||
| 	default: | ||||
| 		fmt.Printf("pkg/log: Flag 'loglevel' has invalid value %#v\npkg/log: Will use default loglevel 'debug'\n", lvl) | ||||
| 		//SetLogLevel("debug") | ||||
| 		fmt.Printf("pkg/log: Flag 'loglevel' has invalid value %#v\npkg/log: Will use default loglevel '%s'\n", lvl, loglevel) | ||||
| 	} | ||||
|  | ||||
| 	if !logdate { | ||||
| @@ -84,109 +83,138 @@ func Init(lvl string, logdate bool) { | ||||
| 	loglevel = lvl | ||||
| } | ||||
|  | ||||
| /* PRINT */ | ||||
|  | ||||
| // Private helper | ||||
| func printStr(v ...interface{}) string { | ||||
| 	return fmt.Sprint(v...) | ||||
| } | ||||
|  | ||||
| // Uses Info() -> If errorpath required at some point: | ||||
| // Will need own writer with 'Output(2, out)' to correctly render path | ||||
| func Print(v ...interface{}) { | ||||
| 	Info(v...) | ||||
| } | ||||
|  | ||||
| func Debug(v ...interface{}) { | ||||
| 	DebugLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| func Info(v ...interface{}) { | ||||
| 	InfoLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| func Warn(v ...interface{}) { | ||||
| 	WarnLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| func Error(v ...interface{}) { | ||||
| 	ErrLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| // Writes panic stacktrace, but keeps application alive | ||||
| func Panic(v ...interface{}) { | ||||
| 	ErrLog.Output(2, printStr(v...)) | ||||
| 	panic("Panic triggered ...") | ||||
| } | ||||
|  | ||||
| func Crit(v ...interface{}) { | ||||
| 	CritLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| // Writes critical log, stops application | ||||
| func Fatal(v ...interface{}) { | ||||
| 	CritLog.Output(2, printStr(v...)) | ||||
| 	os.Exit(1) | ||||
| } | ||||
|  | ||||
| /* PRINT FORMAT*/ | ||||
|  | ||||
| // Private helper | ||||
| func printfStr(format string, v ...interface{}) string { | ||||
| 	return fmt.Sprintf(format, v...) | ||||
| } | ||||
|  | ||||
| // Uses Infof() -> If errorpath required at some point: | ||||
| // Will need own writer with 'Output(2, out)' to correctly render path | ||||
| func Printf(format string, v ...interface{}) { | ||||
| 	Infof(format, v...) | ||||
| } | ||||
|  | ||||
| func Debugf(format string, v ...interface{}) { | ||||
| 	DebugLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| func Infof(format string, v ...interface{}) { | ||||
| 	InfoLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| func Warnf(format string, v ...interface{}) { | ||||
| 	WarnLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| func Errorf(format string, v ...interface{}) { | ||||
| 	ErrLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| // Writes panic stacktrace, but keeps application alive | ||||
| func Panicf(format string, v ...interface{}) { | ||||
| 	ErrLog.Output(2, printfStr(format, v...)) | ||||
| 	panic("Panic triggered ...") | ||||
| } | ||||
|  | ||||
| func Critf(format string, v ...interface{}) { | ||||
| 	CritLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| // Writes crit log, stops application | ||||
| func Fatalf(format string, v ...interface{}) { | ||||
| 	CritLog.Output(2, printfStr(format, v...)) | ||||
| 	os.Exit(1) | ||||
| } | ||||
| /* HELPER */ | ||||
|  | ||||
| func Loglevel() string { | ||||
| 	return loglevel | ||||
| } | ||||
|  | ||||
| /* SPECIAL */ | ||||
| /* PRIVATE HELPER */ | ||||
|  | ||||
| // func Finfof(w io.Writer, format string, v ...interface{}) { | ||||
| // 	if w != io.Discard { | ||||
| // 		if logDateTime { | ||||
| // 			currentTime := time.Now() | ||||
| // 			fmt.Fprintf(InfoWriter, currentTime.String()+InfoPrefix+format+"\n", v...) | ||||
| // 		} else { | ||||
| // 			fmt.Fprintf(InfoWriter, InfoPrefix+format+"\n", v...) | ||||
| // 		} | ||||
| // 	} | ||||
| // } | ||||
| // Return unformatted string | ||||
| func printStr(v ...interface{}) string { | ||||
| 	return fmt.Sprint(v...) | ||||
| } | ||||
|  | ||||
| // Return formatted string | ||||
| func printfStr(format string, v ...interface{}) string { | ||||
| 	return fmt.Sprintf(format, v...) | ||||
| } | ||||
|  | ||||
| /* PRINT */ | ||||
|  | ||||
| // Prints to STDOUT without string formatting; application continues. | ||||
| // Used for special cases not requiring log information like date or location. | ||||
| func Print(v ...interface{}) { | ||||
| 	fmt.Fprintln(os.Stdout, v...) | ||||
| } | ||||
|  | ||||
| // Prints to STDOUT without string formatting; application exits with error code 0. | ||||
| // Used for exiting succesfully with message after expected outcome, e.g. successful single-call application runs. | ||||
| func Exit(v ...interface{}) { | ||||
| 	fmt.Fprintln(os.Stdout, v...) | ||||
| 	os.Exit(0) | ||||
| } | ||||
|  | ||||
| // Prints to STDOUT without string formatting; application exits with error code 1. | ||||
| // Used for terminating with message after to be expected errors, e.g. wrong arguments or during init(). | ||||
| func Abort(v ...interface{}) { | ||||
| 	fmt.Fprintln(os.Stdout, v...) | ||||
| 	os.Exit(1) | ||||
| } | ||||
|  | ||||
| // Prints to DEBUG writer without string formatting; application continues. | ||||
| // Used for logging additional information, primarily for development. | ||||
| func Debug(v ...interface{}) { | ||||
| 	DebugLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| // Prints to INFO writer without string formatting; application continues. | ||||
| // Used for logging additional information, e.g. notable returns or common fail-cases. | ||||
| func Info(v ...interface{}) { | ||||
| 	InfoLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| // Prints to WARNING writer without string formatting; application continues. | ||||
| // Used for logging important information, e.g. uncommon edge-cases or administration related information. | ||||
| func Warn(v ...interface{}) { | ||||
| 	WarnLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| // Prints to ERROR writer without string formatting; application continues. | ||||
| // Used for logging errors, but code still can return default(s) or nil. | ||||
| func Error(v ...interface{}) { | ||||
| 	ErrLog.Output(2, printStr(v...)) | ||||
| } | ||||
|  | ||||
| // Prints to CRITICAL writer without string formatting; application exits with error code 1. | ||||
| // Used for terminating on unexpected errors with date and code location. | ||||
| func Fatal(v ...interface{}) { | ||||
| 	CritLog.Output(2, printStr(v...)) | ||||
| 	os.Exit(1) | ||||
| } | ||||
|  | ||||
| // Prints to PANIC function without string formatting; application exits with panic. | ||||
| // Used for terminating on unexpected errors with stacktrace. | ||||
| func Panic(v ...interface{}) { | ||||
| 	panic(printStr(v...)) | ||||
| } | ||||
|  | ||||
| /* PRINT FORMAT*/ | ||||
|  | ||||
| // Prints to STDOUT with string formatting; application continues. | ||||
| // Used for special cases not requiring log information like date or location. | ||||
| func Printf(format string, v ...interface{}) { | ||||
| 	fmt.Fprintf(os.Stdout, format, v...) | ||||
| } | ||||
|  | ||||
| // Prints to STDOUT with string formatting; application exits with error code 0. | ||||
| // Used for exiting succesfully with message after expected outcome, e.g. successful single-call application runs. | ||||
| func Exitf(format string, v ...interface{}) { | ||||
| 	fmt.Fprintf(os.Stdout, format, v...) | ||||
| 	os.Exit(0) | ||||
| } | ||||
|  | ||||
| // Prints to STDOUT with string formatting; application exits with error code 1. | ||||
| // Used for terminating with message after to be expected errors, e.g. wrong arguments or during init(). | ||||
| func Abortf(format string, v ...interface{}) { | ||||
| 	fmt.Fprintf(os.Stdout, format, v...) | ||||
| 	os.Exit(1) | ||||
| } | ||||
|  | ||||
| // Prints to DEBUG writer with string formatting; application continues. | ||||
| // Used for logging additional information, primarily for development. | ||||
| func Debugf(format string, v ...interface{}) { | ||||
| 	DebugLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| // Prints to INFO writer with string formatting; application continues. | ||||
| // Used for logging additional information, e.g. notable returns or common fail-cases. | ||||
| func Infof(format string, v ...interface{}) { | ||||
| 	InfoLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| // Prints to WARNING writer with string formatting; application continues. | ||||
| // Used for logging important information, e.g. uncommon edge-cases or administration related information. | ||||
| func Warnf(format string, v ...interface{}) { | ||||
| 	WarnLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| // Prints to ERROR writer with string formatting; application continues. | ||||
| // Used for logging errors, but code still can return default(s) or nil. | ||||
| func Errorf(format string, v ...interface{}) { | ||||
| 	ErrLog.Output(2, printfStr(format, v...)) | ||||
| } | ||||
|  | ||||
| // Prints to CRITICAL writer with string formatting; application exits with error code 1. | ||||
| // Used for terminating on unexpected errors with date and code location. | ||||
| func Fatalf(format string, v ...interface{}) { | ||||
| 	CritLog.Output(2, printfStr(format, v...)) | ||||
| 	os.Exit(1) | ||||
| } | ||||
|  | ||||
| // Prints to PANIC function with string formatting; application exits with panic. | ||||
| // Used for terminating on unexpected errors with stacktrace. | ||||
| func Panicf(format string, v ...interface{}) { | ||||
| 	panic(printfStr(format, v...)) | ||||
| } | ||||
|   | ||||
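The reworked logger separates plain STDOUT output (Print, Exit, Abort and their *f variants) from the leveled writers (Debug, Info, Warn, Error, Fatal, Panic). A brief sketch of the intended split; the function, its argument, and the messages are placeholders:

package logexample // illustrative, not part of the commit

import (
	"os"

	"github.com/ClusterCockpit/cc-backend/pkg/log"
)

func LoadConfigOrDie(path string) []byte {
	if path == "" {
		// Expected user error: plain message to STDOUT, exit code 1, no log decoration.
		log.Abortf("usage: cc-backend -config <file>\n")
	}
	b, err := os.ReadFile(path)
	if err != nil {
		// Unexpected error: CRITICAL writer with date and code location, exit code 1.
		log.Fatalf("cannot read config %s: %s", path, err.Error())
	}
	return b
}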
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
							
								
								
									
123 pkg/resampler/resampler.go (new file)
							| @@ -0,0 +1,123 @@ | ||||
| package resampler | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/schema" | ||||
| ) | ||||
|  | ||||
| func SimpleResampler(data []schema.Float, old_frequency int64, new_frequency int64) ([]schema.Float, int64, error) { | ||||
| 	if old_frequency == 0 || new_frequency == 0 || new_frequency <= old_frequency { | ||||
| 		return data, old_frequency, nil | ||||
| 	} | ||||
|  | ||||
| 	if new_frequency%old_frequency != 0 { | ||||
| 		return nil, 0, errors.New("new sampling frequency should be multiple of the old frequency") | ||||
| 	} | ||||
|  | ||||
| 	var step int = int(new_frequency / old_frequency) | ||||
| 	var new_data_length = len(data) / step | ||||
|  | ||||
| 	if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) { | ||||
| 		return data, old_frequency, nil | ||||
| 	} | ||||
|  | ||||
| 	new_data := make([]schema.Float, new_data_length) | ||||
|  | ||||
| 	for i := 0; i < new_data_length; i++ { | ||||
| 		new_data[i] = data[i*step] | ||||
| 	} | ||||
|  | ||||
| 	return new_data, new_frequency, nil | ||||
| } | ||||
|  | ||||
| // Inspired by one of the algorithms from https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf | ||||
| // Adapted from https://github.com/haoel/downsampling/blob/master/core/lttb.go | ||||
| func LargestTriangleThreeBucket(data []schema.Float, old_frequency int, new_frequency int) ([]schema.Float, int, error) { | ||||
|  | ||||
| 	if old_frequency == 0 || new_frequency == 0 || new_frequency <= old_frequency { | ||||
| 		return data, old_frequency, nil | ||||
| 	} | ||||
|  | ||||
| 	if new_frequency%old_frequency != 0 { | ||||
| 		return nil, 0, errors.New(fmt.Sprintf("new sampling frequency : %d should be multiple of the old frequency : %d", new_frequency, old_frequency)) | ||||
| 	} | ||||
|  | ||||
| 	var step int = int(new_frequency / old_frequency) | ||||
| 	var new_data_length = len(data) / step | ||||
|  | ||||
| 	if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) { | ||||
| 		return data, old_frequency, nil | ||||
| 	} | ||||
|  | ||||
| 	new_data := make([]schema.Float, 0, new_data_length) | ||||
|  | ||||
| 	// Bucket size. Leave room for start and end data points | ||||
| 	bucketSize := float64(len(data)-2) / float64(new_data_length-2) | ||||
|  | ||||
| 	new_data = append(new_data, data[0]) // Always add the first point | ||||
|  | ||||
| 	// We have 3 pointers represent for | ||||
| 	// > bucketLow - the current bucket's beginning location | ||||
| 	// > bucketMiddle - the current bucket's ending location, | ||||
| 	//                  also the beginning location of next bucket | ||||
| 	// > bucketHight - the next bucket's ending location. | ||||
| 	bucketLow := 1 | ||||
| 	bucketMiddle := int(math.Floor(bucketSize)) + 1 | ||||
|  | ||||
| 	var prevMaxAreaPoint int | ||||
|  | ||||
| 	for i := 0; i < new_data_length-2; i++ { | ||||
|  | ||||
| 		bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1 | ||||
| 		if bucketHigh >= len(data)-1 { | ||||
| 			bucketHigh = len(data) - 2 | ||||
| 		} | ||||
|  | ||||
| 		// Calculate point average for next bucket (containing c) | ||||
| 		avgPointX, avgPointY := calculateAverageDataPoint(data[bucketMiddle:bucketHigh+1], int64(bucketMiddle)) | ||||
|  | ||||
| 		// Get the range for current bucket | ||||
| 		currBucketStart := bucketLow | ||||
| 		currBucketEnd := bucketMiddle | ||||
|  | ||||
| 		// Point a | ||||
| 		pointX := prevMaxAreaPoint | ||||
| 		pointY := data[prevMaxAreaPoint] | ||||
|  | ||||
| 		maxArea := -1.0 | ||||
|  | ||||
| 		var maxAreaPoint int | ||||
| 		flag_ := 0 | ||||
| 		for ; currBucketStart < currBucketEnd; currBucketStart++ { | ||||
|  | ||||
| 			area := calculateTriangleArea(schema.Float(pointX), pointY, avgPointX, avgPointY, schema.Float(currBucketStart), data[currBucketStart]) | ||||
| 			if area > maxArea { | ||||
| 				maxArea = area | ||||
| 				maxAreaPoint = currBucketStart | ||||
| 			} | ||||
| 			if math.IsNaN(float64(avgPointY)) { | ||||
| 				flag_ = 1 | ||||
|  | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		if flag_ == 1 { | ||||
| 			new_data = append(new_data, schema.NaN) // Pick this point from the bucket | ||||
|  | ||||
| 		} else { | ||||
| 			new_data = append(new_data, data[maxAreaPoint]) // Pick this point from the bucket | ||||
| 		} | ||||
| 		prevMaxAreaPoint = maxAreaPoint // This MaxArea point is the next's prevMAxAreaPoint | ||||
|  | ||||
| 		//move to the next window | ||||
| 		bucketLow = bucketMiddle | ||||
| 		bucketMiddle = bucketHigh | ||||
| 	} | ||||
|  | ||||
| 	new_data = append(new_data, data[len(data)-1]) // Always add last | ||||
|  | ||||
| 	return new_data, new_frequency, nil | ||||
| } | ||||
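Both resamplers return the input unchanged if the requested timestep is not coarser than the old one or the series has fewer than 100 points, and they return an error if the new timestep is not an integer multiple of the old one. An illustrative call; the synthetic sine series and the 60s-to-240s timesteps are assumptions:

package resamplerexample // illustrative, not part of the commit

import (
	"fmt"
	"math"

	"github.com/ClusterCockpit/cc-backend/pkg/resampler"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func DownsampleDemo() error {
	// Synthetic 600-point series sampled every 60 seconds.
	data := make([]schema.Float, 600)
	for i := range data {
		data[i] = schema.Float(math.Sin(float64(i) / 10.0))
	}

	// Reduce to a 240-second timestep: one representative point per bucket of four samples.
	resampled, timestep, err := resampler.LargestTriangleThreeBucket(data, 60, 240)
	if err != nil {
		return err
	}
	fmt.Printf("%d points -> %d points at a %ds timestep\n", len(data), len(resampled), timestep)
	return nil
}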
							
								
								
									
35 pkg/resampler/util.go (new file)
							| @@ -0,0 +1,35 @@ | ||||
| package resampler | ||||
|  | ||||
| import ( | ||||
| 	"math" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/schema" | ||||
| ) | ||||
|  | ||||
| func calculateTriangleArea(paX, paY, pbX, pbY, pcX, pcY schema.Float) float64 { | ||||
| 	area := ((paX-pcX)*(pbY-paY) - (paX-pbX)*(pcY-paY)) * 0.5 | ||||
| 	return math.Abs(float64(area)) | ||||
| } | ||||
|  | ||||
| func calculateAverageDataPoint(points []schema.Float, xStart int64) (avgX schema.Float, avgY schema.Float) { | ||||
| 	flag := 0 | ||||
| 	for _, point := range points { | ||||
| 		avgX += schema.Float(xStart) | ||||
| 		avgY += point | ||||
| 		xStart++ | ||||
| 		if math.IsNaN(float64(point)) { | ||||
| 			flag = 1 | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	l := schema.Float(len(points)) | ||||
|  | ||||
| 	avgX /= l | ||||
| 	avgY /= l | ||||
|  | ||||
| 	if flag == 1 { | ||||
| 		return avgX, schema.NaN | ||||
| 	} else { | ||||
| 		return avgX, avgY | ||||
| 	} | ||||
| } | ||||
							
								
								
									
142 pkg/runtimeEnv/setup.go (new file)
							| @@ -0,0 +1,142 @@ | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| package runtimeEnv | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"os/user" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/pkg/log" | ||||
| ) | ||||
|  | ||||
| // Very simple and limited .env file reader. | ||||
| // All variable definitions found are directly | ||||
| // added to the processes environment. | ||||
| func LoadEnv(file string) error { | ||||
| 	f, err := os.Open(file) | ||||
| 	if err != nil { | ||||
| 		log.Error("Error while opening .env file") | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	defer f.Close() | ||||
| 	s := bufio.NewScanner(bufio.NewReader(f)) | ||||
| 	for s.Scan() { | ||||
| 		line := s.Text() | ||||
| 		if strings.HasPrefix(line, "#") || len(line) == 0 { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		if strings.Contains(line, "#") { | ||||
| 			return errors.New("'#' are only supported at the start of a line") | ||||
| 		} | ||||
|  | ||||
| 		line = strings.TrimPrefix(line, "export ") | ||||
| 		parts := strings.SplitN(line, "=", 2) | ||||
| 		if len(parts) != 2 { | ||||
| 			return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line) | ||||
| 		} | ||||
|  | ||||
| 		key := strings.TrimSpace(parts[0]) | ||||
| 		val := strings.TrimSpace(parts[1]) | ||||
| 		if strings.HasPrefix(val, "\"") { | ||||
| 			if !strings.HasSuffix(val, "\"") { | ||||
| 				return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line) | ||||
| 			} | ||||
|  | ||||
| 			runes := []rune(val[1 : len(val)-1]) | ||||
| 			sb := strings.Builder{} | ||||
| 			for i := 0; i < len(runes); i++ { | ||||
| 				if runes[i] == '\\' { | ||||
| 					i++ | ||||
| 					switch runes[i] { | ||||
| 					case 'n': | ||||
| 						sb.WriteRune('\n') | ||||
| 					case 'r': | ||||
| 						sb.WriteRune('\r') | ||||
| 					case 't': | ||||
| 						sb.WriteRune('\t') | ||||
| 					case '"': | ||||
| 						sb.WriteRune('"') | ||||
| 					default: | ||||
| 						return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i]) | ||||
| 					} | ||||
| 					continue | ||||
| 				} | ||||
| 				sb.WriteRune(runes[i]) | ||||
| 			} | ||||
|  | ||||
| 			val = sb.String() | ||||
| 		} | ||||
|  | ||||
| 		os.Setenv(key, val) | ||||
| 	} | ||||
|  | ||||
| 	return s.Err() | ||||
| } | ||||
|  | ||||
| // Changes the processes user and group to that | ||||
| // specified in the config.json. The go runtime | ||||
| // takes care of all threads (and not only the calling one) | ||||
| // executing the underlying systemcall. | ||||
| func DropPrivileges(username string, group string) error { | ||||
| 	if group != "" { | ||||
| 		g, err := user.LookupGroup(group) | ||||
| 		if err != nil { | ||||
| 			log.Warn("Error while looking up group") | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		gid, _ := strconv.Atoi(g.Gid) | ||||
| 		if err := syscall.Setgid(gid); err != nil { | ||||
| 			log.Warn("Error while setting gid") | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if username != "" { | ||||
| 		u, err := user.Lookup(username) | ||||
| 		if err != nil { | ||||
| 			log.Warn("Error while looking up user") | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		uid, _ := strconv.Atoi(u.Uid) | ||||
| 		if err := syscall.Setuid(uid); err != nil { | ||||
| 			log.Warn("Error while setting uid") | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // If started via systemd, inform systemd that we are running: | ||||
| // https://www.freedesktop.org/software/systemd/man/sd_notify.html | ||||
| func SystemdNotifiy(ready bool, status string) { | ||||
| 	if os.Getenv("NOTIFY_SOCKET") == "" { | ||||
| 		// Not started using systemd | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	args := []string{fmt.Sprintf("--pid=%d", os.Getpid())} | ||||
| 	if ready { | ||||
| 		args = append(args, "--ready") | ||||
| 	} | ||||
|  | ||||
| 	if status != "" { | ||||
| 		args = append(args, fmt.Sprintf("--status=%s", status)) | ||||
| 	} | ||||
|  | ||||
| 	cmd := exec.Command("systemd-notify", args...) | ||||
| 	cmd.Run() // errors ignored on purpose, there is not much to do anyways. | ||||
| } | ||||
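LoadEnv only understands full-line comments, an optional "export " prefix, and KEY=VALUE pairs; double-quoted values may use \n, \r, \t and \" escapes, and a '#' anywhere else on a line is rejected. An illustrative file it would accept (all values are placeholders):

	# example .env, placeholder values only
	export DB_USER=clustercockpit
	export DB_PASS="super\tsecret"
	SESSION_KEY=changeme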
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -30,38 +30,47 @@ type MetricValue struct { | ||||
| } | ||||
|  | ||||
| type SubCluster struct { | ||||
| 	Name            string      `json:"name"` | ||||
| 	Nodes           string      `json:"nodes"` | ||||
| 	ProcessorType   string      `json:"processorType"` | ||||
| 	SocketsPerNode  int         `json:"socketsPerNode"` | ||||
| 	CoresPerSocket  int         `json:"coresPerSocket"` | ||||
| 	ThreadsPerCore  int         `json:"threadsPerCore"` | ||||
| 	FlopRateScalar  MetricValue `json:"flopRateScalar"` | ||||
| 	FlopRateSimd    MetricValue `json:"flopRateSimd"` | ||||
| 	MemoryBandwidth MetricValue `json:"memoryBandwidth"` | ||||
| 	Topology        Topology    `json:"topology"` | ||||
| 	Name            string         `json:"name"` | ||||
| 	Nodes           string         `json:"nodes"` | ||||
| 	ProcessorType   string         `json:"processorType"` | ||||
| 	Topology        Topology       `json:"topology"` | ||||
| 	FlopRateScalar  MetricValue    `json:"flopRateScalar"` | ||||
| 	FlopRateSimd    MetricValue    `json:"flopRateSimd"` | ||||
| 	MemoryBandwidth MetricValue    `json:"memoryBandwidth"` | ||||
| 	MetricConfig    []MetricConfig `json:"metricConfig,omitempty"` | ||||
| 	Footprint       []string       `json:"footprint,omitempty"` | ||||
| 	EnergyFootprint []string       `json:"energyFootprint,omitempty"` | ||||
| 	SocketsPerNode  int            `json:"socketsPerNode"` | ||||
| 	CoresPerSocket  int            `json:"coresPerSocket"` | ||||
| 	ThreadsPerCore  int            `json:"threadsPerCore"` | ||||
| } | ||||
|  | ||||
| type SubClusterConfig struct { | ||||
| 	Name    string  `json:"name"` | ||||
| 	Peak    float64 `json:"peak"` | ||||
| 	Normal  float64 `json:"normal"` | ||||
| 	Caution float64 `json:"caution"` | ||||
| 	Alert   float64 `json:"alert"` | ||||
| 	Remove  bool    `json:"remove"` | ||||
| 	Name          string  `json:"name"` | ||||
| 	Footprint     string  `json:"footprint,omitempty"` | ||||
| 	Energy        string  `json:"energy"` | ||||
| 	Peak          float64 `json:"peak"` | ||||
| 	Normal        float64 `json:"normal"` | ||||
| 	Caution       float64 `json:"caution"` | ||||
| 	Alert         float64 `json:"alert"` | ||||
| 	Remove        bool    `json:"remove"` | ||||
| 	LowerIsBetter bool    `json:"lowerIsBetter"` | ||||
| } | ||||
|  | ||||
| type MetricConfig struct { | ||||
| 	Name        string              `json:"name"` | ||||
| 	Unit        Unit                `json:"unit"` | ||||
| 	Scope       MetricScope         `json:"scope"` | ||||
| 	Aggregation string              `json:"aggregation"` | ||||
| 	Timestep    int                 `json:"timestep"` | ||||
| 	Peak        float64             `json:"peak"` | ||||
| 	Normal      float64             `json:"normal"` | ||||
| 	Caution     float64             `json:"caution"` | ||||
| 	Alert       float64             `json:"alert"` | ||||
| 	SubClusters []*SubClusterConfig `json:"subClusters,omitempty"` | ||||
| 	Unit          Unit                `json:"unit"` | ||||
| 	Energy        string              `json:"energy"` | ||||
| 	Name          string              `json:"name"` | ||||
| 	Scope         MetricScope         `json:"scope"` | ||||
| 	Aggregation   string              `json:"aggregation"` | ||||
| 	Footprint     string              `json:"footprint,omitempty"` | ||||
| 	SubClusters   []*SubClusterConfig `json:"subClusters,omitempty"` | ||||
| 	Peak          float64             `json:"peak"` | ||||
| 	Caution       float64             `json:"caution"` | ||||
| 	Alert         float64             `json:"alert"` | ||||
| 	Timestep      int                 `json:"timestep"` | ||||
| 	Normal        float64             `json:"normal"` | ||||
| 	LowerIsBetter bool                `json:"lowerIsBetter"` | ||||
| } | ||||
|  | ||||
| type Cluster struct { | ||||
| @@ -70,14 +79,27 @@ type Cluster struct { | ||||
| 	SubClusters  []*SubCluster   `json:"subClusters"` | ||||
| } | ||||
|  | ||||
| type ClusterSupport struct { | ||||
| 	Cluster     string   `json:"cluster"` | ||||
| 	SubClusters []string `json:"subclusters"` | ||||
| } | ||||
|  | ||||
| type GlobalMetricListItem struct { | ||||
| 	Name         string           `json:"name"` | ||||
| 	Unit         Unit             `json:"unit"` | ||||
| 	Scope        MetricScope      `json:"scope"` | ||||
| 	Footprint    string           `json:"footprint,omitempty"` | ||||
| 	Availability []ClusterSupport `json:"availability"` | ||||
| } | ||||
|  | ||||
| // Return a list of socket IDs given a list of hwthread IDs.  Even if just one | ||||
| // hwthread is in that socket, add it to the list.  If no hwthreads other than | ||||
| // those in the argument list are assigned to one of the sockets in the first | ||||
| // return value, return true as the second value.  TODO: Optimize this, there | ||||
| // must be a more efficient way/algorithm. | ||||
| func (topo *Topology) GetSocketsFromHWThreads( | ||||
| 	hwthreads []int) (sockets []int, exclusive bool) { | ||||
|  | ||||
| 	hwthreads []int, | ||||
| ) (sockets []int, exclusive bool) { | ||||
| 	socketsMap := map[int]int{} | ||||
| 	for _, hwthread := range hwthreads { | ||||
| 		for socket, hwthreadsInSocket := range topo.Socket { | ||||
| @@ -100,14 +122,46 @@ func (topo *Topology) GetSocketsFromHWThreads( | ||||
| 	return sockets, exclusive | ||||
| } | ||||
|  | ||||
| // Return a list of socket IDs given a list of core IDs.  Even if just one | ||||
| // core is in that socket, add it to the list.  If no cores other than | ||||
| // those in the argument list are assigned to one of the sockets in the first | ||||
| // return value, return true as the second value.  TODO: Optimize this, there | ||||
| // must be a more efficient way/algorithm. | ||||
| func (topo *Topology) GetSocketsFromCores( | ||||
| 	cores []int, | ||||
| ) (sockets []int, exclusive bool) { | ||||
| 	socketsMap := map[int]int{} | ||||
| 	for _, core := range cores { | ||||
| 		for _, hwthreadInCore := range topo.Core[core] { | ||||
| 			for socket, hwthreadsInSocket := range topo.Socket { | ||||
| 				for _, hwthreadInSocket := range hwthreadsInSocket { | ||||
| 					if hwthreadInCore == hwthreadInSocket { | ||||
| 						socketsMap[socket] += 1 | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	exclusive = true | ||||
| 	hwthreadsPerSocket := len(topo.Node) / len(topo.Socket) | ||||
| 	sockets = make([]int, 0, len(socketsMap)) | ||||
| 	for socket, count := range socketsMap { | ||||
| 		sockets = append(sockets, socket) | ||||
| 		exclusive = exclusive && count == hwthreadsPerSocket | ||||
| 	} | ||||
|  | ||||
| 	return sockets, exclusive | ||||
| } | ||||
|  | ||||
| // Return a list of core IDs given a list of hwthread IDs.  Even if just one | ||||
| // hwthread is in that core, add it to the list.  If no hwthreads other than | ||||
| // those in the argument list are assigned to one of the cores in the first | ||||
| // return value, return true as the second value.  TODO: Optimize this, there | ||||
| // must be a more efficient way/algorithm. | ||||
| func (topo *Topology) GetCoresFromHWThreads( | ||||
| 	hwthreads []int) (cores []int, exclusive bool) { | ||||
|  | ||||
| 	hwthreads []int, | ||||
| ) (cores []int, exclusive bool) { | ||||
| 	coresMap := map[int]int{} | ||||
| 	for _, hwthread := range hwthreads { | ||||
| 		for core, hwthreadsInCore := range topo.Core { | ||||
| @@ -136,8 +190,8 @@ func (topo *Topology) GetCoresFromHWThreads( | ||||
| // memory domains in the first return value, return true as the second value. | ||||
| // TODO: Optimize this, there must be a more efficient way/algorithm. | ||||
| func (topo *Topology) GetMemoryDomainsFromHWThreads( | ||||
| 	hwthreads []int) (memDoms []int, exclusive bool) { | ||||
|  | ||||
| 	hwthreads []int, | ||||
| ) (memDoms []int, exclusive bool) { | ||||
| 	memDomsMap := map[int]int{} | ||||
| 	for _, hwthread := range hwthreads { | ||||
| 		for memDom, hwthreadsInmemDom := range topo.MemoryDomain { | ||||
| @@ -172,7 +226,17 @@ func (topo *Topology) GetAcceleratorID(id int) (string, error) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (topo *Topology) GetAcceleratorIDs() ([]int, error) { | ||||
| // Return the list of hardware accelerator IDs (as strings) | ||||
| func (topo *Topology) GetAcceleratorIDs() []string { | ||||
| 	accels := make([]string, 0) | ||||
| 	for _, accel := range topo.Accelerators { | ||||
| 		accels = append(accels, accel.ID) | ||||
| 	} | ||||
| 	return accels | ||||
| } | ||||
|  | ||||
| // Outdated? Or: Return indices of accelerators in parent array? | ||||
| func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) { | ||||
| 	accels := make([]int, 0) | ||||
| 	for _, accel := range topo.Accelerators { | ||||
| 		id, err := strconv.Atoi(accel.ID) | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -23,6 +23,12 @@ type LdapConfig struct { | ||||
| 	SyncUserOnLogin bool `json:"syncUserOnLogin"` | ||||
| } | ||||
|  | ||||
| type OpenIDConfig struct { | ||||
| 	Provider          string `json:"provider"` | ||||
| 	SyncUserOnLogin   bool   `json:"syncUserOnLogin"` | ||||
| 	UpdateUserOnLogin bool   `json:"updateUserOnLogin"` | ||||
| } | ||||
|  | ||||
| type JWTAuthConfig struct { | ||||
| 	// Specifies for how long a JWT token shall be valid | ||||
| 	// as a string parsable by time.ParseDuration(). | ||||
| @@ -40,6 +46,9 @@ type JWTAuthConfig struct { | ||||
|  | ||||
| 	// Should a non-existent user be added to the DB based on the information in the token | ||||
| 	SyncUserOnLogin bool `json:"syncUserOnLogin"` | ||||
|  | ||||
| 	// Should an existing user be updated in the DB based on the information in the token | ||||
| 	UpdateUserOnLogin bool `json:"updateUserOnLogin"` | ||||
| } | ||||
|  | ||||
| type IntRange struct { | ||||
| @@ -48,8 +57,9 @@ type IntRange struct { | ||||
| } | ||||
|  | ||||
| type TimeRange struct { | ||||
| 	From *time.Time `json:"from"` | ||||
| 	To   *time.Time `json:"to"` | ||||
| 	From  *time.Time `json:"from"` | ||||
| 	To    *time.Time `json:"to"` | ||||
| 	Range string     `json:"range,omitempty"` | ||||
| } | ||||
|  | ||||
| type FilterRanges struct { | ||||
| @@ -65,10 +75,24 @@ type ClusterConfig struct { | ||||
| } | ||||
|  | ||||
| type Retention struct { | ||||
| 	Age       int    `json:"age"` | ||||
| 	IncludeDB bool   `json:"includeDB"` | ||||
| 	Policy    string `json:"policy"` | ||||
| 	Location  string `json:"location"` | ||||
| 	Age       int    `json:"age"` | ||||
| 	IncludeDB bool   `json:"includeDB"` | ||||
| } | ||||
|  | ||||
| type ResampleConfig struct { | ||||
| 	// Array of resampling target resolutions, in seconds; Example: [600,300,60] | ||||
| 	Resolutions []int `json:"resolutions"` | ||||
| 	// Trigger next zoom level at less than this many visible datapoints | ||||
| 	Trigger int `json:"trigger"` | ||||
| } | ||||
|  | ||||
| type CronFrequency struct { | ||||
| 	// Duration Update Worker [Defaults to '5m'] | ||||
| 	DurationWorker string `json:"duration-worker"` | ||||
| 	// Metric-Footprint Update Worker [Defaults to '10m'] | ||||
| 	FootprintWorker string `json:"footprint-worker"` | ||||
| } | ||||
|  | ||||
| // Format of the configuration (file). See below for the defaults. | ||||
| @@ -76,7 +100,7 @@ type ProgramConfig struct { | ||||
| 	// Address where the http (or https) server will listen on (for example: 'localhost:80'). | ||||
| 	Addr string `json:"addr"` | ||||
|  | ||||
| 	// Addresses from which secured API endpoints can be reached | ||||
| 	// Addresses from which secured admin API endpoints can be reached; may be the wildcard "*" | ||||
| 	ApiAllowedIPs []string `json:"apiAllowedIPs"` | ||||
|  | ||||
| 	// Drop root permissions once .env was read and the port was taken. | ||||
| @@ -109,8 +133,9 @@ type ProgramConfig struct { | ||||
| 	Validate bool `json:"validate"` | ||||
|  | ||||
| 	// For LDAP Authentication and user synchronisation. | ||||
| 	LdapConfig *LdapConfig    `json:"ldap"` | ||||
| 	JwtConfig  *JWTAuthConfig `json:"jwts"` | ||||
| 	LdapConfig   *LdapConfig    `json:"ldap"` | ||||
| 	JwtConfig    *JWTAuthConfig `json:"jwts"` | ||||
| 	OpenIDConfig *OpenIDConfig  `json:"oidc"` | ||||
|  | ||||
| 	// If 0 or empty, the session does not expire! | ||||
| 	SessionMaxAge string `json:"session-max-age"` | ||||
| @@ -127,6 +152,9 @@ type ProgramConfig struct { | ||||
| 	// be provided! Most options here can be overwritten by the user. | ||||
| 	UiDefaults map[string]interface{} `json:"ui-defaults"` | ||||
|  | ||||
| 	// If present, enables dynamic zoom in frontend metric plots using the configured values | ||||
| 	EnableResampling *ResampleConfig `json:"enable-resampling"` | ||||
|  | ||||
| 	// Where to store MachineState files | ||||
| 	MachineStateDir string `json:"machine-state-dir"` | ||||
|  | ||||
| @@ -136,6 +164,13 @@ type ProgramConfig struct { | ||||
| 	// Jobs with a duration below this threshold (in seconds) are considered "short" and are filtered in specific views. | ||||
| 	ShortRunningJobsDuration int `json:"short-running-jobs-duration"` | ||||
|  | ||||
| 	// Energy Mix CO2 Emission Constant [g/kWh] | ||||
| 	// If set, displays the estimated CO2 emission for a job based on the job's totalEnergy | ||||
| 	EmissionConstant int `json:"emission-constant"` | ||||
|  | ||||
| 	// Frequency of cron job workers | ||||
| 	CronFrequency *CronFrequency `json:"cron-frequency"` | ||||
|  | ||||
| 	// Array of Clusters | ||||
| 	Clusters []*ClusterConfig `json:"clusters"` | ||||
| } | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -16,30 +16,33 @@ import ( | ||||
| // Common subset of Job and JobMeta. Use one of those, not this type directly. | ||||
|  | ||||
| type BaseJob struct { | ||||
| 	// The unique identifier of a job | ||||
| 	JobID      int64  `json:"jobId" db:"job_id" example:"123000"` | ||||
| 	User       string `json:"user" db:"user" example:"abcd100h"`                       // The unique identifier of a user | ||||
| 	Project    string `json:"project" db:"project" example:"abcd200"`                  // The unique identifier of a project | ||||
| 	Cluster    string `json:"cluster" db:"cluster" example:"fritz"`                    // The unique identifier of a cluster | ||||
| 	SubCluster string `json:"subCluster" db:"subcluster" example:"main"`               // The unique identifier of a sub cluster | ||||
| 	Partition  string `json:"partition,omitempty" db:"partition" example:"main"`       // The Slurm partition to which the job was submitted | ||||
| 	ArrayJobId int64  `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` // The unique identifier of an array job | ||||
| 	NumNodes   int32  `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`         // Number of nodes used (Min > 0) | ||||
| 	// NumCores         int32             `json:"numCores" db:"num_cores" example:"20" minimum:"1"`                                                             // Number of HWThreads used (Min > 0) | ||||
| 	NumHWThreads     int32             `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`                                           // Number of HWThreads used (Min > 0) | ||||
| 	NumAcc           int32             `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`                                                        // Number of accelerators used (Min > 0) | ||||
| 	Exclusive        int32             `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`                                                 // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user | ||||
| 	MonitoringStatus int32             `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`                        // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull | ||||
| 	SMT              int32             `json:"smt,omitempty" db:"smt" example:"4"`                                                                           // SMT threads used by job | ||||
| 	State            JobState          `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job | ||||
| 	Duration         int32             `json:"duration" db:"duration" example:"43200" minimum:"1"`                                                           // Duration of job in seconds (Min > 0) | ||||
| 	Walltime         int64             `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`                                                 // Requested walltime of job in seconds (Min > 0) | ||||
| 	Tags             []*Tag            `json:"tags,omitempty"`                                                                                               // List of tags | ||||
| 	RawResources     []byte            `json:"-" db:"resources"`                                                                                             // Resources used by job [As Bytes] | ||||
| 	Resources        []*Resource       `json:"resources"`                                                                                                    // Resources used by job | ||||
| 	RawMetaData      []byte            `json:"-" db:"meta_data"`                                                                                             // Additional information about the job [As Bytes] | ||||
| 	MetaData         map[string]string `json:"metaData"`                                                                                                     // Additional information about the job | ||||
| 	ConcurrentJobs   JobLinkResultList `json:"concurrentJobs"` | ||||
| 	Cluster            string             `json:"cluster" db:"cluster" example:"fritz"` | ||||
| 	SubCluster         string             `json:"subCluster" db:"subcluster" example:"main"` | ||||
| 	Partition          string             `json:"partition,omitempty" db:"cluster_partition" example:"main"` | ||||
| 	Project            string             `json:"project" db:"project" example:"abcd200"` | ||||
| 	User               string             `json:"user" db:"hpc_user" example:"abcd100h"` | ||||
| 	State              JobState           `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` | ||||
| 	Tags               []*Tag             `json:"tags,omitempty"` | ||||
| 	RawEnergyFootprint []byte             `json:"-" db:"energy_footprint"` | ||||
| 	RawFootprint       []byte             `json:"-" db:"footprint"` | ||||
| 	RawMetaData        []byte             `json:"-" db:"meta_data"` | ||||
| 	RawResources       []byte             `json:"-" db:"resources"` | ||||
| 	Resources          []*Resource        `json:"resources"` | ||||
| 	EnergyFootprint    map[string]float64 `json:"energyFootprint"` | ||||
| 	Footprint          map[string]float64 `json:"footprint"` | ||||
| 	MetaData           map[string]string  `json:"metaData"` | ||||
| 	ConcurrentJobs     JobLinkResultList  `json:"concurrentJobs"` | ||||
| 	Energy             float64            `json:"energy" db:"energy"` | ||||
| 	ArrayJobId         int64              `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` | ||||
| 	Walltime           int64              `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` | ||||
| 	JobID              int64              `json:"jobId" db:"job_id" example:"123000"` | ||||
| 	Duration           int32              `json:"duration" db:"duration" example:"43200" minimum:"1"` | ||||
| 	SMT                int32              `json:"smt,omitempty" db:"smt" example:"4"` | ||||
| 	MonitoringStatus   int32              `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` | ||||
| 	Exclusive          int32              `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` | ||||
| 	NumAcc             int32              `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"` | ||||
| 	NumHWThreads       int32              `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"` | ||||
| 	NumNodes           int32              `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` | ||||
| } | ||||
|  | ||||
| // Job struct type | ||||
| @@ -49,19 +52,10 @@ type BaseJob struct { | ||||
| // Job model | ||||
| // @Description Information of an HPC job. | ||||
| type Job struct { | ||||
| 	// The unique identifier of a job in the database | ||||
| 	ID int64 `json:"id" db:"id"` | ||||
| 	StartTime time.Time `json:"startTime"` | ||||
| 	BaseJob | ||||
| 	StartTimeUnix    int64     `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds | ||||
| 	StartTime        time.Time `json:"startTime"`                              // Start time as 'time.Time' data type | ||||
| 	MemUsedMax       float64   `json:"memUsedMax" db:"mem_used_max"`           // MemUsedMax as Float64 | ||||
| 	FlopsAnyAvg      float64   `json:"flopsAnyAvg" db:"flops_any_avg"`         // FlopsAnyAvg as Float64 | ||||
| 	MemBwAvg         float64   `json:"memBwAvg" db:"mem_bw_avg"`               // MemBwAvg as Float64 | ||||
| 	LoadAvg          float64   `json:"loadAvg" db:"load_avg"`                  // LoadAvg as Float64 | ||||
| 	NetBwAvg         float64   `json:"-" db:"net_bw_avg"`                      // NetBwAvg as Float64 | ||||
| 	NetDataVolTotal  float64   `json:"-" db:"net_data_vol_total"`              // NetDataVolTotal as Float64 | ||||
| 	FileBwAvg        float64   `json:"-" db:"file_bw_avg"`                     // FileBwAvg as Float64 | ||||
| 	FileDataVolTotal float64   `json:"-" db:"file_data_vol_total"`             // FileDataVolTotal as Float64 | ||||
| 	ID            int64 `json:"id" db:"id"` | ||||
| 	StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` | ||||
| } | ||||
|  | ||||
| //	JobMeta struct type | ||||
| @@ -88,11 +82,10 @@ type JobLinkResultList struct { | ||||
| // JobMeta model | ||||
| // @Description Meta data information of an HPC job. | ||||
| type JobMeta struct { | ||||
| 	// The unique identifier of a job in the database | ||||
| 	ID *int64 `json:"id,omitempty"` | ||||
| 	ID         *int64                   `json:"id,omitempty"` | ||||
| 	Statistics map[string]JobStatistics `json:"statistics"` | ||||
| 	BaseJob | ||||
| 	StartTime  int64                    `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0) | ||||
| 	Statistics map[string]JobStatistics `json:"statistics"`                                                 // Metric statistics of job | ||||
| 	StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` | ||||
| } | ||||
|  | ||||
| const ( | ||||
| @@ -124,18 +117,19 @@ type JobStatistics struct { | ||||
| // Tag model | ||||
| // @Description Defines a tag using name and type. | ||||
| type Tag struct { | ||||
| 	ID   int64  `json:"id" db:"id"`                           // The unique DB identifier of a tag | ||||
| 	Type string `json:"type" db:"tag_type" example:"Debug"`   // Tag Type | ||||
| 	Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name | ||||
| 	Type  string `json:"type" db:"tag_type" example:"Debug"` | ||||
| 	Name  string `json:"name" db:"tag_name" example:"Testjob"` | ||||
| 	Scope string `json:"scope" db:"tag_scope" example:"global"` | ||||
| 	ID    int64  `json:"id" db:"id"` | ||||
| } | ||||
|  | ||||
| // Resource model | ||||
| // @Description A resource used by a job | ||||
| type Resource struct { | ||||
| 	Hostname      string   `json:"hostname"`                // Name of the host (= node) | ||||
| 	HWThreads     []int    `json:"hwthreads,omitempty"`     // List of OS processor ids | ||||
| 	Accelerators  []string `json:"accelerators,omitempty"`  // List of of accelerator device ids | ||||
| 	Configuration string   `json:"configuration,omitempty"` // The configuration options of the node | ||||
| 	Hostname      string   `json:"hostname"` | ||||
| 	Configuration string   `json:"configuration,omitempty"` | ||||
| 	HWThreads     []int    `json:"hwthreads,omitempty"` | ||||
| 	Accelerators  []string `json:"accelerators,omitempty"` | ||||
| } | ||||
|  | ||||
| type JobState string | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -10,22 +10,31 @@ import ( | ||||
| 	"math" | ||||
| 	"sort" | ||||
| 	"unsafe" | ||||
|  | ||||
| 	"github.com/ClusterCockpit/cc-backend/internal/util" | ||||
| ) | ||||
|  | ||||
| type JobData map[string]map[MetricScope]*JobMetric | ||||
| type ScopedJobStats map[string]map[MetricScope][]*ScopedStats | ||||
|  | ||||
| type JobMetric struct { | ||||
| 	Unit             Unit         `json:"unit"` | ||||
| 	Timestep         int          `json:"timestep"` | ||||
| 	Series           []Series     `json:"series"` | ||||
| 	StatisticsSeries *StatsSeries `json:"statisticsSeries,omitempty"` | ||||
| 	Unit             Unit         `json:"unit"` | ||||
| 	Series           []Series     `json:"series"` | ||||
| 	Timestep         int          `json:"timestep"` | ||||
| } | ||||
|  | ||||
| type Series struct { | ||||
| 	Hostname   string           `json:"hostname"` | ||||
| 	Id         *string          `json:"id,omitempty"` | ||||
| 	Statistics MetricStatistics `json:"statistics"` | ||||
| 	Hostname   string           `json:"hostname"` | ||||
| 	Data       []Float          `json:"data"` | ||||
| 	Statistics MetricStatistics `json:"statistics"` | ||||
| } | ||||
|  | ||||
| type ScopedStats struct { | ||||
| 	Hostname string            `json:"hostname"` | ||||
| 	Id       *string           `json:"id,omitempty"` | ||||
| 	Data     *MetricStatistics `json:"data"` | ||||
| } | ||||
|  | ||||
| type MetricStatistics struct { | ||||
| @@ -35,10 +44,11 @@ type MetricStatistics struct { | ||||
| } | ||||
|  | ||||
| type StatsSeries struct { | ||||
| 	Percentiles map[int][]Float `json:"percentiles,omitempty"` | ||||
| 	Mean        []Float         `json:"mean"` | ||||
| 	Median      []Float         `json:"median"` | ||||
| 	Min         []Float         `json:"min"` | ||||
| 	Max         []Float         `json:"max"` | ||||
| 	Percentiles map[int][]Float `json:"percentiles,omitempty"` | ||||
| } | ||||
|  | ||||
| type MetricScope string | ||||
| @@ -121,6 +131,7 @@ func (jd *JobData) Size() int { | ||||
| 			if metric.StatisticsSeries != nil { | ||||
| 				n += len(metric.StatisticsSeries.Max) | ||||
| 				n += len(metric.StatisticsSeries.Mean) | ||||
| 				n += len(metric.StatisticsSeries.Median) | ||||
| 				n += len(metric.StatisticsSeries.Min) | ||||
| 			} | ||||
|  | ||||
| @@ -149,53 +160,74 @@ func (jm *JobMetric) AddStatisticsSeries() { | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	min, mean, max := make([]Float, n), make([]Float, n), make([]Float, n) | ||||
| 	// mean := make([]Float, n) | ||||
| 	min, median, max := make([]Float, n), make([]Float, n), make([]Float, n) | ||||
| 	i := 0 | ||||
| 	for ; i < m; i++ { | ||||
| 		smin, ssum, smax := math.MaxFloat32, 0.0, -math.MaxFloat32 | ||||
| 		seriesCount := len(jm.Series) | ||||
| 		// ssum := 0.0 | ||||
| 		smin, smed, smax := math.MaxFloat32, make([]float64, seriesCount), -math.MaxFloat32 | ||||
| 		notnan := 0 | ||||
| 		for j := 0; j < len(jm.Series); j++ { | ||||
| 		for j := 0; j < seriesCount; j++ { | ||||
| 			x := float64(jm.Series[j].Data[i]) | ||||
| 			if math.IsNaN(x) { | ||||
| 				continue | ||||
| 			} | ||||
|  | ||||
| 			notnan += 1 | ||||
| 			ssum += x | ||||
| 			// ssum += x | ||||
| 			smed[j] = x | ||||
| 			smin = math.Min(smin, x) | ||||
| 			smax = math.Max(smax, x) | ||||
| 		} | ||||
|  | ||||
| 		if notnan < 3 { | ||||
| 			min[i] = NaN | ||||
| 			mean[i] = NaN | ||||
| 			// mean[i] = NaN | ||||
| 			median[i] = NaN | ||||
| 			max[i] = NaN | ||||
| 		} else { | ||||
| 			min[i] = Float(smin) | ||||
| 			mean[i] = Float(ssum / float64(notnan)) | ||||
| 			// mean[i] = Float(ssum / float64(notnan)) | ||||
| 			max[i] = Float(smax) | ||||
|  | ||||
| 			medianRaw, err := util.Median(smed) | ||||
| 			if err != nil { | ||||
| 				median[i] = NaN | ||||
| 			} else { | ||||
| 				median[i] = Float(medianRaw) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	for ; i < n; i++ { | ||||
| 		min[i] = NaN | ||||
| 		mean[i] = NaN | ||||
| 		// mean[i] = NaN | ||||
| 		median[i] = NaN | ||||
| 		max[i] = NaN | ||||
| 	} | ||||
|  | ||||
| 	if smooth { | ||||
| 		for i := 2; i < len(mean)-2; i++ { | ||||
| 		for i := 2; i < len(median)-2; i++ { | ||||
| 			if min[i].IsNaN() { | ||||
| 				continue | ||||
| 			} | ||||
|  | ||||
| 			min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5 | ||||
| 			max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5 | ||||
| 			mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5 | ||||
| 			// mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5 | ||||
| 			// Reduce Median further | ||||
| 			smoothRaw := []float64{float64(median[i-2]), float64(median[i-1]), float64(median[i]), float64(median[i+1]), float64(median[i+2])} | ||||
| 			smoothMedian, err := util.Median(smoothRaw) | ||||
| 			if err != nil { | ||||
| 				median[i] = NaN | ||||
| 			} else { | ||||
| 				median[i] = Float(smoothMedian) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	jm.StatisticsSeries = &StatsSeries{Mean: mean, Min: min, Max: max} | ||||
| 	jm.StatisticsSeries = &StatsSeries{Median: median, Min: min, Max: max} // Mean: mean | ||||
| } | ||||
|  | ||||
| func (jd *JobData) AddNodeScope(metric string) bool { | ||||
| @@ -204,7 +236,7 @@ func (jd *JobData) AddNodeScope(metric string) bool { | ||||
| 		return false | ||||
| 	} | ||||
|  | ||||
| 	var maxScope MetricScope = MetricScopeInvalid | ||||
| 	maxScope := MetricScopeInvalid | ||||
| 	for scope := range scopes { | ||||
| 		maxScope = maxScope.Max(scope) | ||||
| 	} | ||||
| @@ -266,6 +298,21 @@ func (jd *JobData) AddNodeScope(metric string) bool { | ||||
| 	return true | ||||
| } | ||||
|  | ||||
| func (jd *JobData) RoundMetricStats() { | ||||
| 	// TODO: Make Digit-Precision Configurable? (Currently: Fixed to 2 Digits) | ||||
| 	for _, scopes := range *jd { | ||||
| 		for _, jm := range scopes { | ||||
| 			for index := range jm.Series { | ||||
| 				jm.Series[index].Statistics = MetricStatistics{ | ||||
| 					Avg: (math.Round(jm.Series[index].Statistics.Avg*100) / 100), | ||||
| 					Min: (math.Round(jm.Series[index].Statistics.Min*100) / 100), | ||||
| 					Max: (math.Round(jm.Series[index].Statistics.Max*100) / 100), | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (jm *JobMetric) AddPercentiles(ps []int) bool { | ||||
| 	if jm.StatisticsSeries == nil { | ||||
| 		jm.AddStatisticsSeries() | ||||
|   | ||||
| @@ -1,284 +1,339 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://cluster.schema.json", | ||||
|     "title": "HPC cluster description", | ||||
|     "description": "Meta data information of a HPC cluster", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "name": { | ||||
|             "description": "The unique identifier of a cluster", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "metricConfig": { | ||||
|             "description": "Metric specifications", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "name": { | ||||
|                         "description": "Metric name", | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "unit": { | ||||
|                         "description": "Metric unit", | ||||
|                         "$ref": "embedfs://unit.schema.json" | ||||
|                     }, | ||||
|                     "scope": { | ||||
|                         "description": "Native measurement resolution", | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "timestep": { | ||||
|                         "description": "Frequency of timeseries points", | ||||
|                         "type": "integer" | ||||
|                     }, | ||||
|                     "aggregation": { | ||||
|                         "description": "How the metric is aggregated", | ||||
|                         "type": "string", | ||||
|                         "enum": [ | ||||
|                             "sum", | ||||
|                             "avg" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "peak": { | ||||
|                         "description": "Metric peak threshold (Upper metric limit)", | ||||
|                         "type": "number" | ||||
|                     }, | ||||
|                     "normal": { | ||||
|                         "description": "Metric normal threshold", | ||||
|                         "type": "number" | ||||
|                     }, | ||||
|                     "caution": { | ||||
|                         "description": "Metric caution threshold (Suspicious but does not require immediate action)", | ||||
|                         "type": "number" | ||||
|                     }, | ||||
|                     "alert": { | ||||
|                         "description": "Metric alert threshold (Requires immediate action)", | ||||
|                         "type": "number" | ||||
|                     }, | ||||
|                     "subClusters": { | ||||
|                         "description": "Array of cluster hardware partition metric thresholds", | ||||
|                         "type": "array", | ||||
|                         "items": { | ||||
|                             "type": "object", | ||||
|                             "properties": { | ||||
|                                 "name": { | ||||
|                                     "description": "Hardware partition name", | ||||
|                                     "type": "string" | ||||
|                                 }, | ||||
|                                 "peak": { | ||||
|                                     "type": "number" | ||||
|                                 }, | ||||
|                                 "normal": { | ||||
|                                     "type": "number" | ||||
|                                 }, | ||||
|                                 "caution": { | ||||
|                                     "type": "number" | ||||
|                                 }, | ||||
|                                 "alert": { | ||||
|                                     "type": "number" | ||||
|                                 }, | ||||
|                                 "remove": { | ||||
|                                     "type": "boolean" | ||||
|                                 } | ||||
|                             }, | ||||
|                             "required": [ | ||||
|                                 "name" | ||||
|                             ] | ||||
|                         } | ||||
|                     } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "name", | ||||
|                     "unit", | ||||
|                     "scope", | ||||
|                     "timestep", | ||||
|                     "aggregation", | ||||
|                     "peak", | ||||
|                     "normal", | ||||
|                     "caution", | ||||
|                     "alert" | ||||
|                 ] | ||||
|             }, | ||||
|             "minItems": 1 | ||||
|         }, | ||||
|         "subClusters": { | ||||
|             "description": "Array of cluster hardware partitions", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "name": { | ||||
|                         "description": "Hardware partition name", | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "processorType": { | ||||
|                         "description": "Processor type", | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "socketsPerNode": { | ||||
|                         "description": "Number of sockets per node", | ||||
|                         "type": "integer" | ||||
|                     }, | ||||
|                     "coresPerSocket": { | ||||
|                         "description": "Number of cores per socket", | ||||
|                         "type": "integer" | ||||
|                     }, | ||||
|                     "threadsPerCore": { | ||||
|                         "description": "Number of SMT threads per core", | ||||
|                         "type": "integer" | ||||
|                     }, | ||||
|                     "flopRateScalar": { | ||||
|                         "description": "Theoretical node peak flop rate for scalar code in GFlops/s", | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "unit": { | ||||
|                                 "description": "Metric unit", | ||||
|                                 "$ref": "embedfs://unit.schema.json" | ||||
|                             }, | ||||
|                             "value": { | ||||
|                                 "type": "number" | ||||
|                             } | ||||
|                         } | ||||
|                     }, | ||||
|                     "flopRateSimd": { | ||||
|                         "description": "Theoretical node peak flop rate for SIMD code in GFlops/s", | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "unit": { | ||||
|                                 "description": "Metric unit", | ||||
|                                 "$ref": "embedfs://unit.schema.json" | ||||
|                             }, | ||||
|                             "value": { | ||||
|                                 "type": "number" | ||||
|                             } | ||||
|                         } | ||||
|                     }, | ||||
|                     "memoryBandwidth": { | ||||
|                         "description": "Theoretical node peak memory bandwidth in GB/s", | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "unit": { | ||||
|                                 "description": "Metric unit", | ||||
|                                 "$ref": "embedfs://unit.schema.json" | ||||
|                             }, | ||||
|                             "value": { | ||||
|                                 "type": "number" | ||||
|                             } | ||||
|                         } | ||||
|                     }, | ||||
|                     "nodes": { | ||||
|                         "description": "Node list expression", | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "topology": { | ||||
|                         "description": "Node topology", | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "description": "HwTread lists of node", | ||||
|                                 "type": "array", | ||||
|                                 "items": { | ||||
|                                     "type": "integer" | ||||
|                                 } | ||||
|                             }, | ||||
|                             "socket": { | ||||
|                                 "description": "HwTread lists of sockets", | ||||
|                                 "type": "array", | ||||
|                                 "items": { | ||||
|                                     "type": "array", | ||||
|                                     "items": { | ||||
|                                         "type": "integer" | ||||
|                                     } | ||||
|                                 } | ||||
|                             }, | ||||
|                             "memoryDomain": { | ||||
|                                 "description": "HwTread lists of memory domains", | ||||
|                                 "type": "array", | ||||
|                                 "items": { | ||||
|                                     "type": "array", | ||||
|                                     "items": { | ||||
|                                         "type": "integer" | ||||
|                                     } | ||||
|                                 } | ||||
|                             }, | ||||
|                             "die": { | ||||
|                                 "description": "HwTread lists of dies", | ||||
|                                 "type": "array", | ||||
|                                 "items": { | ||||
|                                     "type": "array", | ||||
|                                     "items": { | ||||
|                                         "type": "integer" | ||||
|                                     } | ||||
|                                 } | ||||
|                             }, | ||||
|                             "core": { | ||||
|                                 "description": "HwTread lists of cores", | ||||
|                                 "type": "array", | ||||
|                                 "items": { | ||||
|                                     "type": "array", | ||||
|                                     "items": { | ||||
|                                         "type": "integer" | ||||
|                                     } | ||||
|                                 } | ||||
|                             }, | ||||
|                             "accelerators": { | ||||
|                                 "type": "array", | ||||
|                                 "description": "List of of accelerator devices", | ||||
|                                 "items": { | ||||
|                                     "type": "object", | ||||
|                                     "properties": { | ||||
|                                         "id": { | ||||
|                                             "type": "string", | ||||
|                                             "description": "The unique device id" | ||||
|                                         }, | ||||
|                                         "type": { | ||||
|                                             "type": "string", | ||||
|                                             "description": "The accelerator type", | ||||
|                                             "enum": [ | ||||
|                                                 "Nvidia GPU", | ||||
|                                                 "AMD GPU", | ||||
|                                                 "Intel GPU" | ||||
|                                             ] | ||||
|                                         }, | ||||
|                                         "model": { | ||||
|                                             "type": "string", | ||||
|                                             "description": "The accelerator model" | ||||
|                                         } | ||||
|                                     }, | ||||
|                                     "required": [ | ||||
|                                         "id", | ||||
|                                         "type", | ||||
|                                         "model" | ||||
|                                     ] | ||||
|                                 } | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node", | ||||
|                             "socket", | ||||
|                             "memoryDomain" | ||||
|                         ] | ||||
|                     } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "name", | ||||
|                     "nodes", | ||||
|                     "topology", | ||||
|                     "processorType", | ||||
|                     "socketsPerNode", | ||||
|                     "coresPerSocket", | ||||
|                     "threadsPerCore", | ||||
|                     "flopRateScalar", | ||||
|                     "flopRateSimd", | ||||
|                     "memoryBandwidth" | ||||
|                 ] | ||||
|             }, | ||||
|             "minItems": 1 | ||||
|         } | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://cluster.schema.json", | ||||
|   "title": "HPC cluster description", | ||||
|   "description": "Meta data information of a HPC cluster", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "name": { | ||||
|       "description": "The unique identifier of a cluster", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "required": [ | ||||
|         "name", | ||||
|         "metricConfig", | ||||
|         "subClusters" | ||||
|     ] | ||||
|     "metricConfig": { | ||||
|       "description": "Metric specifications", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|           "name": { | ||||
|             "description": "Metric name", | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "unit": { | ||||
|             "description": "Metric unit", | ||||
|             "$ref": "embedfs://unit.schema.json" | ||||
|           }, | ||||
|           "scope": { | ||||
|             "description": "Native measurement resolution", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|               "node", | ||||
|               "socket", | ||||
|               "memoryDomain", | ||||
|               "core", | ||||
|               "hwthread", | ||||
|               "accelerator" | ||||
|             ] | ||||
|           }, | ||||
|           "timestep": { | ||||
|             "description": "Frequency of timeseries points in seconds", | ||||
|             "type": "integer" | ||||
|           }, | ||||
|           "aggregation": { | ||||
|             "description": "How the metric is aggregated", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|               "sum", | ||||
|               "avg" | ||||
|             ] | ||||
|           }, | ||||
|           "footprint": { | ||||
|             "description": "Is it a footprint metric and what type", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|               "avg", | ||||
|               "max", | ||||
|               "min" | ||||
|             ] | ||||
|           }, | ||||
|           "energy": { | ||||
|             "description": "Is it used to calculate job energy", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|               "power", | ||||
|               "energy" | ||||
|             ] | ||||
|           }, | ||||
|           "lowerIsBetter": { | ||||
|             "description": "Is lower better.", | ||||
|             "type": "boolean" | ||||
|           }, | ||||
|           "peak": { | ||||
|             "description": "Metric peak threshold (Upper metric limit)", | ||||
|             "type": "number" | ||||
|           }, | ||||
|           "normal": { | ||||
|             "description": "Metric normal threshold", | ||||
|             "type": "number" | ||||
|           }, | ||||
|           "caution": { | ||||
|             "description": "Metric caution threshold (Suspicious but does not require immediate action)", | ||||
|             "type": "number" | ||||
|           }, | ||||
|           "alert": { | ||||
|             "description": "Metric alert threshold (Requires immediate action)", | ||||
|             "type": "number" | ||||
|           }, | ||||
|           "subClusters": { | ||||
|             "description": "Array of cluster hardware partition metric thresholds", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|               "type": "object", | ||||
|               "properties": { | ||||
|                 "name": { | ||||
|                   "description": "Hardware partition name", | ||||
|                   "type": "string" | ||||
|                 }, | ||||
|                 "footprint": { | ||||
|                   "description": "Is it a footprint metric and what type. Overwrite global setting", | ||||
|                   "type": "string", | ||||
|                   "enum": [ | ||||
|                     "avg", | ||||
|                     "max", | ||||
|                     "min" | ||||
|                   ] | ||||
|                 }, | ||||
|                 "energy": { | ||||
|                   "description": "Is it used to calculate job energy. Overwrite global", | ||||
|                   "type": "string", | ||||
|                   "enum": [ | ||||
|                     "power", | ||||
|                     "energy" | ||||
|                   ] | ||||
|                 }, | ||||
|                 "lowerIsBetter": { | ||||
|                   "description": "Is lower better. Overwrite global", | ||||
|                   "type": "boolean" | ||||
|                 }, | ||||
|                 "peak": { | ||||
|                   "description": "The maximum possible metric value", | ||||
|                   "type": "number" | ||||
|                 }, | ||||
|                 "normal": { | ||||
|                   "description": "A common metric value level", | ||||
|                   "type": "number" | ||||
|                 }, | ||||
|                 "caution": { | ||||
|                   "description": "Metric value requires attention", | ||||
|                   "type": "number" | ||||
|                 }, | ||||
|                 "alert": { | ||||
|                   "description": "Metric value requiring immediate attention", | ||||
|                   "type": "number" | ||||
|                 }, | ||||
|                 "remove": { | ||||
|                   "description": "Remove this metric for this subcluster", | ||||
|                   "type": "boolean" | ||||
|                 } | ||||
|               }, | ||||
|               "required": [ | ||||
|                 "name" | ||||
|               ] | ||||
|             } | ||||
|           } | ||||
|         }, | ||||
|         "required": [ | ||||
|           "name", | ||||
|           "unit", | ||||
|           "scope", | ||||
|           "timestep", | ||||
|           "aggregation", | ||||
|           "peak", | ||||
|           "normal", | ||||
|           "caution", | ||||
|           "alert" | ||||
|         ] | ||||
|       }, | ||||
|       "minItems": 1 | ||||
|     }, | ||||
|     "subClusters": { | ||||
|       "description": "Array of cluster hardware partitions", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|           "name": { | ||||
|             "description": "Hardware partition name", | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "processorType": { | ||||
|             "description": "Processor type", | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "socketsPerNode": { | ||||
|             "description": "Number of sockets per node", | ||||
|             "type": "integer" | ||||
|           }, | ||||
|           "coresPerSocket": { | ||||
|             "description": "Number of cores per socket", | ||||
|             "type": "integer" | ||||
|           }, | ||||
|           "threadsPerCore": { | ||||
|             "description": "Number of SMT threads per core", | ||||
|             "type": "integer" | ||||
|           }, | ||||
|           "flopRateScalar": { | ||||
|             "description": "Theoretical node peak flop rate for scalar code in GFlops/s", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|               "unit": { | ||||
|                 "description": "Metric unit", | ||||
|                 "$ref": "embedfs://unit.schema.json" | ||||
|               }, | ||||
|               "value": { | ||||
|                 "type": "number" | ||||
|               } | ||||
|             } | ||||
|           }, | ||||
|           "flopRateSimd": { | ||||
|             "description": "Theoretical node peak flop rate for SIMD code in GFlops/s", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|               "unit": { | ||||
|                 "description": "Metric unit", | ||||
|                 "$ref": "embedfs://unit.schema.json" | ||||
|               }, | ||||
|               "value": { | ||||
|                 "type": "number" | ||||
|               } | ||||
|             } | ||||
|           }, | ||||
|           "memoryBandwidth": { | ||||
|             "description": "Theoretical node peak memory bandwidth in GB/s", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|               "unit": { | ||||
|                 "description": "Metric unit", | ||||
|                 "$ref": "embedfs://unit.schema.json" | ||||
|               }, | ||||
|               "value": { | ||||
|                 "type": "number" | ||||
|               } | ||||
|             } | ||||
|           }, | ||||
|           "nodes": { | ||||
|             "description": "Node list expression", | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "topology": { | ||||
|             "description": "Node topology", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "description": "HwTread lists of node", | ||||
|                 "type": "array", | ||||
|                 "items": { | ||||
|                   "type": "integer" | ||||
|                 } | ||||
|               }, | ||||
|               "socket": { | ||||
|                 "description": "HwTread lists of sockets", | ||||
|                 "type": "array", | ||||
|                 "items": { | ||||
|                   "type": "array", | ||||
|                   "items": { | ||||
|                     "type": "integer" | ||||
|                   } | ||||
|                 } | ||||
|               }, | ||||
|               "memoryDomain": { | ||||
|                 "description": "HwTread lists of memory domains", | ||||
|                 "type": "array", | ||||
|                 "items": { | ||||
|                   "type": "array", | ||||
|                   "items": { | ||||
|                     "type": "integer" | ||||
|                   } | ||||
|                 } | ||||
|               }, | ||||
|               "die": { | ||||
|                 "description": "HwTread lists of dies", | ||||
|                 "type": "array", | ||||
|                 "items": { | ||||
|                   "type": "array", | ||||
|                   "items": { | ||||
|                     "type": "integer" | ||||
|                   } | ||||
|                 } | ||||
|               }, | ||||
|               "core": { | ||||
|                 "description": "HwTread lists of cores", | ||||
|                 "type": "array", | ||||
|                 "items": { | ||||
|                   "type": "array", | ||||
|                   "items": { | ||||
|                     "type": "integer" | ||||
|                   } | ||||
|                 } | ||||
|               }, | ||||
|               "accelerators": { | ||||
|                 "type": "array", | ||||
|                 "description": "List of of accelerator devices", | ||||
|                 "items": { | ||||
|                   "type": "object", | ||||
|                   "properties": { | ||||
|                     "id": { | ||||
|                       "type": "string", | ||||
|                       "description": "The unique device id" | ||||
|                     }, | ||||
|                     "type": { | ||||
|                       "type": "string", | ||||
|                       "description": "The accelerator type", | ||||
|                       "enum": [ | ||||
|                         "Nvidia GPU", | ||||
|                         "AMD GPU", | ||||
|                         "Intel GPU" | ||||
|                       ] | ||||
|                     }, | ||||
|                     "model": { | ||||
|                       "type": "string", | ||||
|                       "description": "The accelerator model" | ||||
|                     } | ||||
|                   }, | ||||
|                   "required": [ | ||||
|                     "id", | ||||
|                     "type", | ||||
|                     "model" | ||||
|                   ] | ||||
|                 } | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node", | ||||
|               "socket", | ||||
|               "memoryDomain" | ||||
|             ] | ||||
|           } | ||||
|         }, | ||||
|         "required": [ | ||||
|           "name", | ||||
|           "nodes", | ||||
|           "topology", | ||||
|           "processorType", | ||||
|           "socketsPerNode", | ||||
|           "coresPerSocket", | ||||
|           "threadsPerCore", | ||||
|           "flopRateScalar", | ||||
|           "flopRateSimd", | ||||
|           "memoryBandwidth" | ||||
|         ] | ||||
|       }, | ||||
|       "minItems": 1 | ||||
|     } | ||||
|   }, | ||||
|   "required": [ | ||||
|     "name", | ||||
|     "metricConfig", | ||||
|     "subClusters" | ||||
|   ] | ||||
| } | ||||
|   | ||||
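The topology block above maps hardware thread IDs onto sockets, memory domains, dies, and cores, and optionally lists accelerator devices. As a reference point, here is a minimal hand-written topology fragment that would satisfy the subCluster schema; the thread IDs, the PCI address, and the device model are illustrative placeholders only, and the flat "node" list is assumed to enumerate all hardware threads of the node:

    "topology": {
      "node": [0, 1, 2, 3],
      "socket": [[0, 1], [2, 3]],
      "memoryDomain": [[0, 1], [2, 3]],
      "die": [[0, 1], [2, 3]],
      "core": [[0], [1], [2], [3]],
      "accelerators": [
        {
          "id": "00000000:0A:00.0",
          "type": "Nvidia GPU",
          "model": "A100"
        }
      ]
    }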
| @@ -1,433 +1,498 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://config.schema.json", | ||||
|     "title": "cc-backend configuration file schema", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "addr": { | ||||
|             "description": "Address where the http (or https) server will listen on (for example: 'localhost:80').", | ||||
|             "type": "string" | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://config.schema.json", | ||||
|   "title": "cc-backend configuration file schema", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "addr": { | ||||
|       "description": "Address on which the HTTP (or HTTPS) server will listen (for example: 'localhost:80').", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "apiAllowedIPs": { | ||||
|       "description": "Addresses from which secured API endpoints can be reached", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "string" | ||||
|       } | ||||
|     }, | ||||
|     "user": { | ||||
|       "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "group": { | ||||
|       "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "disable-authentication": { | ||||
|       "description": "Disable authentication (for everything: API, Web-UI, ...).", | ||||
|       "type": "boolean" | ||||
|     }, | ||||
|     "embed-static-files": { | ||||
|       "description": "Whether all files in `web/frontend/public` are served from within the binary itself (embedded) or from disk.", | ||||
|       "type": "boolean" | ||||
|     }, | ||||
|     "static-files": { | ||||
|       "description": "Folder where static assets can be found, if embed-static-files is false.", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "db-driver": { | ||||
|       "description": "sqlite3 or mysql (mysql will work for mariadb as well).", | ||||
|       "type": "string", | ||||
|       "enum": [ | ||||
|         "sqlite3", | ||||
|         "mysql" | ||||
|       ] | ||||
|     }, | ||||
|     "db": { | ||||
|       "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "archive": { | ||||
|       "description": "Configuration keys for job-archive", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "kind": { | ||||
|           "description": "Backend type for job-archive", | ||||
|           "type": "string", | ||||
|           "enum": [ | ||||
|             "file", | ||||
|             "s3" | ||||
|           ] | ||||
|         }, | ||||
|         "user": { | ||||
|             "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", | ||||
|             "type": "string" | ||||
|         "path": { | ||||
|           "description": "Path to job archive for file backend", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "group": { | ||||
|             "description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.", | ||||
|             "type": "string" | ||||
|         "compression": { | ||||
|           "description": "Setup automatic compression for jobs older than number of days", | ||||
|           "type": "integer" | ||||
|         }, | ||||
|         "disable-authentication": { | ||||
|             "description": "Disable authentication (for everything: API, Web-UI, ...).", | ||||
|             "type": "boolean" | ||||
|         }, | ||||
|         "embed-static-files": { | ||||
|             "description": "If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not.", | ||||
|             "type": "boolean" | ||||
|         }, | ||||
|         "static-files": { | ||||
|             "description": "Folder where static assets can be found, if embed-static-files is false.", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "db-driver": { | ||||
|             "description": "sqlite3 or mysql (mysql will work for mariadb as well).", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|                 "sqlite3", | ||||
|                 "mysql" | ||||
|             ] | ||||
|         }, | ||||
|         "db": { | ||||
|             "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "job-archive": { | ||||
|             "description": "Configuration keys for job-archive", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "kind": { | ||||
|                     "description": "Backend type for job-archive", | ||||
|                     "type": "string", | ||||
|                     "enum": [ | ||||
|                         "file", | ||||
|                         "s3" | ||||
|                     ] | ||||
|                 }, | ||||
|                 "path": { | ||||
|                     "description": "Path to job archive for file backend", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "compression": { | ||||
|                     "description": "Setup automatic compression for jobs older than number of days", | ||||
|                     "type": "integer" | ||||
|                 }, | ||||
|                 "retention": { | ||||
|                     "description": "Configuration keys for retention", | ||||
|                     "type": "object", | ||||
|                     "properties": { | ||||
|                         "policy": { | ||||
|                             "description": "Retention policy", | ||||
|                             "type": "string", | ||||
|                             "enum": [ | ||||
|                                 "none", | ||||
|                                 "delete", | ||||
|                                 "move" | ||||
|                             ] | ||||
|                         }, | ||||
|                         "includeDB": { | ||||
|                             "description": "Also remove jobs from database", | ||||
|                             "type": "boolean" | ||||
|                         }, | ||||
|                         "age": { | ||||
|                             "description": "Act on jobs with startTime older than age (in days)", | ||||
|                             "type": "integer" | ||||
|                         }, | ||||
|                         "location": { | ||||
|                             "description": "The target directory for retention. Only applicable for retention move.", | ||||
|                             "type": "string" | ||||
|                         } | ||||
|                     }, | ||||
|                     "required": [ | ||||
|                         "policy" | ||||
|                     ] | ||||
|                 } | ||||
|         "retention": { | ||||
|           "description": "Configuration keys for retention", | ||||
|           "type": "object", | ||||
|           "properties": { | ||||
|             "policy": { | ||||
|               "description": "Retention policy", | ||||
|               "type": "string", | ||||
|               "enum": [ | ||||
|                 "none", | ||||
|                 "delete", | ||||
|                 "move" | ||||
|               ] | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "kind" | ||||
|             ] | ||||
|             "includeDB": { | ||||
|               "description": "Also remove jobs from database", | ||||
|               "type": "boolean" | ||||
|             }, | ||||
|             "age": { | ||||
|               "description": "Act on jobs with startTime older than age (in days)", | ||||
|               "type": "integer" | ||||
|             }, | ||||
|             "location": { | ||||
|               "description": "The target directory for retention. Only applicable for retention move.", | ||||
|               "type": "string" | ||||
|             } | ||||
|           }, | ||||
|           "required": [ | ||||
|             "policy" | ||||
|           ] | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "kind" | ||||
|       ] | ||||
|     }, | ||||
|     "disable-archive": { | ||||
|       "description": "Keep all metric data in the metric data repositories, do not write to the job-archive.", | ||||
|       "type": "boolean" | ||||
|     }, | ||||
|     "validate": { | ||||
|       "description": "Validate all input json documents against json schema.", | ||||
|       "type": "boolean" | ||||
|     }, | ||||
|     "session-max-age": { | ||||
|       "description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "https-cert-file": { | ||||
|       "description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "https-key-file": { | ||||
|       "description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "redirect-http-to": { | ||||
|       "description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "stop-jobs-exceeding-walltime": { | ||||
|       "description": "If not zero, automatically mark jobs as stopped that have been running X seconds longer than their walltime. Only applies if a walltime is set for the job.", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "short-running-jobs-duration": { | ||||
|       "description": "Do not show running jobs shorter than X seconds.", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "emission-constant": { | ||||
|       "description": ".", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "cron-frequency": { | ||||
|       "description": "Frequency of cron job workers.", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "duration-worker": { | ||||
|           "description": "Duration Update Worker [Defaults to '5m']", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "disable-archive": { | ||||
|             "description": "Keep all metric data in the metric data repositories, do not write to the job-archive.", | ||||
|             "type": "boolean" | ||||
|         "footprint-worker": { | ||||
|           "description": "Metric-Footprint Update Worker [Defaults to '10m']", | ||||
|           "type": "string" | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "enable-resampling": { | ||||
|       "description": "Enable dynamic zoom in frontend metric plots.", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "trigger": { | ||||
|           "description": "Trigger next zoom level at less than this many visible datapoints.", | ||||
|           "type": "integer" | ||||
|         }, | ||||
|         "validate": { | ||||
|             "description": "Validate all input json documents against json schema.", | ||||
|             "type": "boolean" | ||||
|         }, | ||||
|         "session-max-age": { | ||||
|             "description": "Specifies for how long a session shall be valid  as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "https-cert-file": { | ||||
|             "description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "https-key-file": { | ||||
|             "description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "redirect-http-to": { | ||||
|             "description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "stop-jobs-exceeding-walltime": { | ||||
|             "description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.", | ||||
|         "resolutions": { | ||||
|           "description": "Array of resampling target resolutions, in seconds.", | ||||
|           "type": "array", | ||||
|           "items": { | ||||
|             "type": "integer" | ||||
|           } | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "trigger", | ||||
|         "resolutions" | ||||
|       ] | ||||
|     }, | ||||
|     "jwts": { | ||||
|       "description": "For JWT token authentication.", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "max-age": { | ||||
|           "description": "Configure how long a token is valid. As string parsable by time.ParseDuration()", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "short-running-jobs-duration": { | ||||
|             "description": "Do not show running jobs shorter than X seconds.", | ||||
|             "type": "integer" | ||||
|         "cookieName": { | ||||
|           "description": "Cookie that should be checked for a JWT token.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "jwts": { | ||||
|             "description": "For JWT token authentication.", | ||||
|         "validateUser": { | ||||
|           "description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "trustedIssuer": { | ||||
|           "description": "Issuer that should be accepted when validating external JWTs ", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "syncUserOnLogin": { | ||||
|           "description": "Add non-existent user to DB at login attempt with values provided in JWT.", | ||||
|           "type": "boolean" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "max-age" | ||||
|       ] | ||||
|     }, | ||||
|     "oidc": { | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "provider": { | ||||
|           "description": "", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "syncUserOnLogin": { | ||||
|           "description": "", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "updateUserOnLogin": { | ||||
|           "description": "", | ||||
|           "type": "boolean" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "provider" | ||||
|       ] | ||||
|     }, | ||||
|     "ldap": { | ||||
|       "description": "For LDAP Authentication and user synchronisation.", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "url": { | ||||
|           "description": "URL of LDAP directory server.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "user_base": { | ||||
|           "description": "Base DN of user tree root.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "search_dn": { | ||||
|           "description": "DN for authenticating LDAP admin account with general read rights.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "user_bind": { | ||||
|           "description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "user_filter": { | ||||
|           "description": "Filter to extract users for syncing.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "username_attr": { | ||||
|           "description": "Attribute with full username. Default: gecos", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "sync_interval": { | ||||
|           "description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "sync_del_old_users": { | ||||
|           "description": "Delete obsolete users in database.", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "syncUserOnLogin": { | ||||
|           "description": "Add non-existent user to DB at login attempt if user exists in LDAP directory", | ||||
|           "type": "boolean" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "url", | ||||
|         "user_base", | ||||
|         "search_dn", | ||||
|         "user_bind", | ||||
|         "user_filter" | ||||
|       ] | ||||
|     }, | ||||
|     "clusters": { | ||||
|       "description": "Configuration for the clusters to be displayed.", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|           "name": { | ||||
|             "description": "The name of the cluster.", | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "metricDataRepository": { | ||||
|             "description": "Type of the metric data repository for this cluster", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "max-age": { | ||||
|                     "description": "Configure how long a token is valid. As string parsable by time.ParseDuration()", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "cookieName": { | ||||
|                     "description": "Cookie that should be checked for a JWT token.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "validateUser": { | ||||
|                     "description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.", | ||||
|                     "type": "boolean" | ||||
|                 }, | ||||
|                 "trustedIssuer": { | ||||
|                     "description": "Issuer that should be accepted when validating external JWTs ", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "syncUserOnLogin": { | ||||
|                     "description": "Add non-existent user to DB at login attempt with values provided in JWT.", | ||||
|                     "type": "boolean" | ||||
|                 } | ||||
|               "kind": { | ||||
|                 "type": "string", | ||||
|                 "enum": [ | ||||
|                   "influxdb", | ||||
|                   "prometheus", | ||||
|                   "cc-metric-store", | ||||
|                   "test" | ||||
|                 ] | ||||
|               }, | ||||
|               "url": { | ||||
|                 "type": "string" | ||||
|               }, | ||||
|               "token": { | ||||
|                 "type": "string" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "max-age" | ||||
|               "kind", | ||||
|               "url" | ||||
|             ] | ||||
|         }, | ||||
|         "ldap": { | ||||
|             "description": "For LDAP Authentication and user synchronisation.", | ||||
|           }, | ||||
|           "filterRanges": { | ||||
|             "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "url": { | ||||
|                     "description": "URL of LDAP directory server.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "user_base": { | ||||
|                     "description": "Base DN of user tree root.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "search_dn": { | ||||
|                     "description": "DN for authenticating LDAP admin account with general read rights.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "user_bind": { | ||||
|                     "description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "user_filter": { | ||||
|                     "description": "Filter to extract users for syncing.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "username_attr": { | ||||
|                     "description": "Attribute with full username. Default: gecos", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "sync_interval": { | ||||
|                     "description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "sync_del_old_users": { | ||||
|                     "description": "Delete obsolete users in database.", | ||||
|                     "type": "boolean" | ||||
|                 }, | ||||
|                 "syncUserOnLogin": { | ||||
|                     "description": "Add non-existent user to DB at login attempt if user exists in Ldap directory", | ||||
|                     "type": "boolean" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "url", | ||||
|                 "user_base", | ||||
|                 "search_dn", | ||||
|                 "user_bind", | ||||
|                 "user_filter" | ||||
|             ] | ||||
|         }, | ||||
|         "clusters": { | ||||
|             "description": "Configuration for the clusters to be displayed.", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|               "numNodes": { | ||||
|                 "description": "UI slider range for number of nodes", | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "name": { | ||||
|                         "description": "The name of the cluster.", | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "metricDataRepository": { | ||||
|                         "description": "Type of the metric data repository for this cluster", | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "kind": { | ||||
|                                 "type": "string", | ||||
|                                 "enum": [ | ||||
|                                     "influxdb", | ||||
|                                     "prometheus", | ||||
|                                     "cc-metric-store", | ||||
|                                     "test" | ||||
|                                 ] | ||||
|                             }, | ||||
|                             "url": { | ||||
|                                 "type": "string" | ||||
|                             }, | ||||
|                             "token": { | ||||
|                                 "type": "string" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "kind", | ||||
|                             "url" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "filterRanges": { | ||||
|                         "description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.", | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "numNodes": { | ||||
|                                 "description": "UI slider range for number of nodes", | ||||
|                                 "type": "object", | ||||
|                                 "properties": { | ||||
|                                     "from": { | ||||
|                                         "type": "integer" | ||||
|                                     }, | ||||
|                                     "to": { | ||||
|                                         "type": "integer" | ||||
|                                     } | ||||
|                                 }, | ||||
|                                 "required": [ | ||||
|                                     "from", | ||||
|                                     "to" | ||||
|                                 ] | ||||
|                             }, | ||||
|                             "duration": { | ||||
|                                 "description": "UI slider range for duration", | ||||
|                                 "type": "object", | ||||
|                                 "properties": { | ||||
|                                     "from": { | ||||
|                                         "type": "integer" | ||||
|                                     }, | ||||
|                                     "to": { | ||||
|                                         "type": "integer" | ||||
|                                     } | ||||
|                                 }, | ||||
|                                 "required": [ | ||||
|                                     "from", | ||||
|                                     "to" | ||||
|                                 ] | ||||
|                             }, | ||||
|                             "startTime": { | ||||
|                                 "description": "UI slider range for start time", | ||||
|                                 "type": "object", | ||||
|                                 "properties": { | ||||
|                                     "from": { | ||||
|                                         "type": "string", | ||||
|                                         "format": "date-time" | ||||
|                                     }, | ||||
|                                     "to": { | ||||
|                                         "type": "null" | ||||
|                                     } | ||||
|                                 }, | ||||
|                                 "required": [ | ||||
|                                     "from", | ||||
|                                     "to" | ||||
|                                 ] | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "numNodes", | ||||
|                             "duration", | ||||
|                             "startTime" | ||||
|                         ] | ||||
|                     } | ||||
|                   "from": { | ||||
|                     "type": "integer" | ||||
|                   }, | ||||
|                   "to": { | ||||
|                     "type": "integer" | ||||
|                   } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "name", | ||||
|                     "metricDataRepository", | ||||
|                     "filterRanges" | ||||
|                 ], | ||||
|                 "minItems": 1 | ||||
|             } | ||||
|         }, | ||||
|         "ui-defaults": { | ||||
|             "description": "Default configuration for web UI", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "plot_general_colorBackground": { | ||||
|                     "description": "Color plot background according to job average threshold limits", | ||||
|                     "type": "boolean" | ||||
|                 }, | ||||
|                 "plot_general_lineWidth": { | ||||
|                     "description": "Initial linewidth", | ||||
|                   "from", | ||||
|                   "to" | ||||
|                 ] | ||||
|               }, | ||||
|               "duration": { | ||||
|                 "description": "UI slider range for duration", | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                   "from": { | ||||
|                     "type": "integer" | ||||
|                 }, | ||||
|                 "plot_list_jobsPerPage": { | ||||
|                     "description": "Jobs shown per page in job lists", | ||||
|                   }, | ||||
|                   "to": { | ||||
|                     "type": "integer" | ||||
|                   } | ||||
|                 }, | ||||
|                 "plot_view_plotsPerRow": { | ||||
|                     "description": "Number of plots per row in single job view", | ||||
|                     "type": "integer" | ||||
|                 "required": [ | ||||
|                   "from", | ||||
|                   "to" | ||||
|                 ] | ||||
|               }, | ||||
|               "startTime": { | ||||
|                 "description": "UI slider range for start time", | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                   "from": { | ||||
|                     "type": "string", | ||||
|                     "format": "date-time" | ||||
|                   }, | ||||
|                   "to": { | ||||
|                     "type": "null" | ||||
|                   } | ||||
|                 }, | ||||
|                 "plot_view_showPolarplot": { | ||||
|                     "description": "Option to toggle polar plot in single job view", | ||||
|                     "type": "boolean" | ||||
|                 }, | ||||
|                 "plot_view_showRoofline": { | ||||
|                     "description": "Option to toggle roofline plot in single job view", | ||||
|                     "type": "boolean" | ||||
|                 }, | ||||
|                 "plot_view_showStatTable": { | ||||
|                     "description": "Option to toggle the node statistic table in single job view", | ||||
|                     "type": "boolean" | ||||
|                 }, | ||||
|                 "system_view_selectedMetric": { | ||||
|                     "description": "Initial metric shown in system view", | ||||
|                     "type": "string" | ||||
|                 }, | ||||
|                 "analysis_view_histogramMetrics": { | ||||
|                     "description": "Metrics to show as job count histograms in analysis view", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "string", | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "analysis_view_scatterPlotMetrics": { | ||||
|                     "description": "Initial scatter plot configuration in analysis view", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "array", | ||||
|                         "items": { | ||||
|                             "type": "string", | ||||
|                             "minItems": 2, | ||||
|                             "maxItems": 2 | ||||
|                         }, | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "job_view_nodestats_selectedMetrics": { | ||||
|                     "description": "Initial metrics shown in node statistics table of single job view", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "string", | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "job_view_polarPlotMetrics": { | ||||
|                     "description": "Metrics shown in polar plot of single job view", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "string", | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "job_view_selectedMetrics": { | ||||
|                     "description": "", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "string", | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "plot_general_colorscheme": { | ||||
|                     "description": "Initial color scheme", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "string", | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "plot_list_selectedMetrics": { | ||||
|                     "description": "Initial metric plots shown in jobs lists", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "string", | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 } | ||||
|                 "required": [ | ||||
|                   "from", | ||||
|                   "to" | ||||
|                 ] | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "plot_general_colorBackground", | ||||
|                 "plot_general_lineWidth", | ||||
|                 "plot_list_jobsPerPage", | ||||
|                 "plot_view_plotsPerRow", | ||||
|                 "plot_view_showPolarplot", | ||||
|                 "plot_view_showRoofline", | ||||
|                 "plot_view_showStatTable", | ||||
|                 "system_view_selectedMetric", | ||||
|                 "analysis_view_histogramMetrics", | ||||
|                 "analysis_view_scatterPlotMetrics", | ||||
|                 "job_view_nodestats_selectedMetrics", | ||||
|                 "job_view_polarPlotMetrics", | ||||
|                 "job_view_selectedMetrics", | ||||
|                 "plot_general_colorscheme", | ||||
|                 "plot_list_selectedMetrics" | ||||
|               "numNodes", | ||||
|               "duration", | ||||
|               "startTime" | ||||
|             ] | ||||
|         } | ||||
|           } | ||||
|         }, | ||||
|         "required": [ | ||||
|           "name", | ||||
|           "metricDataRepository", | ||||
|           "filterRanges" | ||||
|         ] | ||||
|       }, | ||||
|       "minItems": 1 | ||||
|     }, | ||||
|     "required": [ | ||||
|         "jwts", | ||||
|         "clusters" | ||||
|     ] | ||||
|     "ui-defaults": { | ||||
|       "description": "Default configuration for web UI", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "plot_general_colorBackground": { | ||||
|           "description": "Color plot background according to job average threshold limits", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "plot_general_lineWidth": { | ||||
|           "description": "Initial linewidth", | ||||
|           "type": "integer" | ||||
|         }, | ||||
|         "plot_list_jobsPerPage": { | ||||
|           "description": "Jobs shown per page in job lists", | ||||
|           "type": "integer" | ||||
|         }, | ||||
|         "plot_view_plotsPerRow": { | ||||
|           "description": "Number of plots per row in single job view", | ||||
|           "type": "integer" | ||||
|         }, | ||||
|         "plot_view_showPolarplot": { | ||||
|           "description": "Option to toggle polar plot in single job view", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "plot_view_showRoofline": { | ||||
|           "description": "Option to toggle roofline plot in single job view", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "plot_view_showStatTable": { | ||||
|           "description": "Option to toggle the node statistic table in single job view", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "system_view_selectedMetric": { | ||||
|           "description": "Initial metric shown in system view", | ||||
|           "type": "string" | ||||
|         }, | ||||
|         "job_view_showFootprint": { | ||||
|           "description": "Option to toggle footprint UI in single job view", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "job_list_usePaging": { | ||||
|           "description": "Option to switch from continuous scroll to paging", | ||||
|           "type": "boolean" | ||||
|         }, | ||||
|         "analysis_view_histogramMetrics": { | ||||
|           "description": "Metrics to show as job count histograms in analysis view", | ||||
|           "type": "array", | ||||
|           "minItems": 1, | ||||
|           "items": { | ||||
|             "type": "string" | ||||
|           } | ||||
|         }, | ||||
|         "analysis_view_scatterPlotMetrics": { | ||||
|           "description": "Initial scatter plot configuration in analysis view", | ||||
|           "type": "array", | ||||
|           "minItems": 1, | ||||
|           "items": { | ||||
|             "type": "array", | ||||
|             "minItems": 2, | ||||
|             "maxItems": 2, | ||||
|             "items": { | ||||
|               "type": "string" | ||||
|             } | ||||
|           } | ||||
|         }, | ||||
|         "job_view_nodestats_selectedMetrics": { | ||||
|           "description": "Initial metrics shown in node statistics table of single job view", | ||||
|           "type": "array", | ||||
|           "minItems": 1, | ||||
|           "items": { | ||||
|             "type": "string" | ||||
|           } | ||||
|         }, | ||||
|         "job_view_selectedMetrics": { | ||||
|           "description": "Initial metrics shown as plots in single job view", | ||||
|           "type": "array", | ||||
|           "minItems": 1, | ||||
|           "items": { | ||||
|             "type": "string" | ||||
|           } | ||||
|         }, | ||||
|         "plot_general_colorscheme": { | ||||
|           "description": "Initial color scheme", | ||||
|           "type": "array", | ||||
|           "minItems": 1, | ||||
|           "items": { | ||||
|             "type": "string" | ||||
|           } | ||||
|         }, | ||||
|         "plot_list_selectedMetrics": { | ||||
|           "description": "Initial metric plots shown in jobs lists", | ||||
|           "type": "array", | ||||
|           "minItems": 1, | ||||
|           "items": { | ||||
|             "type": "string" | ||||
|           } | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "plot_general_colorBackground", | ||||
|         "plot_general_lineWidth", | ||||
|         "plot_list_jobsPerPage", | ||||
|         "plot_view_plotsPerRow", | ||||
|         "plot_view_showPolarplot", | ||||
|         "plot_view_showRoofline", | ||||
|         "plot_view_showStatTable", | ||||
|         "system_view_selectedMetric", | ||||
|         "job_view_showFootprint", | ||||
|         "job_list_usePaging", | ||||
|         "analysis_view_histogramMetrics", | ||||
|         "analysis_view_scatterPlotMetrics", | ||||
|         "job_view_nodestats_selectedMetrics", | ||||
|         "job_view_selectedMetrics", | ||||
|         "plot_general_colorscheme", | ||||
|         "plot_list_selectedMetrics" | ||||
|       ] | ||||
|     } | ||||
|   }, | ||||
|   "required": [ | ||||
|     "jwts", | ||||
|     "clusters", | ||||
|     "apiAllowedIPs" | ||||
|   ] | ||||
| } | ||||
|   | ||||
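To make the required keys concrete, the following is a minimal sketch of a config.json that would validate against the schema above. The addresses, the cluster name, the token, and the filter ranges are placeholder values, and "apiAllowedIPs": ["*"] is assumed here to mean that secured API endpoints accept requests from any address:

    {
      "addr": "localhost:8080",
      "apiAllowedIPs": ["*"],
      "jwts": { "max-age": "2000h" },
      "archive": { "kind": "file", "path": "./var/job-archive" },
      "clusters": [
        {
          "name": "testcluster",
          "metricDataRepository": {
            "kind": "cc-metric-store",
            "url": "http://localhost:8082",
            "token": "changeme"
          },
          "filterRanges": {
            "numNodes": { "from": 1, "to": 64 },
            "duration": { "from": 0, "to": 86400 },
            "startTime": { "from": "2023-01-01T00:00:00Z", "to": null }
          }
        }
      ]
    }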
| @@ -1,490 +1,490 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://job-data.schema.json", | ||||
|     "title": "Job metric data list", | ||||
|     "description": "Collection of metric data of an HPC job", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "mem_used": { | ||||
|             "description": "Memory capacity used", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "node" | ||||
|             ] | ||||
|         }, | ||||
|         "flops_any": { | ||||
|             "description": "Total flop rate with DP flops scaled up", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "mem_bw": { | ||||
|             "description": "Main memory bandwidth", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "net_bw": { | ||||
|             "description": "Total fast interconnect network bandwidth", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "node" | ||||
|             ] | ||||
|         }, | ||||
|         "ipc": { | ||||
|             "description": "Instructions executed per cycle", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "cpu_user": { | ||||
|             "description": "CPU user active core utilization", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "cpu_load": { | ||||
|             "description": "CPU requested core utilization (load 1m)", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "node" | ||||
|             ] | ||||
|         }, | ||||
|         "flops_dp": { | ||||
|             "description": "Double precision flop rate", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "flops_sp": { | ||||
|             "description": "Single precision flop rate", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "vectorization_ratio": { | ||||
|             "description": "Fraction of arithmetic instructions using SIMD instructions", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "cpu_power": { | ||||
|             "description": "CPU power consumption", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "mem_power": { | ||||
|             "description": "Memory power consumption", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "acc_utilization": { | ||||
|             "description": "GPU utilization", | ||||
|             "properties": { | ||||
|                 "accelerator": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "accelerator" | ||||
|             ] | ||||
|         }, | ||||
|         "acc_mem_used": { | ||||
|             "description": "GPU memory capacity used", | ||||
|             "properties": { | ||||
|                 "accelerator": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "accelerator" | ||||
|             ] | ||||
|         }, | ||||
|         "acc_power": { | ||||
|             "description": "GPU power consumption", | ||||
|             "properties": { | ||||
|                 "accelerator": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "accelerator" | ||||
|             ] | ||||
|         }, | ||||
|         "clock": { | ||||
|             "description": "Average core frequency", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "socket": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "memoryDomain": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "core": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 }, | ||||
|                 "hwthread": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "minProperties": 1 | ||||
|         }, | ||||
|         "eth_read_bw": { | ||||
|             "description": "Ethernet read bandwidth", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "node" | ||||
|             ] | ||||
|         }, | ||||
|         "eth_write_bw": { | ||||
|             "description": "Ethernet write bandwidth", | ||||
|             "properties": { | ||||
|                 "node": { | ||||
|                     "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                 } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "node" | ||||
|             ] | ||||
|         }, | ||||
|         "filesystems": { | ||||
|             "description": "Array of filesystems", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "name": { | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "type": { | ||||
|                         "type": "string", | ||||
|                         "enum": [ | ||||
|                             "nfs", | ||||
|                             "lustre", | ||||
|                             "gpfs", | ||||
|                             "nvme", | ||||
|                             "ssd", | ||||
|                             "hdd", | ||||
|                             "beegfs" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "read_bw": { | ||||
|                         "description": "File system read bandwidth", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "write_bw": { | ||||
|                         "description": "File system write bandwidth", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "read_req": { | ||||
|                         "description": "File system read requests", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "write_req": { | ||||
|                         "description": "File system write requests", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "inodes": { | ||||
|                         "description": "File system inode usage", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "accesses": { | ||||
|                         "description": "File system open and close", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "fsync": { | ||||
|                         "description": "File system fsync", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "create": { | ||||
|                         "description": "File system create", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "open": { | ||||
|                         "description": "File system open", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "close": { | ||||
|                         "description": "File system close", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "seek": { | ||||
|                         "description": "File system seek", | ||||
|                         "properties": { | ||||
|                             "node": { | ||||
|                                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "node" | ||||
|                         ] | ||||
|                     } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "name", | ||||
|                     "type", | ||||
|                     "read_bw", | ||||
|                     "write_bw" | ||||
|                 ] | ||||
|             }, | ||||
|             "minItems": 1 | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://job-data.schema.json", | ||||
|   "title": "Job metric data list", | ||||
|   "description": "Collection of metric data of an HPC job", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "mem_used": { | ||||
|       "description": "Memory capacity used", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "node" | ||||
|       ] | ||||
|     }, | ||||
|     "ic_rcv_packets": { | ||||
|         "description": "Network interconnect read packets", | ||||
|     "flops_any": { | ||||
|       "description": "Total flop rate with DP flops scaled up", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "mem_bw": { | ||||
|       "description": "Main memory bandwidth", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "net_bw": { | ||||
|       "description": "Total fast interconnect network bandwidth", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "node" | ||||
|       ] | ||||
|     }, | ||||
|     "ipc": { | ||||
|       "description": "Instructions executed per cycle", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "cpu_user": { | ||||
|       "description": "CPU user active core utilization", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "cpu_load": { | ||||
|       "description": "CPU requested core utilization (load 1m)", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "node" | ||||
|       ] | ||||
|     }, | ||||
|     "flops_dp": { | ||||
|       "description": "Double precision flop rate", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "flops_sp": { | ||||
|       "description": "Single precision flop rate", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "vectorization_ratio": { | ||||
|       "description": "Fraction of arithmetic instructions using SIMD instructions", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "cpu_power": { | ||||
|       "description": "CPU power consumption", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "mem_power": { | ||||
|       "description": "Memory power consumption", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "acc_utilization": { | ||||
|       "description": "GPU utilization", | ||||
|       "properties": { | ||||
|         "accelerator": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "accelerator" | ||||
|       ] | ||||
|     }, | ||||
|     "acc_mem_used": { | ||||
|       "description": "GPU memory capacity used", | ||||
|       "properties": { | ||||
|         "accelerator": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "accelerator" | ||||
|       ] | ||||
|     }, | ||||
|     "acc_power": { | ||||
|       "description": "GPU power consumption", | ||||
|       "properties": { | ||||
|         "accelerator": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "accelerator" | ||||
|       ] | ||||
|     }, | ||||
|     "clock": { | ||||
|       "description": "Average core frequency", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "socket": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "memoryDomain": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "core": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         }, | ||||
|         "hwthread": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "minProperties": 1 | ||||
|     }, | ||||
|     "eth_read_bw": { | ||||
|       "description": "Ethernet read bandwidth", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "node" | ||||
|       ] | ||||
|     }, | ||||
|     "eth_write_bw": { | ||||
|       "description": "Ethernet write bandwidth", | ||||
|       "properties": { | ||||
|         "node": { | ||||
|           "$ref": "embedfs://job-metric-data.schema.json" | ||||
|         } | ||||
|       }, | ||||
|       "required": [ | ||||
|         "node" | ||||
|       ] | ||||
|     }, | ||||
|     "filesystems": { | ||||
|       "description": "Array of filesystems", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|             "node": { | ||||
|           "name": { | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "type": { | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|               "nfs", | ||||
|               "lustre", | ||||
|               "gpfs", | ||||
|               "nvme", | ||||
|               "ssd", | ||||
|               "hdd", | ||||
|               "beegfs" | ||||
|             ] | ||||
|           }, | ||||
|           "read_bw": { | ||||
|             "description": "File system read bandwidth", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|             } | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "write_bw": { | ||||
|             "description": "File system write bandwidth", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "read_req": { | ||||
|             "description": "File system read requests", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "write_req": { | ||||
|             "description": "File system write requests", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "inodes": { | ||||
|             "description": "File system inode usage", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "accesses": { | ||||
|             "description": "File system open and close", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "fsync": { | ||||
|             "description": "File system fsync", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "create": { | ||||
|             "description": "File system create", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "open": { | ||||
|             "description": "File system open", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "close": { | ||||
|             "description": "File system close", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           }, | ||||
|           "seek": { | ||||
|             "description": "File system seek", | ||||
|             "properties": { | ||||
|               "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "node" | ||||
|             ] | ||||
|           } | ||||
|         }, | ||||
|         "required": [ | ||||
|             "node" | ||||
|         ] | ||||
|     }, | ||||
|     "ic_send_packets": { | ||||
|         "description": "Network interconnect send packets", | ||||
|         "properties": { | ||||
|             "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|             } | ||||
|         }, | ||||
|         "required": [ | ||||
|             "node" | ||||
|         ] | ||||
|     }, | ||||
|     "ic_read_bw": { | ||||
|         "description": "Network interconnect read bandwidth", | ||||
|         "properties": { | ||||
|             "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|             } | ||||
|         }, | ||||
|         "required": [ | ||||
|             "node" | ||||
|         ] | ||||
|     }, | ||||
|     "ic_write_bw": { | ||||
|         "description": "Network interconnect write bandwidth", | ||||
|         "properties": { | ||||
|             "node": { | ||||
|                 "$ref": "embedfs://job-metric-data.schema.json" | ||||
|             } | ||||
|         }, | ||||
|         "required": [ | ||||
|             "node" | ||||
|           "name", | ||||
|           "type", | ||||
|           "read_bw", | ||||
|           "write_bw" | ||||
|         ] | ||||
|       }, | ||||
|       "minItems": 1 | ||||
|     } | ||||
|   }, | ||||
|   "ic_rcv_packets": { | ||||
|     "description": "Network interconnect read packets", | ||||
|     "properties": { | ||||
|       "node": { | ||||
|         "$ref": "embedfs://job-metric-data.schema.json" | ||||
|       } | ||||
|     }, | ||||
|     "required": [ | ||||
|         "cpu_user", | ||||
|         "cpu_load", | ||||
|         "mem_used", | ||||
|         "flops_any", | ||||
|         "mem_bw", | ||||
|         "net_bw", | ||||
|         "filesystems" | ||||
|       "node" | ||||
|     ] | ||||
|   }, | ||||
|   "ic_send_packets": { | ||||
|     "description": "Network interconnect send packets", | ||||
|     "properties": { | ||||
|       "node": { | ||||
|         "$ref": "embedfs://job-metric-data.schema.json" | ||||
|       } | ||||
|     }, | ||||
|     "required": [ | ||||
|       "node" | ||||
|     ] | ||||
|   }, | ||||
|   "ic_read_bw": { | ||||
|     "description": "Network interconnect read bandwidth", | ||||
|     "properties": { | ||||
|       "node": { | ||||
|         "$ref": "embedfs://job-metric-data.schema.json" | ||||
|       } | ||||
|     }, | ||||
|     "required": [ | ||||
|       "node" | ||||
|     ] | ||||
|   }, | ||||
|   "ic_write_bw": { | ||||
|     "description": "Network interconnect write bandwidth", | ||||
|     "properties": { | ||||
|       "node": { | ||||
|         "$ref": "embedfs://job-metric-data.schema.json" | ||||
|       } | ||||
|     }, | ||||
|     "required": [ | ||||
|       "node" | ||||
|     ] | ||||
|   }, | ||||
|   "required": [ | ||||
|     "cpu_user", | ||||
|     "cpu_load", | ||||
|     "mem_used", | ||||
|     "flops_any", | ||||
|     "mem_bw", | ||||
|     "net_bw", | ||||
|     "filesystems" | ||||
|   ] | ||||
| } | ||||
|   | ||||
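For orientation, a minimal, abridged job-data document accepted by the schema above might look like the following sketch. Hostnames, units, and values are illustrative only; the remaining required metrics (cpu_user, cpu_load, flops_any, mem_bw, net_bw) follow the same node-scoped layout and are omitted here, and each metric body is assumed to be a job-metric-data object carrying unit, timestep, and series:

    {
      "mem_used": {
        "node": {
          "unit": { "base": "B", "prefix": "G" },
          "timestep": 60,
          "series": [
            {
              "hostname": "node001",
              "statistics": { "avg": 42.1, "min": 40.0, "max": 45.3 },
              "data": [40.0, 41.5, 45.3]
            }
          ]
        }
      },
      "filesystems": [
        {
          "name": "/scratch",
          "type": "lustre",
          "read_bw": {
            "node": {
              "unit": { "base": "B/s", "prefix": "M" },
              "timestep": 60,
              "series": [
                {
                  "hostname": "node001",
                  "statistics": { "avg": 150.0, "min": 0.0, "max": 300.0 },
                  "data": [0.0, 150.0, 300.0]
                }
              ]
            }
          },
          "write_bw": {
            "node": {
              "unit": { "base": "B/s", "prefix": "M" },
              "timestep": 60,
              "series": [
                {
                  "hostname": "node001",
                  "statistics": { "avg": 20.0, "min": 0.0, "max": 60.0 },
                  "data": [0.0, 20.0, 60.0]
                }
              ]
            }
          }
        }
      ]
    }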
| @@ -1,351 +1,351 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://job-meta.schema.json", | ||||
|     "title": "Job meta data", | ||||
|     "description": "Meta data information of an HPC job", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "jobId": { | ||||
|             "description": "The unique identifier of a job", | ||||
|             "type": "integer" | ||||
|         }, | ||||
|         "user": { | ||||
|             "description": "The unique identifier of a user", | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://job-meta.schema.json", | ||||
|   "title": "Job meta data", | ||||
|   "description": "Meta data information of an HPC job", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "jobId": { | ||||
|       "description": "The unique identifier of a job", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "user": { | ||||
|       "description": "The unique identifier of a user", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "project": { | ||||
|       "description": "The unique identifier of a project", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "cluster": { | ||||
|       "description": "The unique identifier of a cluster", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "subCluster": { | ||||
|       "description": "The unique identifier of a sub cluster", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "partition": { | ||||
|       "description": "The Slurm partition to which the job was submitted", | ||||
|       "type": "string" | ||||
|     }, | ||||
|     "arrayJobId": { | ||||
|       "description": "The unique identifier of an array job", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "numNodes": { | ||||
|       "description": "Number of nodes used", | ||||
|       "type": "integer", | ||||
|       "exclusiveMinimum": 0 | ||||
|     }, | ||||
|     "numHwthreads": { | ||||
|       "description": "Number of HWThreads used", | ||||
|       "type": "integer", | ||||
|       "exclusiveMinimum": 0 | ||||
|     }, | ||||
|     "numAcc": { | ||||
|       "description": "Number of accelerators used", | ||||
|       "type": "integer", | ||||
|       "exclusiveMinimum": 0 | ||||
|     }, | ||||
|     "exclusive": { | ||||
|       "description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of the same user", | ||||
|       "type": "integer", | ||||
|       "minimum": 0, | ||||
|       "maximum": 2 | ||||
|     }, | ||||
|     "monitoringStatus": { | ||||
|       "description": "State of monitoring system during job run", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "smt": { | ||||
|       "description": "SMT threads used by job", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "walltime": { | ||||
|       "description": "Requested walltime of job in seconds", | ||||
|       "type": "integer", | ||||
|       "exclusiveMinimum": 0 | ||||
|     }, | ||||
|     "jobState": { | ||||
|       "description": "Final state of job", | ||||
|       "type": "string", | ||||
|       "enum": [ | ||||
|         "completed", | ||||
|         "failed", | ||||
|         "cancelled", | ||||
|         "stopped", | ||||
|         "out_of_memory", | ||||
|         "timeout" | ||||
|       ] | ||||
|     }, | ||||
|     "startTime": { | ||||
|       "description": "Start epoch time stamp in seconds", | ||||
|       "type": "integer", | ||||
|       "exclusiveMinimum": 0 | ||||
|     }, | ||||
|     "duration": { | ||||
|       "description": "Duration of job in seconds", | ||||
|       "type": "integer", | ||||
|       "exclusiveMinimum": 0 | ||||
|     }, | ||||
|     "resources": { | ||||
|       "description": "Resources used by job", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|           "hostname": { | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "project": { | ||||
|             "description": "The unique identifier of a project", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "cluster": { | ||||
|             "description": "The unique identifier of a cluster", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "subCluster": { | ||||
|             "description": "The unique identifier of a sub cluster", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "partition": { | ||||
|             "description": "The Slurm partition to which the job was submitted", | ||||
|             "type": "string" | ||||
|         }, | ||||
|         "arrayJobId": { | ||||
|             "description": "The unique identifier of an array job", | ||||
|             "type": "integer" | ||||
|         }, | ||||
|         "numNodes": { | ||||
|             "description": "Number of nodes used", | ||||
|             "type": "integer", | ||||
|             "exclusiveMinimum": 0 | ||||
|         }, | ||||
|         "numHwthreads": { | ||||
|             "description": "Number of HWThreads used", | ||||
|             "type": "integer", | ||||
|             "exclusiveMinimum": 0 | ||||
|         }, | ||||
|         "numAcc": { | ||||
|             "description": "Number of accelerators used", | ||||
|             "type": "integer", | ||||
|             "exclusiveMinimum": 0 | ||||
|         }, | ||||
|         "exclusive": { | ||||
|             "description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of the same user", | ||||
|             "type": "integer", | ||||
|             "minimum": 0, | ||||
|             "maximum": 2 | ||||
|         }, | ||||
|         "monitoringStatus": { | ||||
|             "description": "State of monitoring system during job run", | ||||
|             "type": "integer" | ||||
|         }, | ||||
|         "smt": { | ||||
|             "description": "SMT threads used by job", | ||||
|             "type": "integer" | ||||
|         }, | ||||
|         "walltime": { | ||||
|             "description": "Requested walltime of job in seconds", | ||||
|             "type": "integer", | ||||
|             "exclusiveMinimum": 0 | ||||
|         }, | ||||
|         "jobState": { | ||||
|             "description": "Final state of job", | ||||
|           }, | ||||
|           "hwthreads": { | ||||
|             "type": "array", | ||||
|             "description": "List of OS processor ids", | ||||
|             "items": { | ||||
|               "type": "integer" | ||||
|             } | ||||
|           }, | ||||
|           "accelerators": { | ||||
|             "type": "array", | ||||
|             "description": "List of accelerator device ids", | ||||
|             "items": { | ||||
|               "type": "string" | ||||
|             } | ||||
|           }, | ||||
|           "configuration": { | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|                 "completed", | ||||
|                 "failed", | ||||
|                 "cancelled", | ||||
|                 "stopped", | ||||
|                 "out_of_memory", | ||||
|                 "timeout" | ||||
|             ] | ||||
|             "description": "The configuration options of the node" | ||||
|           } | ||||
|         }, | ||||
|         "startTime": { | ||||
|             "description": "Start epoch time stamp in seconds", | ||||
|             "type": "integer", | ||||
|             "exclusiveMinimum": 0 | ||||
|         "required": [ | ||||
|           "hostname" | ||||
|         ], | ||||
|         "minItems": 1 | ||||
|       } | ||||
|     }, | ||||
|     "metaData": { | ||||
|       "description": "Additional information about the job", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "jobScript": { | ||||
|           "type": "string", | ||||
|           "description": "The batch script of the job" | ||||
|         }, | ||||
|         "duration": { | ||||
|             "description": "Duration of job in seconds", | ||||
|             "type": "integer", | ||||
|             "exclusiveMinimum": 0 | ||||
|         "jobName": { | ||||
|           "type": "string", | ||||
|           "description": "Slurm Job name" | ||||
|         }, | ||||
|         "resources": { | ||||
|             "description": "Resources used by job", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "hostname": { | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "hwthreads": { | ||||
|                         "type": "array", | ||||
|                         "description": "List of OS processor ids", | ||||
|                         "items": { | ||||
|                             "type": "integer" | ||||
|                         } | ||||
|                     }, | ||||
|                     "accelerators": { | ||||
|                         "type": "array", | ||||
|                         "description": "List of accelerator device ids", | ||||
|                         "items": { | ||||
|                             "type": "string" | ||||
|                         } | ||||
|                     }, | ||||
|                     "configuration": { | ||||
|                         "type": "string", | ||||
|                         "description": "The configuration options of the node" | ||||
|                     } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "hostname" | ||||
|                 ], | ||||
|                 "minItems": 1 | ||||
|             } | ||||
|         "slurmInfo": { | ||||
|           "type": "string", | ||||
|           "description": "Additional Slurm info as shown by scontrol show job" | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "tags": { | ||||
|       "description": "List of tags", | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|           "name": { | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "type": { | ||||
|             "type": "string" | ||||
|           } | ||||
|         }, | ||||
|         "metaData": { | ||||
|             "description": "Additional information about the job", | ||||
|         "required": [ | ||||
|           "name", | ||||
|           "type" | ||||
|         ] | ||||
|       }, | ||||
|       "uniqueItems": true | ||||
|     }, | ||||
|     "statistics": { | ||||
|       "description": "Job statistic data", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "mem_used": { | ||||
|           "description": "Memory capacity used (required)", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "cpu_load": { | ||||
|           "description": "CPU requested core utilization (load 1m) (required)", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "flops_any": { | ||||
|           "description": "Total flop rate with DP flops scaled up (required)", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "mem_bw": { | ||||
|           "description": "Main memory bandwidth (required)", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "net_bw": { | ||||
|           "description": "Total fast interconnect network bandwidth (required)", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "file_bw": { | ||||
|           "description": "Total file IO bandwidth (required)", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "ipc": { | ||||
|           "description": "Instructions executed per cycle", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "cpu_user": { | ||||
|           "description": "CPU user active core utilization", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "flops_dp": { | ||||
|           "description": "Double precision flop rate", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "flops_sp": { | ||||
|           "description": "Single precision flop rate", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "rapl_power": { | ||||
|           "description": "CPU power consumption", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "acc_used": { | ||||
|           "description": "GPU utilization", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "acc_mem_used": { | ||||
|           "description": "GPU memory capacity used", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "acc_power": { | ||||
|           "description": "GPU power consumption", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "clock": { | ||||
|           "description": "Average core frequency", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "eth_read_bw": { | ||||
|           "description": "Ethernet read bandwidth", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "eth_write_bw": { | ||||
|           "description": "Ethernet write bandwidth", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "ic_rcv_packets": { | ||||
|           "description": "Network interconnect read packets", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "ic_send_packets": { | ||||
|           "description": "Network interconnect send packets", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "ic_read_bw": { | ||||
|           "description": "Network interconnect read bandwidth", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "ic_write_bw": { | ||||
|           "description": "Network interconnect write bandwidth", | ||||
|           "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|         }, | ||||
|         "filesystems": { | ||||
|           "description": "Array of filesystems", | ||||
|           "type": "array", | ||||
|           "items": { | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "jobScript": { | ||||
|                     "type": "string", | ||||
|                     "description": "The batch script of the job" | ||||
|                 }, | ||||
|                 "jobName": { | ||||
|                     "type": "string", | ||||
|                     "description": "Slurm Job name" | ||||
|                 }, | ||||
|                 "slurmInfo": { | ||||
|                     "type": "string", | ||||
|                     "description": "Additional Slurm info as shown by scontrol show job" | ||||
|                 } | ||||
|             } | ||||
|         }, | ||||
|         "tags": { | ||||
|             "description": "List of tags", | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "name": { | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "type": { | ||||
|                         "type": "string" | ||||
|                     } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "name", | ||||
|                     "type" | ||||
|               "name": { | ||||
|                 "type": "string" | ||||
|               }, | ||||
|               "type": { | ||||
|                 "type": "string", | ||||
|                 "enum": [ | ||||
|                   "nfs", | ||||
|                   "lustre", | ||||
|                   "gpfs", | ||||
|                   "nvme", | ||||
|                   "ssd", | ||||
|                   "hdd", | ||||
|                   "beegfs" | ||||
|                 ] | ||||
|             }, | ||||
|             "uniqueItems": true | ||||
|         }, | ||||
|         "statistics": { | ||||
|             "description": "Job statistic data", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "mem_used": { | ||||
|                     "description": "Memory capacity used (required)", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "cpu_load": { | ||||
|                     "description": "CPU requested core utilization (load 1m) (required)", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "flops_any": { | ||||
|                     "description": "Total flop rate with DP flops scaled up (required)", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "mem_bw": { | ||||
|                     "description": "Main memory bandwidth (required)", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "net_bw": { | ||||
|                     "description": "Total fast interconnect network bandwidth (required)", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "file_bw": { | ||||
|                     "description": "Total file IO bandwidth (required)", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "ipc": { | ||||
|                     "description": "Instructions executed per cycle", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "cpu_user": { | ||||
|                     "description": "CPU user active core utilization", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "flops_dp": { | ||||
|                     "description": "Double precision flop rate", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "flops_sp": { | ||||
|                     "description": "Single precision flop rate", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "rapl_power": { | ||||
|                     "description": "CPU power consumption", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "acc_used": { | ||||
|                     "description": "GPU utilization", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "acc_mem_used": { | ||||
|                     "description": "GPU memory capacity used", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "acc_power": { | ||||
|                     "description": "GPU power consumption", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "clock": { | ||||
|                     "description": "Average core frequency", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "eth_read_bw": { | ||||
|                     "description": "Ethernet read bandwidth", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "eth_write_bw": { | ||||
|                     "description": "Ethernet write bandwidth", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "ic_rcv_packets": { | ||||
|                     "description": "Network interconnect read packets", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "ic_send_packets": { | ||||
|                     "description": "Network interconnect send packets", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "ic_read_bw": { | ||||
|                     "description": "Network interconnect read bandwidth", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "ic_write_bw": { | ||||
|                     "description": "Network interconnect write bandwidth", | ||||
|                     "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                 }, | ||||
|                 "filesystems": { | ||||
|                     "description": "Array of filesystems", | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "object", | ||||
|                         "properties": { | ||||
|                             "name": { | ||||
|                                 "type": "string" | ||||
|                             }, | ||||
|                             "type": { | ||||
|                                 "type": "string", | ||||
|                                 "enum": [ | ||||
|                                     "nfs", | ||||
|                                     "lustre", | ||||
|                                     "gpfs", | ||||
|                                     "nvme", | ||||
|                                     "ssd", | ||||
|                                     "hdd", | ||||
|                                     "beegfs" | ||||
|                                 ] | ||||
|                             }, | ||||
|                             "read_bw": { | ||||
|                                 "description": "File system read bandwidth", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "write_bw": { | ||||
|                                 "description": "File system write bandwidth", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "read_req": { | ||||
|                                 "description": "File system read requests", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "write_req": { | ||||
|                                 "description": "File system write requests", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "inodes": { | ||||
|                                 "description": "File system inode usage", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "accesses": { | ||||
|                                 "description": "File system open and close", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "fsync": { | ||||
|                                 "description": "File system fsync", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "create": { | ||||
|                                 "description": "File system create", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "open": { | ||||
|                                 "description": "File system open", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "close": { | ||||
|                                 "description": "File system close", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             }, | ||||
|                             "seek": { | ||||
|                                 "description": "File system seek", | ||||
|                                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "name", | ||||
|                             "type", | ||||
|                             "read_bw", | ||||
|                             "write_bw" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "minItems": 1 | ||||
|                 } | ||||
|               }, | ||||
|               "read_bw": { | ||||
|                 "description": "File system read bandwidth", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "write_bw": { | ||||
|                 "description": "File system write bandwidth", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "read_req": { | ||||
|                 "description": "File system read requests", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "write_req": { | ||||
|                 "description": "File system write requests", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "inodes": { | ||||
|                 "description": "File system inode usage", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "accesses": { | ||||
|                 "description": "File system open and close", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "fsync": { | ||||
|                 "description": "File system fsync", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "create": { | ||||
|                 "description": "File system create", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "open": { | ||||
|                 "description": "File system open", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "close": { | ||||
|                 "description": "File system close", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               }, | ||||
|               "seek": { | ||||
|                 "description": "File system seek", | ||||
|                 "$ref": "embedfs://job-metric-statistics.schema.json" | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|                 "cpu_user", | ||||
|                 "cpu_load", | ||||
|                 "mem_used", | ||||
|                 "flops_any", | ||||
|                 "mem_bw" | ||||
|               "name", | ||||
|               "type", | ||||
|               "read_bw", | ||||
|               "write_bw" | ||||
|             ] | ||||
|           }, | ||||
|           "minItems": 1 | ||||
|         } | ||||
|     }, | ||||
|     "required": [ | ||||
|         "jobId", | ||||
|         "user", | ||||
|         "project", | ||||
|         "cluster", | ||||
|         "subCluster", | ||||
|         "numNodes", | ||||
|         "exclusive", | ||||
|         "startTime", | ||||
|         "jobState", | ||||
|         "duration", | ||||
|         "resources", | ||||
|         "statistics" | ||||
|     ] | ||||
|       }, | ||||
|       "required": [ | ||||
|         "cpu_user", | ||||
|         "cpu_load", | ||||
|         "mem_used", | ||||
|         "flops_any", | ||||
|         "mem_bw" | ||||
|       ] | ||||
|     } | ||||
|   }, | ||||
|   "required": [ | ||||
|     "jobId", | ||||
|     "user", | ||||
|     "project", | ||||
|     "cluster", | ||||
|     "subCluster", | ||||
|     "numNodes", | ||||
|     "exclusive", | ||||
|     "startTime", | ||||
|     "jobState", | ||||
|     "duration", | ||||
|     "resources", | ||||
|     "statistics" | ||||
|   ] | ||||
| } | ||||
|   | ||||
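Likewise, a minimal job-meta document covering the required fields of the schema above could look like this sketch. All values are illustrative; the per-metric statistics entries are assumed to follow the unit/avg/min/max layout of job-metric-statistics.schema.json, which is referenced but not shown here:

    {
      "jobId": 123456,
      "user": "user123",
      "project": "project42",
      "cluster": "testcluster",
      "subCluster": "main",
      "numNodes": 2,
      "exclusive": 1,
      "startTime": 1672531200,
      "duration": 3600,
      "jobState": "completed",
      "resources": [
        { "hostname": "node001" },
        { "hostname": "node002" }
      ],
      "statistics": {
        "cpu_user": { "unit": { "base": "" }, "avg": 80.5, "min": 10.2, "max": 98.3 },
        "cpu_load": { "unit": { "base": "" }, "avg": 64.0, "min": 32.0, "max": 72.0 },
        "mem_used": { "unit": { "base": "B", "prefix": "G" }, "avg": 42.1, "min": 40.0, "max": 45.3 },
        "flops_any": { "unit": { "base": "F/s", "prefix": "G" }, "avg": 120.0, "min": 5.0, "max": 380.0 },
        "mem_bw": { "unit": { "base": "B/s", "prefix": "G" }, "avg": 24.0, "min": 2.0, "max": 90.0 }
      }
    }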
| @@ -1,216 +1,216 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://job-metric-data.schema.json", | ||||
|     "title": "Job metric data", | ||||
|     "description": "Metric data of an HPC job", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "unit": { | ||||
|             "description": "Metric unit", | ||||
|             "$ref": "embedfs://unit.schema.json" | ||||
|         }, | ||||
|         "timestep": { | ||||
|             "description": "Measurement interval in seconds", | ||||
|             "type": "integer" | ||||
|         }, | ||||
|         "thresholds": { | ||||
|             "description": "Metric thresholds for specific system", | ||||
|             "type": "object", | ||||
|             "properties": { | ||||
|                 "peak": { | ||||
|                     "type": "number" | ||||
|                 }, | ||||
|                 "normal": { | ||||
|                     "type": "number" | ||||
|                 }, | ||||
|                 "caution": { | ||||
|                     "type": "number" | ||||
|                 }, | ||||
|                 "alert": { | ||||
|                     "type": "number" | ||||
|                 } | ||||
|             } | ||||
|         }, | ||||
|         "statisticsSeries": { | ||||
|             "type": "object", | ||||
|             "description": "Statistics series across topology", | ||||
|             "properties": { | ||||
|                 "min": { | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "number", | ||||
|                         "minimum": 0 | ||||
|                     }, | ||||
|                     "minItems": 3 | ||||
|                 }, | ||||
|                 "max": { | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "number", | ||||
|                         "minimum": 0 | ||||
|                     }, | ||||
|                     "minItems": 3 | ||||
|                 }, | ||||
|                 "mean": { | ||||
|                     "type": "array", | ||||
|                     "items": { | ||||
|                         "type": "number", | ||||
|                         "minimum": 0 | ||||
|                     }, | ||||
|                     "minItems": 3 | ||||
|                 }, | ||||
|                 "percentiles": { | ||||
|                     "type": "object", | ||||
|                     "properties": { | ||||
|                         "10": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "20": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "30": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "40": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "50": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "60": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "70": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "80": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "90": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "25": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         }, | ||||
|                         "75": { | ||||
|                             "type": "array", | ||||
|                             "items": { | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "minItems": 3 | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         }, | ||||
|         "series": { | ||||
|             "type": "array", | ||||
|             "items": { | ||||
|                 "type": "object", | ||||
|                 "properties": { | ||||
|                     "hostname": { | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "id": { | ||||
|                         "type": "string" | ||||
|                     }, | ||||
|                     "statistics": { | ||||
|                         "type": "object", | ||||
|                         "description": "Statistics across time dimension", | ||||
|                         "properties": { | ||||
|                             "avg": { | ||||
|                                 "description": "Series average", | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "min": { | ||||
|                                 "description": "Series minimum", | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             }, | ||||
|                             "max": { | ||||
|                                 "description": "Series maximum", | ||||
|                                 "type": "number", | ||||
|                                 "minimum": 0 | ||||
|                             } | ||||
|                         }, | ||||
|                         "required": [ | ||||
|                             "avg", | ||||
|                             "min", | ||||
|                             "max" | ||||
|                         ] | ||||
|                     }, | ||||
|                     "data": { | ||||
|                         "type": "array", | ||||
|                         "contains": { | ||||
|                             "type": "number", | ||||
|                             "minimum": 0 | ||||
|                         }, | ||||
|                         "minItems": 1 | ||||
|                     } | ||||
|                 }, | ||||
|                 "required": [ | ||||
|                     "hostname", | ||||
|                     "statistics", | ||||
|                     "data" | ||||
|                 ] | ||||
|             } | ||||
|         } | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://job-metric-data.schema.json", | ||||
|   "title": "Job metric data", | ||||
|   "description": "Metric data of a HPC job", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "unit": { | ||||
|       "description": "Metric unit", | ||||
|       "$ref": "embedfs://unit.schema.json" | ||||
|     }, | ||||
|     "required": [ | ||||
|         "unit", | ||||
|         "timestep", | ||||
|         "series" | ||||
|     ] | ||||
|     "timestep": { | ||||
|       "description": "Measurement interval in seconds", | ||||
|       "type": "integer" | ||||
|     }, | ||||
|     "thresholds": { | ||||
|       "description": "Metric thresholds for specific system", | ||||
|       "type": "object", | ||||
|       "properties": { | ||||
|         "peak": { | ||||
|           "type": "number" | ||||
|         }, | ||||
|         "normal": { | ||||
|           "type": "number" | ||||
|         }, | ||||
|         "caution": { | ||||
|           "type": "number" | ||||
|         }, | ||||
|         "alert": { | ||||
|           "type": "number" | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "statisticsSeries": { | ||||
|       "type": "object", | ||||
|       "description": "Statistics series across topology", | ||||
|       "properties": { | ||||
|         "min": { | ||||
|           "type": "array", | ||||
|           "items": { | ||||
|             "type": "number", | ||||
|             "minimum": 0 | ||||
|           }, | ||||
|           "minItems": 3 | ||||
|         }, | ||||
|         "max": { | ||||
|           "type": "array", | ||||
|           "items": { | ||||
|             "type": "number", | ||||
|             "minimum": 0 | ||||
|           }, | ||||
|           "minItems": 3 | ||||
|         }, | ||||
|         "mean": { | ||||
|           "type": "array", | ||||
|           "items": { | ||||
|             "type": "number", | ||||
|             "minimum": 0 | ||||
|           }, | ||||
|           "minItems": 3 | ||||
|         }, | ||||
|         "percentiles": { | ||||
|           "type": "object", | ||||
|           "properties": { | ||||
|             "10": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "20": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "30": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "40": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "50": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "60": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "70": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "80": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "90": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "25": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             }, | ||||
|             "75": { | ||||
|               "type": "array", | ||||
|               "items": { | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "minItems": 3 | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "series": { | ||||
|       "type": "array", | ||||
|       "items": { | ||||
|         "type": "object", | ||||
|         "properties": { | ||||
|           "hostname": { | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "id": { | ||||
|             "type": "string" | ||||
|           }, | ||||
|           "statistics": { | ||||
|             "type": "object", | ||||
|             "description": "Statistics across time dimension", | ||||
|             "properties": { | ||||
|               "avg": { | ||||
|                 "description": "Series average", | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "min": { | ||||
|                 "description": "Series minimum", | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               }, | ||||
|               "max": { | ||||
|                 "description": "Series maximum", | ||||
|                 "type": "number", | ||||
|                 "minimum": 0 | ||||
|               } | ||||
|             }, | ||||
|             "required": [ | ||||
|               "avg", | ||||
|               "min", | ||||
|               "max" | ||||
|             ] | ||||
|           }, | ||||
|           "data": { | ||||
|             "type": "array", | ||||
|             "contains": { | ||||
|               "type": "number", | ||||
|               "minimum": 0 | ||||
|             }, | ||||
|             "minItems": 1 | ||||
|           } | ||||
|         }, | ||||
|         "required": [ | ||||
|           "hostname", | ||||
|           "statistics", | ||||
|           "data" | ||||
|         ] | ||||
|       } | ||||
|     } | ||||
|   }, | ||||
|   "required": [ | ||||
|     "unit", | ||||
|     "timestep", | ||||
|     "series" | ||||
|   ] | ||||
| } | ||||
|   | ||||
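For reference, here is a minimal instance the updated job-metric-data schema is meant to accept. This is an illustrative sketch, not part of the commit; the hostname and values are invented, and the check below uses only encoding/json rather than the project's schema validator.

// example_jobmetricdata.go (illustrative sketch only)
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Satisfies the required properties unit, timestep and series;
	// each series entry carries hostname, statistics (avg/min/max) and data.
	raw := []byte(`{
		"unit": { "base": "F/s", "prefix": "G" },
		"timestep": 60,
		"series": [
			{
				"hostname": "node001",
				"statistics": { "avg": 21.5, "min": 0.0, "max": 42.0 },
				"data": [10.0, 21.5, 42.0]
			}
		]
	}`)

	var doc map[string]any
	if err := json.Unmarshal(raw, &doc); err != nil {
		fmt.Println("invalid JSON:", err)
		return
	}
	fmt.Println("timestep:", doc["timestep"])
}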
| @@ -1,34 +1,34 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://job-metric-statistics.schema.json", | ||||
|     "title": "Job statistics", | ||||
|     "description": "Format specification for job metric statistics", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "unit": { | ||||
|             "description": "Metric unit", | ||||
|             "$ref": "embedfs://unit.schema.json" | ||||
|         }, | ||||
|         "avg": { | ||||
|             "description": "Job metric average", | ||||
|             "type": "number", | ||||
|             "minimum": 0 | ||||
|         }, | ||||
|         "min": { | ||||
|             "description": "Job metric minimum", | ||||
|             "type": "number", | ||||
|             "minimum": 0 | ||||
|         }, | ||||
|         "max": { | ||||
|             "description": "Job metric maximum", | ||||
|             "type": "number", | ||||
|             "minimum": 0 | ||||
|         } | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://job-metric-statistics.schema.json", | ||||
|   "title": "Job statistics", | ||||
|   "description": "Format specification for job metric statistics", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "unit": { | ||||
|       "description": "Metric unit", | ||||
|       "$ref": "embedfs://unit.schema.json" | ||||
|     }, | ||||
|     "required": [ | ||||
|         "unit", | ||||
|         "avg", | ||||
|         "min", | ||||
|         "max" | ||||
|     ] | ||||
|     "avg": { | ||||
|       "description": "Job metric average", | ||||
|       "type": "number", | ||||
|       "minimum": 0 | ||||
|     }, | ||||
|     "min": { | ||||
|       "description": "Job metric minimum", | ||||
|       "type": "number", | ||||
|       "minimum": 0 | ||||
|     }, | ||||
|     "max": { | ||||
|       "description": "Job metric maximum", | ||||
|       "type": "number", | ||||
|       "minimum": 0 | ||||
|     } | ||||
|   }, | ||||
|   "required": [ | ||||
|     "unit", | ||||
|     "avg", | ||||
|     "min", | ||||
|     "max" | ||||
|   ] | ||||
| } | ||||
|   | ||||
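Analogously, a job-metric-statistics document only needs unit, avg, min and max. The struct below is an illustrative local mirror of the schema, not the actual type from pkg/schema (which may be shaped differently):

// example_metricstatistics.go (illustrative sketch only)
package main

import (
	"encoding/json"
	"fmt"
)

type unit struct {
	Base   string `json:"base"`
	Prefix string `json:"prefix,omitempty"`
}

type metricStatistics struct {
	Unit unit    `json:"unit"`
	Avg  float64 `json:"avg"`
	Min  float64 `json:"min"`
	Max  float64 `json:"max"`
}

func main() {
	s := metricStatistics{Unit: unit{Base: "B/s", Prefix: "G"}, Avg: 12.3, Min: 0, Max: 40.1}
	out, _ := json.Marshal(s)
	fmt.Println(string(out)) // all four required properties are present
}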
| @@ -1,40 +1,41 @@ | ||||
| { | ||||
|     "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|     "$id": "embedfs://unit.schema.json", | ||||
|     "title": "Metric unit", | ||||
|     "description": "Format specification for job metric units", | ||||
|     "type": "object", | ||||
|     "properties": { | ||||
|         "base": { | ||||
|             "description": "Metric base unit", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|                 "B", | ||||
|                 "F", | ||||
|                 "B/s", | ||||
|                 "F/s", | ||||
|                 "CPI", | ||||
|                 "IPC", | ||||
|                 "Hz", | ||||
|                 "W", | ||||
|                 "°C", | ||||
|                 "" | ||||
|             ] | ||||
|         }, | ||||
|         "prefix": { | ||||
|             "description": "Unit prefix", | ||||
|             "type": "string", | ||||
|             "enum": [ | ||||
|                 "K", | ||||
|                 "M", | ||||
|                 "G", | ||||
|                 "T", | ||||
|                 "P", | ||||
|                 "E" | ||||
|             ] | ||||
|         } | ||||
|   "$schema": "http://json-schema.org/draft/2020-12/schema", | ||||
|   "$id": "embedfs://unit.schema.json", | ||||
|   "title": "Metric unit", | ||||
|   "description": "Format specification for job metric units", | ||||
|   "type": "object", | ||||
|   "properties": { | ||||
|     "base": { | ||||
|       "description": "Metric base unit", | ||||
|       "type": "string", | ||||
|       "enum": [ | ||||
|         "B", | ||||
|         "F", | ||||
|         "B/s", | ||||
|         "F/s", | ||||
|         "CPI", | ||||
|         "IPC", | ||||
|         "Hz", | ||||
|         "W", | ||||
|         "J", | ||||
|         "°C", | ||||
|         "" | ||||
|       ] | ||||
|     }, | ||||
|     "required": [ | ||||
|         "base" | ||||
|     ] | ||||
|     "prefix": { | ||||
|       "description": "Unit prefix", | ||||
|       "type": "string", | ||||
|       "enum": [ | ||||
|         "K", | ||||
|         "M", | ||||
|         "G", | ||||
|         "T", | ||||
|         "P", | ||||
|         "E" | ||||
|       ] | ||||
|     } | ||||
|   }, | ||||
|   "required": [ | ||||
|     "base" | ||||
|   ] | ||||
| } | ||||
|   | ||||
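The only functional change to the unit schema is the new "J" (Joule) entry in the base enum; the rest is re-indentation. A quick illustrative check against that enum follows (the enum values are copied from the schema above, the code itself is not part of the commit):

// example_unitcheck.go (illustrative sketch only)
package main

import "fmt"

// validBases mirrors the "base" enum of unit.schema.json after this change.
var validBases = map[string]bool{
	"B": true, "F": true, "B/s": true, "F/s": true, "CPI": true,
	"IPC": true, "Hz": true, "W": true, "J": true, "°C": true, "": true,
}

func main() {
	u := map[string]string{"base": "J", "prefix": "M"} // e.g. megajoules
	fmt.Println("base valid:", validBases[u["base"]])
}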
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -27,6 +27,7 @@ const ( | ||||
| 	AuthViaLocalPassword AuthSource = iota | ||||
| 	AuthViaLDAP | ||||
| 	AuthViaToken | ||||
| 	AuthViaOIDC | ||||
| 	AuthViaAll | ||||
| ) | ||||
|  | ||||
| @@ -41,11 +42,11 @@ type User struct { | ||||
| 	Username   string     `json:"username"` | ||||
| 	Password   string     `json:"-"` | ||||
| 	Name       string     `json:"name"` | ||||
| 	Email      string     `json:"email"` | ||||
| 	Roles      []string   `json:"roles"` | ||||
| 	Projects   []string   `json:"projects"` | ||||
| 	AuthType   AuthType   `json:"authType"` | ||||
| 	AuthSource AuthSource `json:"authSource"` | ||||
| 	Email      string     `json:"email"` | ||||
| 	Projects   []string   `json:"projects"` | ||||
| } | ||||
|  | ||||
| func (u *User) HasProject(project string) bool { | ||||
| @@ -84,6 +85,7 @@ func IsValidRole(role string) bool { | ||||
| 	return getRoleEnum(role) != RoleError | ||||
| } | ||||
|  | ||||
| // Check if User has SPECIFIED role AND role is VALID | ||||
| func (u *User) HasValidRole(role string) (hasRole bool, isValid bool) { | ||||
| 	if IsValidRole(role) { | ||||
| 		for _, r := range u.Roles { | ||||
| @@ -96,6 +98,7 @@ func (u *User) HasValidRole(role string) (hasRole bool, isValid bool) { | ||||
| 	return false, false | ||||
| } | ||||
|  | ||||
| // Check if User has SPECIFIED role | ||||
| func (u *User) HasRole(role Role) bool { | ||||
| 	for _, r := range u.Roles { | ||||
| 		if r == GetRoleString(role) { | ||||
| @@ -105,7 +108,7 @@ func (u *User) HasRole(role Role) bool { | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| // Role-Arrays are short: performance not impacted by nested loop | ||||
| // Check if User has ANY of the listed roles | ||||
| func (u *User) HasAnyRole(queryroles []Role) bool { | ||||
| 	for _, ur := range u.Roles { | ||||
| 		for _, qr := range queryroles { | ||||
| @@ -117,7 +120,7 @@ func (u *User) HasAnyRole(queryroles []Role) bool { | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| // Role-Arrays are short: performance not impacted by nested loop | ||||
| // Check if User has ALL of the listed roles | ||||
| func (u *User) HasAllRoles(queryroles []Role) bool { | ||||
| 	target := len(queryroles) | ||||
| 	matches := 0 | ||||
| @@ -137,7 +140,7 @@ func (u *User) HasAllRoles(queryroles []Role) bool { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Role-Arrays are short: performance not impacted by nested loop | ||||
| // Check if User has NONE of the listed roles | ||||
| func (u *User) HasNotRoles(queryroles []Role) bool { | ||||
| 	matches := 0 | ||||
| 	for _, ur := range u.Roles { | ||||
|   | ||||
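A usage sketch for the role helpers shown above (not part of this commit). It assumes the User type and role constants such as RoleAdmin and RoleSupport are exported from pkg/schema, as elsewhere in cc-backend:

// example_roles.go (illustrative sketch only)
package main

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
	u := &schema.User{
		Username: "alice",
		Roles:    []string{schema.GetRoleString(schema.RoleAdmin)},
	}

	// HasValidRole reports both "user has the role" and "role string is known".
	hasRole, isValid := u.HasValidRole("admin")
	fmt.Println(hasRole, isValid)

	// HasAnyRole is true if at least one of the listed roles is assigned.
	if u.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
		fmt.Println("grant access to support-level views")
	}
}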
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| // Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. | ||||
| // Copyright (C) NHR@FAU, University Erlangen-Nuremberg. | ||||
| // All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
| @@ -14,17 +14,20 @@ func TestValidateConfig(t *testing.T) { | ||||
|     "jwts": { | ||||
|         "max-age": "2m" | ||||
|     }, | ||||
| 	"clusters": [ | ||||
| 	{ | ||||
| 	   "name": "testcluster", | ||||
| 	   "metricDataRepository": { | ||||
| 		"kind": "cc-metric-store", | ||||
| 		 "url": "localhost:8082"}, | ||||
| 	   "filterRanges": { | ||||
| 		"numNodes": { "from": 1, "to": 64 }, | ||||
| 		"duration": { "from": 0, "to": 86400 }, | ||||
| 		"startTime": { "from": "2022-01-01T00:00:00Z", "to": null } | ||||
| 	}}] | ||||
|     "apiAllowedIPs": [ | ||||
|       "*" | ||||
|     ], | ||||
|     "clusters": [ | ||||
|     { | ||||
|        "name": "testcluster", | ||||
|        "metricDataRepository": { | ||||
|     	"kind": "cc-metric-store", | ||||
|     	 "url": "localhost:8082"}, | ||||
|        "filterRanges": { | ||||
|     	"numNodes": { "from": 1, "to": 64 }, | ||||
|     	"duration": { "from": 0, "to": 86400 }, | ||||
|     	"startTime": { "from": "2022-01-01T00:00:00Z", "to": null } | ||||
|     }}] | ||||
| }`) | ||||
|  | ||||
| 	if err := Validate(Config, bytes.NewReader(json)); err != nil { | ||||
| @@ -33,7 +36,6 @@ func TestValidateConfig(t *testing.T) { | ||||
| } | ||||
|  | ||||
| func TestValidateJobMeta(t *testing.T) { | ||||
|  | ||||
| } | ||||
|  | ||||
| func TestValidateCluster(t *testing.T) { | ||||
|   | ||||