Mirror of https://github.com/ClusterCockpit/cc-backend, synced 2024-12-25 04:49:05 +01:00
Merge pull request #104 from ClusterCockpit/import-data-sanitation

Import data sanitation. Among other things, this fixes:
* MetricConfig for GPU SubCluster (#99)
* Mismatch of the type of "id" in the job-metric-data "series" object schema (#101)
Commit 7272db4fb0

.gitignore (vendored): 4 changed lines
@@ -9,4 +9,6 @@
 
 /web/frontend/public/build
 /web/frontend/node_modules
-.vscode/settings.json
+/.vscode/*
+/archive-migration
+/archive-manager
GraphQL schema:

@@ -47,12 +47,17 @@ type SubCluster {
   socketsPerNode: Int!
   coresPerSocket: Int!
   threadsPerCore: Int!
-  flopRateScalar: Int!
-  flopRateSimd: Int!
-  memoryBandwidth: Int!
+  flopRateScalar: MetricValue!
+  flopRateSimd: MetricValue!
+  memoryBandwidth: MetricValue!
   topology: Topology!
 }
 
+type MetricValue {
+  unit: Unit!
+  value: Float!
+}
+
 type Topology {
   node: [Int!]
   socket: [[Int!]!]

@@ -70,23 +75,24 @@ type Accelerator {
 
 type SubClusterConfig {
   name: String!
-  peak: Float!
-  normal: Float!
-  caution: Float!
-  alert: Float!
-}
-
-type MetricConfig {
-  name: String!
-  unit: String!
-  scope: MetricScope!
-  aggregation: String
-  timestep: Int!
   peak: Float
   normal: Float
   caution: Float
   alert: Float
-  subClusters: [SubClusterConfig]
+  remove: Boolean
+}
+
+type MetricConfig {
+  name: String!
+  unit: Unit!
+  scope: MetricScope!
+  aggregation: String!
+  timestep: Int!
+  peak: Float!
+  normal: Float
+  caution: Float!
+  alert: Float!
+  subClusters: [SubClusterConfig!]!
 }
 
 type Tag {

@@ -104,12 +110,12 @@ type Resource {
 
 type JobMetricWithName {
   name: String!
+  scope: MetricScope!
   metric: JobMetric!
 }
 
 type JobMetric {
-  unit: String!
-  scope: MetricScope!
+  unit: Unit
   timestep: Int!
   series: [Series!]
   statisticsSeries: StatsSeries

@@ -117,11 +123,16 @@ type JobMetric {
 
 type Series {
   hostname: String!
-  id: Int
+  id: String
   statistics: MetricStatistics
   data: [NullableFloat!]!
 }
 
+type Unit {
+  base: String!
+  prefix: String
+}
+
 type MetricStatistics {
   avg: Float!
   min: Float!
Configuration file (JSON):

@@ -15,6 +15,7 @@
     "kind": "file",
     "path": "./var/job-archive"
   },
+  "validate": true,
  "clusters": [
    {
      "name": "test",

@@ -24,9 +25,18 @@
        "token": "eyJhbGciOiJF-E-pQBQ"
      },
      "filterRanges": {
-        "numNodes": { "from": 1, "to": 64 },
-        "duration": { "from": 0, "to": 86400 },
-        "startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
+        "numNodes": {
+          "from": 1,
+          "to": 64
+        },
+        "duration": {
+          "from": 0,
+          "to": 86400
+        },
+        "startTime": {
+          "from": "2022-01-01T00:00:00Z",
+          "to": null
+        }
      }
    }
  ],
Cluster-description generator script (Perl):

@@ -1,10 +1,12 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 use utf8;
 
 my %INFO;
 my %DOMAINS;
 
 my $SMT;
 my $numMemoryDomains;
 $DOMAINS{socket} = [];

@@ -198,8 +200,11 @@ END
 
    $INFO{gpus} .= join(",\n",@gpuStr);
    $INFO{gpus} .= "]\n";
+} else {
+    $INFO{gpus} = '';
 }
 
+
 print <<"END";
 {
   "name": "<FILL IN>",

@@ -219,10 +224,10 @@ print <<"END";
     "memoryDomain": [
       $INFO{memoryDomains}
     ],
-    $INFO{gpus}
     "core": [
       $INFO{cores}
     ]
+    $INFO{gpus}
   }
 }
 END
docs/ConfigurationManagement.md (new file, 37 lines)

@@ -0,0 +1,37 @@
+# Release versioning
+
+Releases are numbered with an integer id starting with 1.
+Every release embeds the following assets into the binary:
+* Web frontend including JavaScript files and all static assets
+* Golang template files for server-side rendering
+* JSON schema files for validation
+
+The remaining external assets are:
+* The SQL database used
+* The job archive
+
+Both external assets are also versioned using integer ids.
+This means every release binary is tied to specific versions of the SQL
+database and the job archive.
+A command line switch `--migrate-db` is provided to migrate the SQL database
+from a previous to the most recent version.
+We provide a separate tool `archive-migration` to migrate an existing job
+archive from the previous to the most recent version.
+
+# Versioning of APIs
+cc-backend provides two API backends:
+* A REST API for querying jobs
+* A GraphQL API used for data exchange between the web frontend and cc-backend
+
+Both APIs will also be versioned. We still need to decide whether we also support
+older REST API versions by versioning the endpoint URLs.
+
+# How to build a specific release
+
+
+# How to migrate the SQL database
+
+
+# How to migrate the job archive
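The version pinning described in this new document is enforced at startup: the filesystem archive backend changed later in this commit reads a version.txt file at the archive root and compares it to the Version constant compiled into the binary. The following Go sketch illustrates only that check; the package name and the helper name checkArchiveVersion are illustrative, while Version, version.txt, and the error message mirror what this commit introduces.

package sketch

import (
    "fmt"
    "os"
    "path/filepath"
    "strconv"
    "strings"
)

// Version mirrors the archive version constant this release supports
// (pkg/archive declares it as 1 in this commit).
const Version = 1

// checkArchiveVersion reads <archiveRoot>/version.txt, parses the integer it
// contains and rejects any archive whose version differs from Version. The
// helper name is illustrative; the real check sits inside FsArchive.Init.
func checkArchiveVersion(archiveRoot string) (int, error) {
    b, err := os.ReadFile(filepath.Join(archiveRoot, "version.txt"))
    if err != nil {
        return 0, err
    }
    version, err := strconv.Atoi(strings.TrimSpace(string(b)))
    if err != nil {
        return 0, err
    }
    if version != Version {
        return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
    }
    return version, nil
}

In cc-backend itself the check lives inside FsArchive.Init, which also returns the detected version so the caller can log it.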
Second configuration file (JSON):

@@ -13,9 +13,18 @@
      "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw"
    },
    "filterRanges": {
-      "numNodes": { "from": 1, "to": 32 },
-      "duration": { "from": 0, "to": 172800 },
-      "startTime": { "from": "2010-01-01T00:00:00Z", "to": null }
+      "numNodes": {
+        "from": 1,
+        "to": 32
+      },
+      "duration": {
+        "from": 0,
+        "to": 172800
+      },
+      "startTime": {
+        "from": "2010-01-01T00:00:00Z",
+        "to": null
+      }
    }
  },
  {

@@ -26,9 +35,18 @@
      "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw"
    },
    "filterRanges": {
-      "numNodes": { "from": 1, "to": 1 },
-      "duration": { "from": 0, "to": 172800 },
-      "startTime": { "from": "2015-01-01T00:00:00Z", "to": null }
+      "numNodes": {
+        "from": 1,
+        "to": 1
+      },
+      "duration": {
+        "from": 0,
+        "to": 172800
+      },
+      "startTime": {
+        "from": "2015-01-01T00:00:00Z",
+        "to": null
+      }
    }
  }
]
GraphQL generator configuration (gqlgen models):

@@ -63,6 +63,7 @@ models:
      resolver: true
  NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
  MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
+  MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
  JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
  Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
  Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }

@@ -79,3 +80,4 @@ models:
  FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
  SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
  StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
+  Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }

(One file diff suppressed because it is too large.)
GraphQL model types (Go):

@@ -60,6 +60,7 @@ type JobFilter struct {
 
 type JobMetricWithName struct {
    Name   string             `json:"name"`
+   Scope  schema.MetricScope `json:"scope"`
    Metric *schema.JobMetric  `json:"metric"`
 }
 
GraphQL resolvers (Go):

@@ -194,12 +194,9 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
    res := []*model.JobMetricWithName{}
    for name, md := range data {
        for scope, metric := range md {
-           if metric.Scope != schema.MetricScope(scope) {
-               log.Panic("metric.Scope != schema.MetricScope(scope) : Should not happen!")
-           }
-
            res = append(res, &model.JobMetricWithName{
                Name:   name,
+               Scope:  scope,
                Metric: metric,
            })
        }

@@ -296,6 +293,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
        for _, scopedMetric := range scopedMetrics {
            host.Metrics = append(host.Metrics, &model.JobMetricWithName{
                Name:   metric,
+               Scope:  schema.MetricScopeNode,
                Metric: scopedMetric,
            })
        }

@@ -307,6 +305,15 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
    return nodeMetrics, nil
 }
 
+// NumberOfNodes is the resolver for the numberOfNodes field.
+func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
+   nodeList, err := archive.ParseNodeList(obj.Nodes)
+   if err != nil {
+       return 0, err
+   }
+   return nodeList.NodeCount(), nil
+}
+
 // Cluster returns generated.ClusterResolver implementation.
 func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver{r} }
 

@@ -319,7 +326,11 @@ func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResol
 // Query returns generated.QueryResolver implementation.
 func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
 
+// SubCluster returns generated.SubClusterResolver implementation.
+func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
+
 type clusterResolver struct{ *Resolver }
 type jobResolver struct{ *Resolver }
 type mutationResolver struct{ *Resolver }
 type queryResolver struct{ *Resolver }
+type subClusterResolver struct{ *Resolver }
cc-metric-store metric data repository (Go):

@@ -164,7 +164,6 @@ func (ccms *CCMetricStore) LoadData(
    scopes []schema.MetricScope,
    ctx context.Context) (schema.JobData, error) {
 
-   topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
    queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
    if err != nil {
        log.Warn("Error while building queries")

@@ -201,7 +200,6 @@ func (ccms *CCMetricStore) LoadData(
        if !ok {
            jobMetric = &schema.JobMetric{
                Unit:     mc.Unit,
-               Scope:    scope,
                Timestep: mc.Timestep,
                Series:   make([]schema.Series, 0),
            }

@@ -215,13 +213,10 @@ func (ccms *CCMetricStore) LoadData(
                continue
            }
 
-           id := (*int)(nil)
+           id := (*string)(nil)
            if query.Type != nil {
-               id = new(int)
-               *id, err = strconv.Atoi(query.TypeIds[0])
-               if err != nil || *query.Type == acceleratorString {
-                   *id, _ = topology.GetAcceleratorIndex(query.TypeIds[0])
-               }
+               id = new(string)
+               *id = query.TypeIds[0]
            }
 
            if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {

@@ -235,7 +230,7 @@ func (ccms *CCMetricStore) LoadData(
            jobMetric.Series = append(jobMetric.Series, schema.Series{
                Hostname: query.Hostname,
                Id:       id,
-               Statistics: &schema.MetricStatistics{
+               Statistics: schema.MetricStatistics{
                    Avg: float64(res.Avg),
                    Min: float64(res.Min),
                    Max: float64(res.Max),

@@ -275,9 +270,14 @@ func (ccms *CCMetricStore) buildQueries(
    scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
 
    queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
-   topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
    assignedScope := []schema.MetricScope{}
 
+   subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
+   if scerr != nil {
+       return nil, nil, scerr
+   }
+   topology := subcluster.Topology
+
    for _, metric := range metrics {
        remoteName := ccms.toRemoteName(metric)
        mc := archive.GetMetricConfig(job.Cluster, metric)

@@ -293,7 +293,7 @@ func (ccms *CCMetricStore) buildQueries(
    scopesLoop:
        for _, requestedScope := range scopes {
            nativeScope := mc.Scope
-           if nativeScope == schema.MetricScopeAccelerator && job.NumAcc == 0 {
+           if nativeScope == schema.MetricScopeAccelerator && job.NumAcc == nil {
                continue
            }
 

@@ -624,13 +624,12 @@ func (ccms *CCMetricStore) LoadNodeData(
        mc := archive.GetMetricConfig(cluster, metric)
        hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
            Unit:     mc.Unit,
-           Scope:    schema.MetricScopeNode,
            Timestep: mc.Timestep,
            Series: []schema.Series{
                {
                    Hostname: query.Hostname,
                    Data:     qdata.Data,
-                   Statistics: &schema.MetricStatistics{
+                   Statistics: schema.MetricStatistics{
                        Avg: float64(qdata.Avg),
                        Min: float64(qdata.Min),
                        Max: float64(qdata.Max),
InfluxDB v2 metric data repository (Go):

@@ -134,7 +134,6 @@ func (idb *InfluxDBv2DataRepository) LoadData(
            jobMetric = map[schema.MetricScope]*schema.JobMetric{
                scope: { // uses scope var from above!
                    Unit:     mc.Unit,
-                   Scope:    scope,
                    Timestep: mc.Timestep,
                    Series:   make([]schema.Series, 0, len(job.Resources)),
                    StatisticsSeries: nil, // Should be: &schema.StatsSeries{},

@@ -159,7 +158,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(
            field, host = row.Measurement(), row.ValueByKey("hostname").(string)
            hostSeries = schema.Series{
                Hostname:   host,
-               Statistics: nil,
+               Statistics: schema.MetricStatistics{}, //TODO Add Statistics
                Data:       make([]schema.Float, 0),
            }
        }

@@ -212,15 +211,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(
    for _, scope := range scopes {
        if scope == "node" { // No 'socket/core' support yet
            for metric, nodes := range stats {
-               // log.Debugf("<< Add Stats for : Field %s >>", metric)
                for node, stats := range nodes {
-                   // log.Debugf("<< Add Stats for : Host %s : Min %.2f, Max %.2f, Avg %.2f >>", node, stats.Min, stats.Max, stats.Avg )
                    for index, _ := range jobData[metric][scope].Series {
-                       // log.Debugf("<< Try to add Stats to Series in Position %d >>", index)
                        if jobData[metric][scope].Series[index].Hostname == node {
-                           // log.Debugf("<< Match for Series in Position %d : Host %s >>", index, jobData[metric][scope].Series[index].Hostname)
-                           jobData[metric][scope].Series[index].Statistics = &schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
-                           // log.Debugf("<< Result Inner: Min %.2f, Max %.2f, Avg %.2f >>", jobData[metric][scope].Series[index].Statistics.Min, jobData[metric][scope].Series[index].Statistics.Max, jobData[metric][scope].Series[index].Statistics.Avg)
+                           jobData[metric][scope].Series[index].Statistics = schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
                        }
                    }
                }

@@ -228,17 +222,6 @@ func (idb *InfluxDBv2DataRepository) LoadData(
            }
        }
    }
-
-   // DEBUG:
-   // for _, scope := range scopes {
-   //  for _, met := range metrics {
-   //      for _, series := range jobData[met][scope].Series {
-   //          log.Debugf("<< Result: %d data points for metric %s on %s with scope %s, Stats: Min %.2f, Max %.2f, Avg %.2f >>",
-   //              len(series.Data), met, series.Hostname, scope,
-   //              series.Statistics.Min, series.Statistics.Max, series.Statistics.Avg)
-   //      }
-   //  }
-   // }
 
    return jobData, nil
 }
Job archiving (Go):

@@ -335,7 +335,10 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
        }
 
        jobMeta.Statistics[metric] = schema.JobStatistics{
-           Unit: archive.GetMetricConfig(job.Cluster, metric).Unit,
+           Unit: schema.Unit{
+               Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
+               Base:   archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
+           },
            Avg: avg / float64(job.NumNodes),
            Min: min,
            Max: max,
|
|||||||
return schema.Series{
|
return schema.Series{
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
Data: values,
|
Data: values,
|
||||||
Statistics: &schema.MetricStatistics{
|
Statistics: schema.MetricStatistics{
|
||||||
Avg: mean,
|
Avg: mean,
|
||||||
Min: min,
|
Min: min,
|
||||||
Max: max,
|
Max: max,
|
||||||
@ -323,7 +323,6 @@ func (pdb *PrometheusDataRepository) LoadData(
|
|||||||
if !ok {
|
if !ok {
|
||||||
jobMetric = &schema.JobMetric{
|
jobMetric = &schema.JobMetric{
|
||||||
Unit: metricConfig.Unit,
|
Unit: metricConfig.Unit,
|
||||||
Scope: scope,
|
|
||||||
Timestep: metricConfig.Timestep,
|
Timestep: metricConfig.Timestep,
|
||||||
Series: make([]schema.Series, 0),
|
Series: make([]schema.Series, 0),
|
||||||
}
|
}
|
||||||
@ -362,7 +361,7 @@ func (pdb *PrometheusDataRepository) LoadStats(
|
|||||||
for metric, metricData := range data {
|
for metric, metricData := range data {
|
||||||
stats[metric] = make(map[string]schema.MetricStatistics)
|
stats[metric] = make(map[string]schema.MetricStatistics)
|
||||||
for _, series := range metricData[schema.MetricScopeNode].Series {
|
for _, series := range metricData[schema.MetricScopeNode].Series {
|
||||||
stats[metric][series.Hostname] = *series.Statistics
|
stats[metric][series.Hostname] = series.Statistics
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -432,7 +431,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
|||||||
// output per host and metric
|
// output per host and metric
|
||||||
hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
|
hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
|
||||||
Unit: metricConfig.Unit,
|
Unit: metricConfig.Unit,
|
||||||
Scope: scope,
|
|
||||||
Timestep: metricConfig.Timestep,
|
Timestep: metricConfig.Timestep,
|
||||||
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
|
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
|
||||||
},
|
},
|
||||||
|
Job import and database initialization (Go):

@@ -17,6 +17,7 @@ import (
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
+   "github.com/ClusterCockpit/cc-backend/pkg/units"
 )
 
 const NamedJobInsert string = `INSERT INTO job (

@@ -75,6 +76,7 @@ func HandleImportFlag(flag string) error {
        return err
    }
 
+   checkJobData(&jobData)
    SanityChecks(&jobMeta.BaseJob)
    jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
    if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {

@@ -173,7 +175,9 @@ func InitDB() error {
    i := 0
    errorOccured := 0
 
-   for jobMeta := range ar.Iter() {
+   for jobContainer := range ar.Iter(false) {
+
+       jobMeta := jobContainer.Meta
 
        // // Bundle 100 inserts into one transaction for better performance:
        if i%10 == 0 {

@@ -297,7 +301,7 @@ func SanityChecks(job *schema.BaseJob) error {
    if len(job.Resources) == 0 || len(job.User) == 0 {
        return fmt.Errorf("'resources' and 'user' should not be empty")
    }
-   if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
+   if *job.NumAcc < 0 || *job.NumHWThreads < 0 || job.NumNodes < 1 {
        return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
    }
    if len(job.Resources) != int(job.NumNodes) {

@@ -314,3 +318,34 @@ func loadJobStat(job *schema.JobMeta, metric string) float64 {
 
    return 0.0
 }
+
+func checkJobData(d *schema.JobData) error {
+   for _, scopes := range *d {
+       var newUnit string
+       // Add node scope if missing
+       for _, metric := range scopes {
+           if strings.Contains(metric.Unit.Base, "B/s") ||
+               strings.Contains(metric.Unit.Base, "F/s") ||
+               strings.Contains(metric.Unit.Base, "B") {
+
+               // First get overall avg
+               sum := 0.0
+               for _, s := range metric.Series {
+                   sum += s.Statistics.Avg
+               }
+
+               avg := sum / float64(len(metric.Series))
+
+               for _, s := range metric.Series {
+                   fp := schema.ConvertFloatToFloat64(s.Data)
+                   // Normalize values with new unit prefix
+                   oldUnit := metric.Unit.Base
+                   units.NormalizeSeries(fp, avg, oldUnit, &newUnit)
+                   s.Data = schema.GetFloat64ToFloat(fp)
+               }
+               metric.Unit.Base = newUnit
+           }
+       }
+   }
+   return nil
+}
Job repository (Go):

@@ -335,7 +335,13 @@ func (r *JobRepository) DeleteJobById(id int64) error {
 }
 
 // TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
-func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggregate, filters []*model.JobFilter, weight *model.Weights, limit *int) (map[string]int, error) {
+func (r *JobRepository) CountGroupedJobs(
+   ctx context.Context,
+   aggreg model.Aggregate,
+   filters []*model.JobFilter,
+   weight *model.Weights,
+   limit *int) (map[string]int, error) {
+
    start := time.Now()
    if !aggreg.IsValid() {
        return nil, errors.New("invalid aggregate")
Archive backend interface, pkg/archive (Go):

@@ -8,13 +8,15 @@ import (
    "encoding/json"
    "fmt"
 
+   "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
-   "github.com/ClusterCockpit/cc-backend/pkg/log"
 )
 
+const Version = 1
+
 type ArchiveBackend interface {
-   Init(rawConfig json.RawMessage) error
+   Init(rawConfig json.RawMessage) (int, error)
 
    LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
 

@@ -28,7 +30,12 @@ type ArchiveBackend interface {
 
    GetClusters() []string
 
-   Iter() <-chan *schema.JobMeta
+   Iter(loadMetricData bool) <-chan JobContainer
+}
+
+type JobContainer struct {
+   Meta *schema.JobMeta
+   Data *schema.JobData
 }
 
 var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)

@@ -54,10 +61,12 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
        return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", kind.Kind)
    }
 
-   if err := ar.Init(rawConfig); err != nil {
+   version, err := ar.Init(rawConfig)
+   if err != nil {
        log.Error("Error while initializing archiveBackend")
        return err
    }
+   log.Infof("Load archive version %d", version)
    return initClusterConfig()
 }
 
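With the interface change above, Iter now takes a loadMetricData flag and yields JobContainer values instead of bare *schema.JobMeta. Below is a minimal consumer sketch assuming an already initialized ArchiveBackend; the package and function names are illustrative, while the field accesses mirror the test changes later in this diff.

package sketch

import (
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
)

// listArchivedJobs walks the archive metadata via the new Iter signature.
// Iter(false) leaves jobContainer.Data nil; Iter(true) would also load each
// job's metric data into the container.
func listArchivedJobs(ar archive.ArchiveBackend) {
    for jobContainer := range ar.Iter(false) {
        jobMeta := jobContainer.Meta
        fmt.Printf("job %d on cluster %s\n", jobMeta.JobID, jobMeta.Cluster)
    }
}

This is the same pattern InitDB in the import code above follows: iterate metadata only and insert each jobContainer.Meta into the database.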
Cluster configuration handling, pkg/archive (Go):

@@ -55,7 +55,7 @@ func initClusterConfig() error {
 
        nodeLists[cluster.Name] = make(map[string]NodeList)
        for _, sc := range cluster.SubClusters {
-           if sc.Nodes == "" {
+           if sc.Nodes == "*" {
                continue
            }
 

@@ -80,18 +80,17 @@ func GetCluster(cluster string) *schema.Cluster {
    return nil
 }
 
-func GetSubCluster(cluster, subcluster string) *schema.SubCluster {
-
+func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
    for _, c := range Clusters {
        if c.Name == cluster {
            for _, p := range c.SubClusters {
                if p.Name == subcluster {
-                   return p
+                   return p, nil
                }
            }
        }
    }
-   return nil
+   return nil, fmt.Errorf("Subcluster '%v' not found for cluster '%v', or cluster '%v' not configured!", subcluster, cluster, cluster)
 }
 
 func GetMetricConfig(cluster, metric string) *schema.MetricConfig {

@@ -138,7 +137,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
        }
    }
 
-   if cluster.SubClusters[0].Nodes == "" {
+   if cluster.SubClusters[0].Nodes == "*" {
        job.SubCluster = cluster.SubClusters[0].Name
        return nil
    }
Filesystem archive backend, pkg/archive (Go):

@@ -7,17 +7,21 @@ package archive
 import (
    "bufio"
    "bytes"
+   "compress/gzip"
    "encoding/json"
+   "errors"
    "fmt"
    "os"
    "path"
    "path/filepath"
    "strconv"
+   "strings"
    "time"
 
    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
+   "github.com/santhosh-tekuri/jsonschema/v5"
 )
 
 type FsArchiveConfig struct {

@@ -29,6 +33,11 @@ type FsArchive struct {
    clusters []string
 }
 
+func checkFileExists(filePath string) bool {
+   _, err := os.Stat(filePath)
+   return !errors.Is(err, os.ErrNotExist)
+}
+
 func getPath(
    job *schema.Job,
    rootPath string,

@@ -44,54 +53,109 @@ func getPath(
 
 func loadJobMeta(filename string) (*schema.JobMeta, error) {
 
-   f, err := os.Open(filename)
+   b, err := os.ReadFile(filename)
    if err != nil {
        log.Errorf("loadJobMeta() > open file error: %v", err)
        return &schema.JobMeta{}, err
    }
-   defer f.Close()
+   if config.Keys.Validate {
+       if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
+           return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err)
+       }
+   }
 
-   return DecodeJobMeta(bufio.NewReader(f))
+   return DecodeJobMeta(bytes.NewReader(b))
 }
 
-func (fsa *FsArchive) Init(rawConfig json.RawMessage) error {
+func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
+   f, err := os.Open(filename)
+
+   if err != nil {
+       log.Errorf("fsBackend LoadJobData()- %v", err)
+       return nil, err
+   }
+
+   if isCompressed {
+       r, err := gzip.NewReader(f)
+       if err != nil {
+           log.Errorf(" %v", err)
+           return nil, err
+       }
+       defer r.Close()
+
+       if config.Keys.Validate {
+           if err := schema.Validate(schema.Data, r); err != nil {
+               return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
+           }
+       }
+
+       return DecodeJobData(r, filename)
+   } else {
+       defer f.Close()
+       if config.Keys.Validate {
+           if err := schema.Validate(schema.Data, bufio.NewReader(f)); err != nil {
+               return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
+           }
+       }
+       return DecodeJobData(bufio.NewReader(f), filename)
+   }
+}
+
+func (fsa *FsArchive) Init(rawConfig json.RawMessage) (int, error) {
+
    var config FsArchiveConfig
    if err := json.Unmarshal(rawConfig, &config); err != nil {
        log.Warnf("Init() > Unmarshal error: %#v", err)
-       return err
+       return 0, err
    }
    if config.Path == "" {
        err := fmt.Errorf("Init() : empty config.Path")
        log.Errorf("Init() > config.Path error: %v", err)
-       return err
+       return 0, err
    }
    fsa.path = config.Path
 
+   b, err := os.ReadFile(filepath.Join(fsa.path, "version.txt"))
+   if err != nil {
+       fmt.Println("Err")
+       return 0, err
+   }
+
+   version, err := strconv.Atoi(strings.TrimSuffix(string(b), "\n"))
+   if err != nil {
+       log.Errorf("fsBackend Init()- %v", err)
+       return 0, err
+   }
+
+   if version != Version {
+       return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
+   }
+
    entries, err := os.ReadDir(fsa.path)
    if err != nil {
        log.Errorf("Init() > ReadDir() error: %v", err)
-       return err
+       return 0, err
    }
 
    for _, de := range entries {
+       if !de.IsDir() {
+           continue
+       }
        fsa.clusters = append(fsa.clusters, de.Name())
    }
 
-   return nil
+   return version, nil
 }
 
 func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
-   filename := getPath(job, fsa.path, "data.json")
-   f, err := os.Open(filename)
-   if err != nil {
-       log.Errorf("LoadJobData() > open file error: %v", err)
-       return nil, err
+   var isCompressed bool = true
+   filename := getPath(job, fsa.path, "data.json.gz")
+   if !checkFileExists(filename) {
+       filename = getPath(job, fsa.path, "data.json")
+       isCompressed = false
    }
-   defer f.Close()
 
-   return DecodeJobData(bufio.NewReader(f), filename)
+   return loadJobData(filename, isCompressed)
 }
 
 func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {

@@ -105,20 +169,19 @@ func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
    b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
    if err != nil {
        log.Errorf("LoadClusterCfg() > open file error: %v", err)
-       return &schema.Cluster{}, err
-   }
-   if config.Keys.Validate {
+       // if config.Keys.Validate {
        if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
            log.Warnf("Validate cluster config: %v\n", err)
-           return &schema.Cluster{}, fmt.Errorf("Validate cluster config: %v\n", err)
+           return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
        }
    }
+   // }
    return DecodeCluster(bytes.NewReader(b))
 }
 
-func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
+func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
 
-   ch := make(chan *schema.JobMeta)
+   ch := make(chan JobContainer)
    go func() {
        clustersDir, err := os.ReadDir(fsa.path)
        if err != nil {

@@ -126,6 +189,9 @@ func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
        }
 
        for _, clusterDir := range clustersDir {
+           if !clusterDir.IsDir() {
+               continue
+           }
            lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
            if err != nil {
                log.Fatalf("Reading jobs failed @ lvl1 dirs: %s", err.Error())

@@ -152,10 +218,27 @@ func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
                for _, startTimeDir := range startTimeDirs {
                    if startTimeDir.IsDir() {
                        job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
-                       if err != nil {
-                           log.Errorf("error in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
+                       if err != nil && !errors.Is(err, &jsonschema.ValidationError{}) {
+                           log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
+                       }
+
+                       if loadMetricData {
+                           var isCompressed bool = true
+                           filename := filepath.Join(dirpath, startTimeDir.Name(), "data.json.gz")
+
+                           if !checkFileExists(filename) {
+                               filename = filepath.Join(dirpath, startTimeDir.Name(), "data.json")
+                               isCompressed = false
+                           }
+
+                           data, err := loadJobData(filename, isCompressed)
+                           if err != nil && !errors.Is(err, &jsonschema.ValidationError{}) {
+                               log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
+                           }
+                           ch <- JobContainer{Meta: job, Data: &data}
+                           log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
                        } else {
-                           ch <- job
+                           ch <- JobContainer{Meta: job, Data: nil}
                        }
                    }
                }

@@ -225,6 +308,28 @@ func (fsa *FsArchive) ImportJob(
        return err
    }
 
+   // var isCompressed bool = true
+   // // TODO Use shortJob Config for check
+   // if jobMeta.Duration < 300 {
+   //  isCompressed = false
+   //  f, err = os.Create(path.Join(dir, "data.json"))
+   // } else {
+   //  f, err = os.Create(path.Join(dir, "data.json.gz"))
+   // }
+   // if err != nil {
+   //  return err
+   // }
+   //
+   // if isCompressed {
+   //  if err := EncodeJobData(gzip.NewWriter(f), jobData); err != nil {
+   //      return err
+   //  }
+   // } else {
+   //  if err := EncodeJobData(f, jobData); err != nil {
+   //      return err
+   //  }
+   // }
+
    f, err = os.Create(path.Join(dir, "data.json"))
    if err != nil {
        log.Error("Error while creating filepath for data.json")

@@ -236,9 +341,6 @@ func (fsa *FsArchive) ImportJob(
    }
    if err := f.Close(); err != nil {
        log.Warn("Error while closing data.json file")
+   }
    return err
 }
-
-// no error: final return is nil
-return nil
-}
Filesystem archive backend tests, pkg/archive (Go):

@@ -20,7 +20,7 @@ func init() {
 
 func TestInitEmptyPath(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"kind\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"kind\":\"../../test/archive\"}"))
    if err == nil {
        t.Fatal(err)
    }

@@ -28,14 +28,14 @@ func TestInitEmptyPath(t *testing.T) {
 
 func TestInitNoJson(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("\"path\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("\"path\":\"../../test/archive\"}"))
    if err == nil {
        t.Fatal(err)
    }
 }
 func TestInitNotExists(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/job-archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/job-archive\"}"))
    if err == nil {
        t.Fatal(err)
    }

@@ -43,15 +43,16 @@ func TestInitNotExists(t *testing.T) {
 
 func TestInit(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
+   version, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
    if err != nil {
        t.Fatal(err)
    }
 
    if fsa.path != "../../test/archive" {
        t.Fail()
    }
+   if version != 1 {
+       t.Fail()
+   }
    if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" {
        t.Fail()
    }

@@ -59,7 +60,7 @@ func TestInit(t *testing.T) {
 
 func TestLoadJobMetaInternal(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
    if err != nil {
        t.Fatal(err)
    }

@@ -82,7 +83,7 @@ func TestLoadJobMetaInternal(t *testing.T) {
 
 func TestLoadJobMeta(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
    if err != nil {
        t.Fatal(err)
    }

@@ -110,7 +111,7 @@ func TestLoadJobMeta(t *testing.T) {
 
 func TestLoadJobData(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
    if err != nil {
        t.Fatal(err)
    }

@@ -136,7 +137,7 @@ func TestLoadJobData(t *testing.T) {
 
 func TestLoadCluster(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
    if err != nil {
        t.Fatal(err)
    }

@@ -146,22 +147,22 @@ func TestLoadCluster(t *testing.T) {
        t.Fatal(err)
    }
 
-   if cfg.SubClusters[0].CoresPerSocket != 10 {
+   if cfg.SubClusters[0].CoresPerSocket != 4 {
        t.Fail()
    }
 }
 
 func TestIter(t *testing.T) {
    var fsa FsArchive
-   err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
+   _, err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
    if err != nil {
        t.Fatal(err)
    }
 
-   for job := range fsa.Iter() {
-       fmt.Printf("Job %d\n", job.JobID)
+   for job := range fsa.Iter(false) {
+       fmt.Printf("Job %d\n", job.Meta.JobID)
 
-       if job.Cluster != "emmy" {
+       if job.Meta.Cluster != "emmy" {
            t.Fail()
        }
    }
Node list parsing, pkg/archive (Go):

@@ -14,6 +14,8 @@ import (
 
 type NodeList [][]interface {
    consume(input string) (next string, ok bool)
+   limits() []map[string]int
+   prefix() string
 }
 
 func (nl *NodeList) Contains(name string) bool {

@@ -35,6 +37,44 @@ func (nl *NodeList) Contains(name string) bool {
    return false
 }
 
+func (nl *NodeList) PrintList() []string {
+   var out []string
+   for _, term := range *nl {
+       // Get String-Part first
+       prefix := term[0].prefix()
+       if len(term) == 1 { // If only String-Part in Term: Single Node Name -> Use as provided
+           out = append(out, prefix)
+       } else { // Else: Numeric start-end definition with x digits zeroPadded
+           limitArr := term[1].limits()
+           for _, inner := range limitArr {
+               for i := inner["start"]; i < inner["end"]+1; i++ {
+                   if inner["zeroPadded"] == 1 {
+                       out = append(out, fmt.Sprintf("%s%0*d", prefix, inner["digits"], i))
+                   } else {
+                       log.Error("node list: only zero-padded ranges are allowed")
+                   }
+               }
+           }
+       }
+   }
+   return out
+}
+
+func (nl *NodeList) NodeCount() int {
+   var out int = 0
+   for _, term := range *nl {
+       if len(term) == 1 { // If only String-Part in Term: Single Node Name -> add one
+           out += 1
+       } else { // Else: Numeric start-end definition -> add difference + 1
+           limitArr := term[1].limits()
+           for _, inner := range limitArr {
+               out += (inner["end"] - inner["start"]) + 1
+           }
+       }
+   }
+   return out
+}
+
 type NLExprString string
 
 func (nle NLExprString) consume(input string) (next string, ok bool) {

@@ -45,6 +85,16 @@ func (nle NLExprString) consume(input string) (next string, ok bool) {
    return "", false
 }
 
+func (nle NLExprString) limits() []map[string]int {
+   // Null implementation to fullfill interface requirement
+   l := make([]map[string]int, 0)
+   return l
+}
+
+func (nle NLExprString) prefix() string {
+   return string(nle)
+}
+
 type NLExprIntRanges []NLExprIntRange
 
 func (nles NLExprIntRanges) consume(input string) (next string, ok bool) {

@@ -56,6 +106,21 @@ func (nles NLExprIntRanges) consume(input string) (next string, ok bool) {
    return "", false
 }
 
+func (nles NLExprIntRanges) limits() []map[string]int {
+   l := make([]map[string]int, 0)
+   for _, nle := range nles {
+       inner := nle.limits()
+       l = append(l, inner[0])
+   }
+   return l
+}
+
+func (nles NLExprIntRanges) prefix() string {
+   // Null implementation to fullfill interface requirement
+   var s string
+   return s
+}
+
 type NLExprIntRange struct {
    start, end int64
    zeroPadded bool

@@ -89,6 +154,27 @@ func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
    return "", false
 }
 
+func (nle NLExprIntRange) limits() []map[string]int {
+   l := make([]map[string]int, 0)
+   m := make(map[string]int)
+   m["start"] = int(nle.start)
+   m["end"] = int(nle.end)
+   m["digits"] = int(nle.digits)
+   if nle.zeroPadded == true {
+       m["zeroPadded"] = 1
+   } else {
+       m["zeroPadded"] = 0
+   }
+   l = append(l, m)
+   return l
+}
+
+func (nles NLExprIntRange) prefix() string {
+   // Null implementation to fullfill interface requirement
+   var s string
+   return s
+}
+
 func ParseNodeList(raw string) (NodeList, error) {
    isLetter := func(r byte) bool { return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') }
    isDigit := func(r byte) bool { return '0' <= r && r <= '9' }

@@ -117,6 +203,8 @@ func ParseNodeList(raw string) (NodeList, error) {
    for _, rawterm := range rawterms {
        exprs := []interface {
            consume(input string) (next string, ok bool)
+           limits() []map[string]int
+           prefix() string
        }{}
 
        for i := 0; i < len(rawterm); i++ {
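The limits() and prefix() methods added here are what make the new NodeCount and PrintList helpers possible, and NodeCount is exactly what the numberOfNodes resolver earlier in this diff calls after ParseNodeList. A small usage sketch follows; the package name, function name, and the range string "node[01-04]" are illustrative examples rather than values taken from the repository.

package sketch

import (
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
)

// nodeListExample parses a zero-padded node range of the kind a subcluster's
// "nodes" field can contain and reports how many hosts it covers.
func nodeListExample() error {
    nodeList, err := archive.ParseNodeList("node[01-04]")
    if err != nil {
        return err
    }
    fmt.Println(nodeList.NodeCount()) // 4 hosts in the range
    fmt.Println(nodeList.PrintList()) // [node01 node02 node03 node04]
    return nil
}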
@@ -4,7 +4,10 @@
 // license that can be found in the LICENSE file.
 package schema
 
-import "strconv"
+import (
+	"fmt"
+	"strconv"
+)
 
 type Accelerator struct {
 	ID string `json:"id"`
@@ -16,23 +19,27 @@ type Topology struct {
 	Node         []int          `json:"node"`
 	Socket       [][]int        `json:"socket"`
 	MemoryDomain [][]int        `json:"memoryDomain"`
-	Die          [][]int        `json:"die"`
+	Die          [][]*int       `json:"die,omitempty"`
 	Core         [][]int        `json:"core"`
-	Accelerators []*Accelerator `json:"accelerators"`
+	Accelerators []*Accelerator `json:"accelerators,omitempty"`
+}
+
+type MetricValue struct {
+	Unit  Unit    `json:"unit"`
+	Value float64 `json:"value"`
 }
 
 type SubCluster struct {
 	Name            string      `json:"name"`
 	Nodes           string      `json:"nodes"`
-	NumberOfNodes   int         `json:"numberOfNodes"`
 	ProcessorType   string      `json:"processorType"`
 	SocketsPerNode  int         `json:"socketsPerNode"`
 	CoresPerSocket  int         `json:"coresPerSocket"`
 	ThreadsPerCore  int         `json:"threadsPerCore"`
-	FlopRateScalar  int         `json:"flopRateScalar"`
-	FlopRateSimd    int         `json:"flopRateSimd"`
-	MemoryBandwidth int         `json:"memoryBandwidth"`
-	Topology        *Topology   `json:"topology"`
+	FlopRateScalar  MetricValue `json:"flopRateScalar"`
+	FlopRateSimd    MetricValue `json:"flopRateSimd"`
+	MemoryBandwidth MetricValue `json:"memoryBandwidth"`
+	Topology        Topology    `json:"topology"`
 }
 
 type SubClusterConfig struct {
@@ -41,19 +48,20 @@ type SubClusterConfig struct {
 	Normal  float64 `json:"normal"`
 	Caution float64 `json:"caution"`
 	Alert   float64 `json:"alert"`
+	Remove  bool    `json:"remove"`
 }
 
 type MetricConfig struct {
 	Name        string              `json:"name"`
-	Unit        string              `json:"unit"`
+	Unit        Unit                `json:"unit"`
 	Scope       MetricScope         `json:"scope"`
-	Aggregation *string             `json:"aggregation"`
+	Aggregation string              `json:"aggregation"`
 	Timestep    int                 `json:"timestep"`
-	Peak        *float64            `json:"peak"`
-	Normal      *float64            `json:"normal"`
-	Caution     *float64            `json:"caution"`
-	Alert       *float64            `json:"alert"`
-	SubClusters []*SubClusterConfig `json:"subClusters"`
+	Peak        float64             `json:"peak"`
+	Normal      float64             `json:"normal"`
+	Caution     float64             `json:"caution"`
+	Alert       float64             `json:"alert"`
+	SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
 }
 
 type Cluster struct {
@@ -152,6 +160,15 @@ func (topo *Topology) GetMemoryDomainsFromHWThreads(
 	return memDoms, exclusive
 }
 
+// Temporary fix to convert back from int id to string id for accelerators
+func (topo *Topology) GetAcceleratorID(id int) (string, error) {
+	if id < len(topo.Accelerators) {
+		return topo.Accelerators[id].ID, nil
+	} else {
+		return "", fmt.Errorf("Index %d out of range", id)
+	}
+}
+
 func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
 	accels := make([]int, 0)
 	for _, accel := range topo.Accelerators {
@@ -163,12 +180,3 @@ func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
 	}
 	return accels, nil
 }
-
-func (topo *Topology) GetAcceleratorIndex(id string) (int, bool) {
-	for idx, accel := range topo.Accelerators {
-		if accel.ID == id {
-			return idx, true
-		}
-	}
-	return -1, false
-}
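To make the new `MetricValue` and `Unit` shapes concrete, here is a small, self-contained sketch that parses one of the sub-cluster values used in the test fixtures further down. The struct definitions mirror the types introduced by this commit; the `main` wrapper and the sample literal are illustrative only.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Local mirrors of the schema types as they appear in this diff.
type Unit struct {
	Base   string  `json:"base"`
	Prefix *string `json:"prefix,omitempty"`
}

type MetricValue struct {
	Unit  Unit    `json:"unit"`
	Value float64 `json:"value"`
}

func main() {
	// Sample taken from the flopRateScalar value used in the test cluster JSON.
	raw := []byte(`{"unit": {"prefix": "G", "base": "F/s"}, "value": 14}`)
	var fr MetricValue
	if err := json.Unmarshal(raw, &fr); err != nil {
		panic(err)
	}
	fmt.Printf("%s%s: %.0f\n", *fr.Unit.Prefix, fr.Unit.Base, fr.Value) // GF/s: 14
}
```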
@@ -83,10 +83,10 @@ func (s *Series) MarshalJSON() ([]byte, error) {
 	buf = append(buf, s.Hostname...)
 	buf = append(buf, '"')
 	if s.Id != nil {
-		buf = append(buf, `,"id":`...)
-		buf = strconv.AppendInt(buf, int64(*s.Id), 10)
+		buf = append(buf, `,"id":"`...)
+		buf = append(buf, *s.Id...)
+		buf = append(buf, '"')
 	}
-	if s.Statistics != nil {
 	buf = append(buf, `,"statistics":{"min":`...)
 	buf = strconv.AppendFloat(buf, s.Statistics.Min, 'f', 2, 64)
 	buf = append(buf, `,"avg":`...)
@@ -94,7 +94,6 @@ func (s *Series) MarshalJSON() ([]byte, error) {
 	buf = append(buf, `,"max":`...)
 	buf = strconv.AppendFloat(buf, s.Statistics.Max, 'f', 2, 64)
 	buf = append(buf, '}')
-	}
 	buf = append(buf, `,"data":[`...)
 	for i := 0; i < len(s.Data); i++ {
 		if i != 0 {
@@ -110,3 +109,23 @@ func (s *Series) MarshalJSON() ([]byte, error) {
 	buf = append(buf, ']', '}')
 	return buf, nil
 }
+
+func ConvertFloatToFloat64(s []Float) []float64 {
+	fp := make([]float64, len(s))
+
+	for i, val := range s {
+		fp[i] = float64(val)
+	}
+
+	return fp
+}
+
+func GetFloat64ToFloat(s []float64) []Float {
+	fp := make([]Float, len(s))
+
+	for i, val := range s {
+		fp[i] = Float(val)
+	}
+
+	return fp
+}
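Series ids are now serialized as JSON strings and statistics as a plain value. The hedged sketch below uses plain `encoding/json` with mirror types instead of the hand-written `MarshalJSON` above, purely to show the resulting wire format:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Minimal mirrors of the reworked types: the per-core/accelerator id is a
// *string and statistics are embedded by value, matching the schema change.
type MetricStatistics struct {
	Min float64 `json:"min"`
	Avg float64 `json:"avg"`
	Max float64 `json:"max"`
}

type Series struct {
	Hostname   string           `json:"hostname"`
	Id         *string          `json:"id,omitempty"`
	Statistics MetricStatistics `json:"statistics"`
	Data       []float64        `json:"data"`
}

func main() {
	id := "0"
	s := Series{
		Hostname:   "taurusi6489",
		Id:         &id,
		Statistics: MetricStatistics{Min: 0.09, Avg: 0.92, Max: 1.0},
		Data:       []float64{0.1, 0.9},
	}
	out, _ := json.Marshal(s)
	fmt.Println(string(out)) // the id appears as "id":"0", i.e. a JSON string
}
```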
@@ -21,18 +21,18 @@ type BaseJob struct {
 	Project    string `json:"project" db:"project" example:"abcd200"`      // The unique identifier of a project
 	Cluster    string `json:"cluster" db:"cluster" example:"fritz"`        // The unique identifier of a cluster
 	SubCluster string `json:"subCluster" db:"subcluster" example:"main"`   // The unique identifier of a sub cluster
-	Partition  string `json:"partition" db:"partition" example:"main"`     // The Slurm partition to which the job was submitted
+	Partition  *string `json:"partition,omitempty" db:"partition" example:"main"`  // The Slurm partition to which the job was submitted
-	ArrayJobId int64 `json:"arrayJobId" db:"array_job_id" example:"123000"`            // The unique identifier of an array job
+	ArrayJobId *int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`  // The unique identifier of an array job
 	NumNodes   int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`          // Number of nodes used (Min > 0)
-	NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads" example:"20" minimum:"1"`            // Number of HWThreads used (Min > 0)
+	NumHWThreads *int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`  // Number of HWThreads used (Min > 0)
-	NumAcc int32 `json:"numAcc" db:"num_acc" example:"2" minimum:"1"`            // Number of accelerators used (Min > 0)
+	NumAcc *int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`  // Number of accelerators used (Min > 0)
 	Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`  // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
-	MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`            // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
+	MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`  // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
-	SMT int32 `json:"smt" db:"smt" example:"4"`            // SMT threads used by job
+	SMT *int32 `json:"smt,omitempty" db:"smt" example:"4"`  // SMT threads used by job
-	State JobState `json:"jobState" db:"job_state" example:"completed"`  // Final state of job
+	State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`  // Final state of job
 	Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`  // Duration of job in seconds (Min > 0)
-	Walltime int64 `json:"walltime" db:"walltime" example:"86400" minimum:"1"`            // Requested walltime of job in seconds (Min > 0)
+	Walltime *int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`  // Requested walltime of job in seconds (Min > 0)
-	Tags []*Tag `json:"tags"`            // List of tags
+	Tags []*Tag `json:"tags,omitempty"`  // List of tags
 	RawResources []byte `json:"-" db:"resources"`        // Resources used by job [As Bytes]
 	Resources []*Resource `json:"resources"`             // Resources used by job
 	RawMetaData []byte `json:"-" db:"meta_data"`         // Additional information about the job [As Bytes]
@@ -89,11 +89,15 @@ var JobDefaults BaseJob = BaseJob{
 	MonitoringStatus: MonitoringStatusRunningOrArchiving,
 }
 
+type Unit struct {
+	Base   string  `json:"base"`
+	Prefix *string `json:"prefix,omitempty"`
+}
+
 // JobStatistics model
 // @Description Specification for job metric statistics.
 type JobStatistics struct {
-	// Metric unit (see schema/unit.schema.json)
-	Unit string `json:"unit" example:"GHz"`
+	Unit Unit    `json:"unit" example:"GHz"`
 	Avg float64 `json:"avg" example:"2500" minimum:"0"`  // Job metric average
 	Min float64 `json:"min" example:"2000" minimum:"0"`  // Job metric minimum
 	Max float64 `json:"max" example:"3000" minimum:"0"`  // Job metric maximum
@@ -102,6 +106,7 @@ type JobStatistics struct {
 // Tag model
 // @Description Defines a tag using name and type.
 type Tag struct {
+	// The unique DB identifier of a tag
 	// The unique DB identifier of a tag
 	ID   int64  `json:"id" db:"id"`
 	Type string `json:"type" db:"tag_type" example:"Debug"`  // Tag Type
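Several optional job fields switch from value types to pointers with `,omitempty`. A minimal sketch (mirror struct, not the real `BaseJob`) of the practical effect: absent values drop out of the serialized job instead of appearing as zeroes or empty strings.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// jobSketch is a hypothetical stand-in for a few of the reworked fields.
type jobSketch struct {
	Partition *string `json:"partition,omitempty"`
	NumAcc    *int32  `json:"numAcc,omitempty"`
	NumNodes  int32   `json:"numNodes"`
}

func main() {
	part := "work"
	withPartition, _ := json.Marshal(jobSketch{Partition: &part, NumNodes: 32})
	withoutOptional, _ := json.Marshal(jobSketch{NumNodes: 32})
	fmt.Println(string(withPartition))   // {"partition":"work","numNodes":32}
	fmt.Println(string(withoutOptional)) // {"numNodes":32}
}
```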
@@ -15,17 +15,16 @@ import (
 type JobData map[string]map[MetricScope]*JobMetric
 
 type JobMetric struct {
-	Unit             string       `json:"unit"`
-	Scope            MetricScope  `json:"scope"`
+	Unit             Unit         `json:"unit"`
 	Timestep         int          `json:"timestep"`
 	Series           []Series     `json:"series"`
-	StatisticsSeries *StatsSeries `json:"statisticsSeries"`
+	StatisticsSeries *StatsSeries `json:"statisticsSeries,omitempty"`
 }
 
 type Series struct {
 	Hostname   string            `json:"hostname"`
-	Id         *int              `json:"id,omitempty"`
-	Statistics *MetricStatistics `json:"statistics"`
+	Id         *string           `json:"id,omitempty"`
+	Statistics MetricStatistics  `json:"statistics"`
 	Data       []Float           `json:"data"`
 }
 
@@ -218,17 +217,12 @@ func (jd *JobData) AddNodeScope(metric string) bool {
 
 	nodeJm := &JobMetric{
 		Unit:     jm.Unit,
-		Scope:    MetricScopeNode,
 		Timestep: jm.Timestep,
 		Series:   make([]Series, 0, len(hosts)),
 	}
 	for hostname, series := range hosts {
 		min, sum, max := math.MaxFloat32, 0.0, -math.MaxFloat32
 		for _, series := range series {
-			if series.Statistics == nil {
-				min, sum, max = math.NaN(), math.NaN(), math.NaN()
-				break
-			}
 			sum += series.Statistics.Avg
 			min = math.Min(min, series.Statistics.Min)
 			max = math.Max(max, series.Statistics.Max)
@@ -259,7 +253,7 @@ func (jd *JobData) AddNodeScope(metric string) bool {
 
 		nodeJm.Series = append(nodeJm.Series, Series{
 			Hostname:   hostname,
-			Statistics: &MetricStatistics{Min: min, Avg: sum / float64(len(series)), Max: max},
+			Statistics: MetricStatistics{Min: min, Avg: sum / float64(len(series)), Max: max},
 			Data:       data,
 		})
 	}
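For reference, a standalone sketch of the per-node min/avg/max aggregation that `AddNodeScope` performs after this change; since `Statistics` is a value now, the nil check is gone and every series is expected to carry statistics. Types and sample numbers here are illustrative, not taken from the repository.

```go
package main

import (
	"fmt"
	"math"
)

type stats struct{ Min, Avg, Max float64 }

// aggregate folds several per-core statistics into one node-level statistic,
// mirroring the loop body in AddNodeScope.
func aggregate(series []stats) stats {
	min, sum, max := math.MaxFloat32, 0.0, -math.MaxFloat32
	for _, s := range series {
		sum += s.Avg
		min = math.Min(min, s.Min)
		max = math.Max(max, s.Max)
	}
	return stats{Min: min, Avg: sum / float64(len(series)), Max: max}
}

func main() {
	perCore := []stats{{0.1, 0.9, 1.0}, {0.2, 0.8, 0.95}}
	fmt.Printf("%+v\n", aggregate(perCore)) // approximately {Min:0.1 Avg:0.85 Max:1}
}
```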
@@ -21,7 +21,7 @@
         },
         "unit": {
           "description": "Metric unit",
-          "type": "string"
+          "$ref": "embedfs://unit.schema.json"
         },
         "scope": {
           "description": "Native measurement resolution",
@@ -38,7 +38,22 @@
             "sum",
             "avg"
           ]
+        },
+        "peak": {
+          "description": "Metric peak threshold (Upper metric limit)",
+          "type": "number"
+        },
+        "normal": {
+          "description": "Metric normal threshold",
+          "type": "number"
+        },
+        "caution": {
+          "description": "Metric caution threshold (Suspicious but does not require immediate action)",
+          "type": "number"
+        },
+        "alert": {
+          "description": "Metric alert threshold (Requires immediate action)",
+          "type": "number"
         },
         "subClusters": {
           "description": "Array of cluster hardware partition metric thresholds",
@@ -61,13 +76,13 @@
             },
             "alert": {
               "type": "number"
+            },
+            "remove": {
+              "type": "boolean"
             }
           },
           "required": [
-            "name",
-            "peak",
-            "caution",
-            "alert"
+            "name"
           ]
         }
       }
@@ -76,7 +91,12 @@
         "name",
         "unit",
         "scope",
-        "timestep"
+        "timestep",
+        "aggregation",
+        "peak",
+        "normal",
+        "caution",
+        "alert"
       ]
     },
     "minItems": 1
@@ -109,15 +129,42 @@
       },
       "flopRateScalar": {
         "description": "Theoretical node peak flop rate for scalar code in GFlops/s",
-        "type": "integer"
+        "type": "object",
+        "properties": {
+          "unit": {
+            "description": "Metric unit",
+            "$ref": "embedfs://unit.schema.json"
+          },
+          "value": {
+            "type": "number"
+          }
+        }
       },
       "flopRateSimd": {
         "description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
-        "type": "integer"
+        "type": "object",
+        "properties": {
+          "unit": {
+            "description": "Metric unit",
+            "$ref": "embedfs://unit.schema.json"
+          },
+          "value": {
+            "type": "number"
+          }
+        }
      },
       "memoryBandwidth": {
         "description": "Theoretical node peak memory bandwidth in GB/s",
-        "type": "integer"
+        "type": "object",
+        "properties": {
+          "unit": {
+            "description": "Metric unit",
+            "$ref": "embedfs://unit.schema.json"
+          },
+          "value": {
+            "type": "number"
+          }
+        }
       },
       "nodes": {
         "description": "Node list expression",
@@ -215,6 +262,7 @@
       },
       "required": [
         "name",
+        "nodes",
         "topology",
         "processorType",
         "socketsPerNode",
@@ -86,8 +86,8 @@
     },
     "minProperties": 1
   },
-  "cpu_used": {
-    "description": "CPU active core utilization",
+  "cpu_user": {
+    "description": "CPU user active core utilization",
     "properties": {
       "node": {
         "$ref": "embedfs://job-metric-data.schema.json"
@@ -479,7 +479,7 @@
     ]
   },
   "required": [
-    "cpu_used",
+    "cpu_user",
     "mem_used",
     "flops_any",
     "mem_bw",
@@ -84,11 +84,6 @@
       "type": "integer",
       "exclusiveMinimum": 0
     },
-    "stopTime": {
-      "description": "Stop epoch time stamp in seconds",
-      "type": "integer",
-      "exclusiveMinimum": 0
-    },
     "duration": {
       "description": "Duration of job in seconds",
       "type": "integer",
@@ -198,8 +193,8 @@
       "description": "Instructions executed per cycle",
       "$ref": "embedfs://job-metric-statistics.schema.json"
     },
-    "cpu_used": {
-      "description": "CPU active core utilization",
+    "cpu_user": {
+      "description": "CPU user active core utilization",
       "$ref": "embedfs://job-metric-statistics.schema.json"
     },
     "flops_dp": {
@@ -331,7 +326,7 @@
     }
   },
   "required": [
-    "cpu_used",
+    "cpu_user",
     "mem_used",
     "flops_any",
     "mem_bw"
@@ -343,13 +338,13 @@
     "user",
     "project",
     "cluster",
+    "subCluster",
     "numNodes",
     "exclusive",
     "startTime",
     "jobState",
     "duration",
     "resources",
-    "tags",
     "statistics"
   ]
 }
@@ -193,7 +193,7 @@
     },
     "data": {
       "type": "array",
-      "items": {
+      "contains": {
         "type": "number",
         "minimum": 0
       },
@@ -5,7 +5,7 @@
   "description": "Format specification for job metric units",
   "type": "object",
   "properties": {
-    "base_unit": {
+    "base": {
       "description": "Metric base unit",
       "type": "string",
       "enum": [
@@ -15,7 +15,6 @@
         "F/s",
         "CPI",
         "IPC",
-        "load",
         "Hz",
         "W",
         "°C",
@@ -36,6 +35,6 @@
     }
   },
   "required": [
-    "base_unit"
+    "base"
   ]
 }
@@ -45,9 +45,29 @@ func TestValidateCluster(t *testing.T) {
             "socketsPerNode": 2,
             "coresPerSocket": 10,
             "threadsPerCore": 2,
-            "flopRateScalar": 44,
-            "flopRateSimd": 704,
-            "memoryBandwidth": 80,
+            "flopRateScalar": {
+                "unit": {
+                    "prefix": "G",
+                    "base": "F/s"
+                },
+                "value": 14
+            },
+            "flopRateSimd": {
+                "unit": {
+                    "prefix": "G",
+                    "base": "F/s"
+                },
+                "value": 112
+            },
+            "memoryBandwidth": {
+                "unit": {
+                    "prefix": "G",
+                    "base": "B/s"
+                },
+                "value": 24
+            },
+            "numberOfNodes": 70,
+            "nodes": "w11[27-45,49-63,69-72]",
             "topology": {
                 "node": [0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29,10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39],
                 "socket": [
@@ -68,8 +88,13 @@ func TestValidateCluster(t *testing.T) {
         {
             "name": "cpu_load",
             "scope": "hwthread",
-            "unit": "load",
-            "timestep": 60
+            "unit": {"base": ""},
+            "aggregation": "avg",
+            "timestep": 60,
+            "peak": 4,
+            "normal": 2,
+            "caution": 1,
+            "alert": 0.25
         }
     ]
 }`)
@@ -1,6 +1,7 @@
 # cc-units - A unit system for ClusterCockpit
 
-When working with metrics, the problem comes up that they may use different unit name but have the same unit in fact. There are a lot of real world examples like 'kB' and 'Kbyte'. In [cc-metric-collector](https://github.com/ClusterCockpit/cc-metric-collector), the collectors read data from different sources which may use different units or the programmer specifies a unit for a metric by hand. The cc-units system is not comparable with the SI unit system. If you are looking for a package for the SI units, see [here](https://pkg.go.dev/github.com/gurre/si).
+When working with metrics, the problem comes up that they may use different unit name but have the same unit in fact.
+There are a lot of real world examples like 'kB' and 'Kbyte'. In [cc-metric-collector](https://github.com/ClusterCockpit/cc-metric-collector), the collectors read data from different sources which may use different units or the programmer specifies a unit for a metric by hand. The cc-units system is not comparable with the SI unit system. If you are looking for a package for the SI units, see [here](https://pkg.go.dev/github.com/gurre/si).
 
 In order to enable unit comparison and conversion, the ccUnits package provides some helpers:
 ```go
@@ -39,7 +39,7 @@ var MeasuresMap map[Measure]MeasureData = map[Measure]MeasureData{
 	},
 	Flops: {
 		Long:  "Flops",
-		Short: "Flops",
+		Short: "F",
 		Regex: "^([fF][lL]?[oO]?[pP]?[sS]?)",
 	},
 	Percentage: {
@@ -1,6 +1,7 @@
 package units
 
 import (
+	"math"
 	"regexp"
 )
 
@@ -172,3 +173,20 @@ func NewPrefix(prefix string) Prefix {
 	}
 	return InvalidPrefix
 }
+
+func getExponent(p float64) int {
+	count := 0
+
+	for p > 1.0 {
+		p = p / 1000.0
+		count++
+	}
+
+	return count * 3
+}
+
+func NewPrefixFromFactor(op Prefix, e int) Prefix {
+	f := float64(op)
+	exp := math.Pow10(getExponent(f) - e)
+	return Prefix(exp)
+}
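A standalone restatement of the exponent arithmetic added above, with one worked value of my own choosing: a prefix factor of 1e9 yields exponent 9, and shifting it by e = 3 gives back the 1e6 factor.

```go
package main

import (
	"fmt"
	"math"
)

// getExponent is copied from the diff above: it counts powers of 1000 in p.
func getExponent(p float64) int {
	count := 0
	for p > 1.0 {
		p = p / 1000.0
		count++
	}
	return count * 3
}

func main() {
	giga := 1e9
	fmt.Println(getExponent(giga))                 // 9
	fmt.Println(math.Pow10(getExponent(giga) - 3)) // 1e+06, i.e. the factor three decades below
}
```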
@@ -3,7 +3,10 @@ package units
 
 import (
 	"fmt"
+	"math"
 	"strings"
+
+	"github.com/ClusterCockpit/cc-backend/pkg/schema"
 )
 
 type unit struct {
@@ -25,7 +28,9 @@ type Unit interface {
 
 var INVALID_UNIT = NewUnit("foobar")
 
-// Valid checks whether a unit is a valid unit. A unit is valid if it has at least a prefix and a measure. The unit denominator is optional.
+// Valid checks whether a unit is a valid unit.
+// A unit is valid if it has at least a prefix and a measure.
+// The unit denominator is optional.
 func (u *unit) Valid() bool {
 	return u.prefix != InvalidPrefix && u.measure != InvalidMeasure
 }
@@ -71,6 +76,90 @@ func (u *unit) getUnitDenominator() Measure {
 	return u.divMeasure
 }
+
+func ConvertValue(v *float64, from string, to string) {
+	uf := NewUnit(from)
+	ut := NewUnit(to)
+	factor := float64(uf.getPrefix()) / float64(ut.getPrefix())
+	*v = math.Ceil(*v * factor)
+}
+
+func ConvertSeries(s []float64, from string, to string) {
+	uf := NewUnit(from)
+	ut := NewUnit(to)
+	factor := float64(uf.getPrefix()) / float64(ut.getPrefix())
+
+	for i := 0; i < len(s); i++ {
+		s[i] = math.Ceil(s[i] * factor)
+	}
+}
+
+func getNormalizationFactor(v float64) (float64, int) {
+	count := 0
+	scale := -3
+
+	if v > 1000.0 {
+		for v > 1000.0 {
+			v *= 1e-3
+			count++
+		}
+	} else {
+		for v < 1.0 {
+			v *= 1e3
+			count++
+		}
+		scale = 3
+	}
+	return math.Pow10(count * scale), count * scale
+}
+
+func NormalizeValue(v *float64, us string, nu *string) {
+	u := NewUnit(us)
+	f, e := getNormalizationFactor((*v))
+	*v = math.Ceil(*v * f)
+	u.setPrefix(NewPrefixFromFactor(u.getPrefix(), e))
+	*nu = u.Short()
+}
+
+func NormalizeSeries(s []float64, avg float64, us string, nu *string) {
+	u := NewUnit(us)
+	f, e := getNormalizationFactor(avg)
+
+	for i := 0; i < len(s); i++ {
+		s[i] *= f
+		s[i] = math.Ceil(s[i])
+	}
+	u.setPrefix(NewPrefixFromFactor(u.getPrefix(), e))
+	fmt.Printf("Prefix: %e \n", u.getPrefix())
+	*nu = u.Short()
+}
+
+func ConvertUnitString(us string) schema.Unit {
+	var nu schema.Unit
+
+	if us == "CPI" ||
+		us == "IPC" ||
+		us == "load" ||
+		us == "" {
+		nu.Base = us
+		return nu
+	}
+	u := NewUnit(us)
+	p := u.getPrefix()
+	if p.Prefix() != "" {
+		prefix := p.Prefix()
+		nu.Prefix = &prefix
+	}
+	m := u.getMeasure()
+	d := u.getUnitDenominator()
+	if d.Short() != "inval" {
+		nu.Base = fmt.Sprintf("%s/%s", m.Short(), d.Short())
+	} else {
+		nu.Base = m.Short()
+	}
+
+	return nu
+}
+
 // GetPrefixPrefixFactor creates the default conversion function between two prefixes.
 // It returns a conversation function for the value.
 func GetPrefixPrefixFactor(in Prefix, out Prefix) func(value interface{}) interface{} {
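To make the normalization numbers in the tests that follow easier to verify, here is the same `getNormalizationFactor` logic as a standalone sketch with one worked value: 103456 scaled by 1e-3 and rounded up gives 104, with the prefix moving one step up (MB/s to GB/s in `TestNormalizeValue`).

```go
package main

import (
	"fmt"
	"math"
)

// getNormalizationFactor is copied from the diff above: it returns the factor
// that brings v into the range [1, 1000) and the matching decimal exponent.
func getNormalizationFactor(v float64) (float64, int) {
	count := 0
	scale := -3
	if v > 1000.0 {
		for v > 1000.0 {
			v *= 1e-3
			count++
		}
	} else {
		for v < 1.0 {
			v *= 1e3
			count++
		}
		scale = 3
	}
	return math.Pow10(count * scale), count * scale
}

func main() {
	v := 103456.0
	f, e := getNormalizationFactor(v)
	fmt.Println(math.Ceil(v*f), e) // 104 -3, i.e. 104 with the unit prefix raised by one step
}
```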
@@ -2,6 +2,7 @@ package units
 
 import (
 	"fmt"
+	"reflect"
 	"regexp"
 	"testing"
 )
@@ -199,3 +200,108 @@ func TestPrefixRegex(t *testing.T) {
 		t.Logf("succussfully compiled regex '%s' for prefix %s", data.Regex, data.Long)
 	}
 }
+
+func TestConvertValue(t *testing.T) {
+	v := float64(103456)
+	ConvertValue(&v, "MB/s", "GB/s")
+
+	if v != 104.00 {
+		t.Errorf("Failed ConvertValue: Want 103.456, Got %f", v)
+	}
+}
+
+func TestConvertValueUp(t *testing.T) {
+	v := float64(10.3456)
+	ConvertValue(&v, "GB/s", "MB/s")
+
+	if v != 10346.00 {
+		t.Errorf("Failed ConvertValue: Want 10346.00, Got %f", v)
+	}
+}
+func TestConvertSeries(t *testing.T) {
+	s := []float64{2890031237, 23998994567, 389734042344, 390349424345}
+	r := []float64{3, 24, 390, 391}
+	ConvertSeries(s, "F/s", "GF/s")
+
+	if !reflect.DeepEqual(s, r) {
+		t.Errorf("Failed ConvertValue: Want 3, 24, 390, 391, Got %v", s)
+	}
+}
+
+func TestNormalizeValue(t *testing.T) {
+	var s string
+	v := float64(103456)
+
+	NormalizeValue(&v, "MB/s", &s)
+
+	if v != 104.00 {
+		t.Errorf("Failed ConvertValue: Want 104.00, Got %f", v)
+	}
+	if s != "GB/s" {
+		t.Errorf("Failed Prefix or unit: Want GB/s, Got %s", s)
+	}
+}
+
+func TestNormalizeValueNoPrefix(t *testing.T) {
+	var s string
+	v := float64(103458596)
+
+	NormalizeValue(&v, "F/s", &s)
+
+	if v != 104.00 {
+		t.Errorf("Failed ConvertValue: Want 104.00, Got %f", v)
+	}
+	if s != "MF/s" {
+		t.Errorf("Failed Prefix or unit: Want MF/s, Got %s", s)
+	}
+}
+
+func TestNormalizeValueKeep(t *testing.T) {
+	var s string
+	v := float64(345)
+
+	NormalizeValue(&v, "MB/s", &s)
+
+	if v != 345.00 {
+		t.Errorf("Failed ConvertValue: Want 104.00, Got %f", v)
+	}
+	if s != "MB/s" {
+		t.Errorf("Failed Prefix or unit: Want GB/s, Got %s", s)
+	}
+}
+
+func TestNormalizeValueDown(t *testing.T) {
+	var s string
+	v := float64(0.0004578)
+
+	NormalizeValue(&v, "GB/s", &s)
+
+	if v != 458.00 {
+		t.Errorf("Failed ConvertValue: Want 458.00, Got %f", v)
+	}
+	if s != "KB/s" {
+		t.Errorf("Failed Prefix or unit: Want KB/s, Got %s", s)
+	}
+}
+
+func TestNormalizeSeries(t *testing.T) {
+	var us string
+	s := []float64{2890031237, 23998994567, 389734042344, 390349424345}
+	r := []float64{3, 24, 390, 391}
+
+	total := 0.0
+	for _, number := range s {
+		total += number
+	}
+	avg := total / float64(len(s))
+
+	fmt.Printf("AVG: %e\n", avg)
+	NormalizeSeries(s, avg, "KB/s", &us)
+
+	if !reflect.DeepEqual(s, r) {
+		t.Errorf("Failed ConvertValue: Want 3, 24, 390, 391, Got %v", s)
+	}
+	if us != "TB/s" {
+		t.Errorf("Failed Prefix or unit: Want TB/s, Got %s", us)
+	}
+}
File diff suppressed because one or more lines are too long

BIN  test/archive/emmy/1403/244/1608923076/data.json.gz  (new file; binary file not shown)

@@ -1 +1,194 @@
-{"exclusive":1,"jobId":1403244,"statistics":{"mem_bw":{"avg":63.57,"min":0,"unit":"GB/s","max":74.5},"rapl_power":{"avg":228.07,"min":0,"unit":"W","max":258.56},"ipc":{"unit":"IPC","max":0.510204081632653,"avg":1.53846153846154,"min":0.0},"clock":{"min":1380.32,"avg":2599.39,"unit":"MHz","max":2634.46},"cpu_load":{"avg":18.4,"min":0,"max":23.58,"unit":"load"},"flops_any":{"max":404.62,"unit":"GF/s","avg":225.59,"min":0},"flops_dp":{"max":0.24,"unit":"GF/s","min":0,"avg":0},"mem_used":{"min":1.55,"avg":27.84,"unit":"GB","max":37.5},"flops_sp":{"min":0,"avg":225.59,"max":404.62,"unit":"GF/s"}},"resources":[{"hostname":"e0102"},{"hostname":"e0103"},{"hostname":"e0105"},{"hostname":"e0106"},{"hostname":"e0107"},{"hostname":"e0108"},{"hostname":"e0114"},{"hostname":"e0320"},{"hostname":"e0321"},{"hostname":"e0325"},{"hostname":"e0404"},{"hostname":"e0415"},{"hostname":"e0433"},{"hostname":"e0437"},{"hostname":"e0439"},{"hostname":"e0501"},{"hostname":"e0503"},{"hostname":"e0505"},{"hostname":"e0506"},{"hostname":"e0512"},{"hostname":"e0513"},{"hostname":"e0514"},{"hostname":"e0653"},{"hostname":"e0701"},{"hostname":"e0716"},{"hostname":"e0727"},{"hostname":"e0728"},{"hostname":"e0925"},{"hostname":"e0926"},{"hostname":"e0929"},{"hostname":"e0934"},{"hostname":"e0951"}],"walltime":10,"jobState":"completed","cluster":"emmy","stopTime":1609009562,"user":"emmyUser6","startTime":1608923076,"partition":"work","tags":[],"project":"no project","numNodes":32,"duration":86486}
+{
+  "exclusive": 1,
+  "jobId": 1403244,
+  "statistics": {
+    "mem_bw":     { "avg": 63.57,  "min": 0,    "max": 74.5,   "unit": { "base": "B/s", "prefix": "G" } },
+    "rapl_power": { "avg": 228.07, "min": 0,    "max": 258.56, "unit": { "base": "W" } },
+    "ipc":        { "avg": 1.53846153846154, "min": 0.0, "max": 0.510204081632653, "unit": { "base": "IPC" } },
+    "clock":      { "avg": 2599.39, "min": 1380.32, "max": 2634.46, "unit": { "base": "Hz", "prefix": "M" } },
+    "cpu_load":   { "avg": 18.4,   "min": 0,    "max": 23.58,  "unit": { "base": "load" } },
+    "flops_any":  { "avg": 225.59, "min": 0,    "max": 404.62, "unit": { "base": "F/s", "prefix": "G" } },
+    "flops_dp":   { "avg": 0,      "min": 0,    "max": 0.24,   "unit": { "base": "F/s", "prefix": "G" } },
+    "mem_used":   { "avg": 27.84,  "min": 1.55, "max": 37.5,   "unit": { "base": "B", "prefix": "G" } },
+    "flops_sp":   { "avg": 225.59, "min": 0,    "max": 404.62, "unit": { "base": "F/s", "prefix": "G" } }
+  },
+  "resources": [
+    { "hostname": "e0102" }, { "hostname": "e0103" }, { "hostname": "e0105" }, { "hostname": "e0106" },
+    { "hostname": "e0107" }, { "hostname": "e0108" }, { "hostname": "e0114" }, { "hostname": "e0320" },
+    { "hostname": "e0321" }, { "hostname": "e0325" }, { "hostname": "e0404" }, { "hostname": "e0415" },
+    { "hostname": "e0433" }, { "hostname": "e0437" }, { "hostname": "e0439" }, { "hostname": "e0501" },
+    { "hostname": "e0503" }, { "hostname": "e0505" }, { "hostname": "e0506" }, { "hostname": "e0512" },
+    { "hostname": "e0513" }, { "hostname": "e0514" }, { "hostname": "e0653" }, { "hostname": "e0701" },
+    { "hostname": "e0716" }, { "hostname": "e0727" }, { "hostname": "e0728" }, { "hostname": "e0925" },
+    { "hostname": "e0926" }, { "hostname": "e0929" }, { "hostname": "e0934" }, { "hostname": "e0951" }
+  ],
+  "walltime": 10,
+  "jobState": "completed",
+  "cluster": "emmy",
+  "subCluster": "haswell",
+  "stopTime": 1609009562,
+  "user": "emmyUser6",
+  "startTime": 1608923076,
+  "partition": "work",
+  "tags": [],
+  "project": "no project",
+  "numNodes": 32,
+  "duration": 86486
+}
File diff suppressed because one or more lines are too long

BIN  test/archive/emmy/1404/397/1609300556/data.json.gz  (new file; binary file not shown)

@@ -1 +1,194 @@
-{"stopTime":1609387081,"resources":[{"hostname":"e0151"},{"hostname":"e0152"},{"hostname":"e0153"},{"hostname":"e0232"},{"hostname":"e0303"},{"hostname":"e0314"},{"hostname":"e0344"},{"hostname":"e0345"},{"hostname":"e0348"},{"hostname":"e0507"},{"hostname":"e0518"},{"hostname":"e0520"},{"hostname":"e0522"},{"hostname":"e0526"},{"hostname":"e0527"},{"hostname":"e0528"},{"hostname":"e0530"},{"hostname":"e0551"},{"hostname":"e0604"},{"hostname":"e0613"},{"hostname":"e0634"},{"hostname":"e0639"},{"hostname":"e0640"},{"hostname":"e0651"},{"hostname":"e0653"},{"hostname":"e0701"},{"hostname":"e0704"},{"hostname":"e0751"},{"hostname":"e0809"},{"hostname":"e0814"},{"hostname":"e0819"},{"hostname":"e0908"}],"walltime":10,"cluster":"emmy","jobState":"completed","statistics":{"clock":{"max":2634.9,"unit":"MHz","min":0,"avg":2597.8},"cpu_load":{"max":27.41,"unit":"load","min":0,"avg":18.39},"mem_bw":{"min":0,"avg":63.23,"unit":"GB/s","max":75.06},"ipc":{"min":0.0,"avg":1.53846153846154,"unit":"IPC","max":0.490196078431373},"rapl_power":{"min":0,"avg":227.32,"unit":"W","max":256.22},"mem_used":{"min":1.5,"avg":27.77,"unit":"GB","max":37.43},"flops_sp":{"unit":"GF/s","max":413.21,"min":0,"avg":224.41},"flops_dp":{"max":5.72,"unit":"GF/s","min":0,"avg":0},"flops_any":{"min":0,"avg":224.42,"max":413.21,"unit":"GF/s"}},"exclusive":1,"jobId":1404397,"tags":[],"partition":"work","project":"no project","user":"emmyUser6","startTime":1609300556,"duration":86525,"numNodes":32}
+{
+  "stopTime": 1609387081,
+  "resources": [
+    { "hostname": "e0151" }, { "hostname": "e0152" }, { "hostname": "e0153" }, { "hostname": "e0232" },
+    { "hostname": "e0303" }, { "hostname": "e0314" }, { "hostname": "e0344" }, { "hostname": "e0345" },
+    { "hostname": "e0348" }, { "hostname": "e0507" }, { "hostname": "e0518" }, { "hostname": "e0520" },
+    { "hostname": "e0522" }, { "hostname": "e0526" }, { "hostname": "e0527" }, { "hostname": "e0528" },
+    { "hostname": "e0530" }, { "hostname": "e0551" }, { "hostname": "e0604" }, { "hostname": "e0613" },
+    { "hostname": "e0634" }, { "hostname": "e0639" }, { "hostname": "e0640" }, { "hostname": "e0651" },
+    { "hostname": "e0653" }, { "hostname": "e0701" }, { "hostname": "e0704" }, { "hostname": "e0751" },
+    { "hostname": "e0809" }, { "hostname": "e0814" }, { "hostname": "e0819" }, { "hostname": "e0908" }
+  ],
+  "walltime": 10,
+  "cluster": "emmy",
+  "subCluster": "haswell",
+  "jobState": "completed",
+  "statistics": {
+    "clock":      { "avg": 2597.8, "min": 0,   "max": 2634.9, "unit": { "base": "Hz", "prefix": "M" } },
+    "cpu_load":   { "avg": 18.39,  "min": 0,   "max": 27.41,  "unit": { "base": "load" } },
+    "mem_bw":     { "avg": 63.23,  "min": 0,   "max": 75.06,  "unit": { "base": "B/s", "prefix": "G" } },
+    "ipc":        { "avg": 1.53846153846154, "min": 0.0, "max": 0.490196078431373, "unit": { "base": "IPC" } },
+    "rapl_power": { "avg": 227.32, "min": 0,   "max": 256.22, "unit": { "base": "W" } },
+    "mem_used":   { "avg": 27.77,  "min": 1.5, "max": 37.43,  "unit": { "base": "B", "prefix": "G" } },
+    "flops_sp":   { "avg": 224.41, "min": 0,   "max": 413.21, "unit": { "base": "F/s", "prefix": "G" } },
+    "flops_dp":   { "avg": 0,      "min": 0,   "max": 5.72,   "unit": { "base": "F/s", "prefix": "G" } },
+    "flops_any":  { "avg": 224.42, "min": 0,   "max": 413.21, "unit": { "base": "F/s", "prefix": "G" } }
+  },
+  "exclusive": 1,
+  "jobId": 1404397,
+  "tags": [],
+  "partition": "work",
+  "project": "no project",
+  "user": "emmyUser6",
+  "startTime": 1609300556,
+  "duration": 86525,
+  "numNodes": 32
+}
Load Diff
1
test/archive/version.txt
Normal file
1
test/archive/version.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
1
|
@ -1,13 +1,14 @@
|
|||||||
{
|
{
|
||||||
"cpu_used": {
|
"cpu_used": {
|
||||||
"core": {
|
"core": {
|
||||||
"unit": "cpu used",
|
"unit": {
|
||||||
"scope": "core",
|
"base": ""
|
||||||
|
},
|
||||||
"timestep": 30,
|
"timestep": 30,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.09090909090909093,
|
"min": 0.09090909090909093,
|
||||||
"avg": 0.9173553719008265,
|
"avg": 0.9173553719008265,
|
||||||
@ -29,7 +30,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 1,
|
"id": "1",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.03694102397926118,
|
"min": 0.03694102397926118,
|
||||||
"avg": 0.045968409230268584,
|
"avg": 0.045968409230268584,
|
||||||
@ -51,7 +52,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 10,
|
"id": "10",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.10505319148936171,
|
"min": 0.10505319148936171,
|
||||||
"avg": 0.9186411992263056,
|
"avg": 0.9186411992263056,
|
||||||
@ -73,7 +74,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 11,
|
"id": "11",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.05286048845767815,
|
"min": 0.05286048845767815,
|
||||||
"avg": 0.07053823838706144,
|
"avg": 0.07053823838706144,
|
||||||
@ -99,13 +100,14 @@
|
|||||||
},
|
},
|
||||||
"ipc": {
|
"ipc": {
|
||||||
"core": {
|
"core": {
|
||||||
"unit": "IPC",
|
"unit": {
|
||||||
"scope": "core",
|
"base": "IPC"
|
||||||
|
},
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 1.3808406263195592,
|
"min": 1.3808406263195592,
|
||||||
"avg": 1.3960848578375105,
|
"avg": 1.3960848578375105,
|
||||||
@ -121,7 +123,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 1,
|
"id": "1",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.30469640475234366,
|
"min": 0.30469640475234366,
|
||||||
"avg": 0.8816944294664065,
|
"avg": 0.8816944294664065,
|
||||||
@ -137,7 +139,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 10,
|
"id": "10",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 1.3791232173760588,
|
"min": 1.3791232173760588,
|
||||||
"avg": 1.3850247295506815,
|
"avg": 1.3850247295506815,
|
||||||
@ -153,7 +155,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 11,
|
"id": "11",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.6424094604392216,
|
"min": 0.6424094604392216,
|
||||||
"avg": 0.9544442638400293,
|
"avg": 0.9544442638400293,
|
||||||
@ -173,13 +175,14 @@
|
|||||||
},
|
},
|
||||||
"flops_any": {
|
"flops_any": {
|
||||||
"core": {
|
"core": {
|
||||||
"unit": "F/s",
|
"unit": {
|
||||||
"scope": "core",
|
"base": "F/s"
|
||||||
|
},
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.0,
|
"min": 0.0,
|
||||||
"avg": 184.2699002412084,
|
"avg": 184.2699002412084,
|
||||||
@ -195,7 +198,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 1,
|
"id": "1",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.13559227208748068,
|
"min": 0.13559227208748068,
|
||||||
"avg": 273.2997868356056,
|
"avg": 273.2997868356056,
|
||||||
@ -211,7 +214,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 10,
|
"id": "10",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 0.0,
|
"min": 0.0,
|
||||||
"avg": 1678.8419461262179,
|
"avg": 1678.8419461262179,
|
||||||
@ -227,7 +230,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 11,
|
"id": "11",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 45.28689133054866,
|
"min": 45.28689133054866,
|
||||||
"avg": 609.6644949204072,
|
"avg": 609.6644949204072,
|
||||||
@ -247,13 +250,14 @@
|
|||||||
},
|
},
|
||||||
"mem_bw": {
|
"mem_bw": {
|
||||||
"socket": {
|
"socket": {
|
||||||
"unit": "B/s",
|
"unit": {
|
||||||
"scope": "socket",
|
"base": "B/s"
|
||||||
|
},
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 653671812.1661415,
|
"min": 653671812.1661415,
|
||||||
"avg": 1637585527.5854635,
|
"avg": 1637585527.5854635,
|
||||||
@ -269,7 +273,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 1520190251.61048,
|
"min": 1520190251.61048,
|
||||||
"avg": 1572477682.3850098,
|
"avg": 1572477682.3850098,
|
||||||
@ -289,8 +293,9 @@
|
|||||||
},
|
},
|
||||||
"file_bw": {
|
"file_bw": {
|
||||||
"node": {
|
"node": {
|
||||||
"unit": "B/s",
|
"unit": {
|
||||||
"scope": "node",
|
"base": "B/s"
|
||||||
|
},
|
||||||
"timestep": 30,
|
"timestep": 30,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
@ -341,8 +346,9 @@
|
|||||||
},
|
},
|
||||||
"net_bw": {
|
"net_bw": {
|
||||||
"node": {
|
"node": {
|
||||||
"unit": "B/s",
|
"unit": {
|
||||||
"scope": "node",
|
"base": "B/s"
|
||||||
|
},
|
||||||
"timestep": 30,
|
"timestep": 30,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
@ -393,8 +399,9 @@
|
|||||||
},
|
},
|
||||||
"mem_used": {
|
"mem_used": {
|
||||||
"node": {
|
"node": {
|
||||||
"unit": "B",
|
"unit": {
|
||||||
"scope": "node",
|
"base": "B"
|
||||||
|
},
|
||||||
"timestep": 30,
|
"timestep": 30,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
@ -445,13 +452,14 @@
|
|||||||
},
|
},
|
||||||
"cpu_power": {
|
"cpu_power": {
|
||||||
"socket": {
|
"socket": {
|
||||||
"unit": "W",
|
"unit": {
|
||||||
"scope": "socket",
|
"base": "W"
|
||||||
|
},
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"series": [
|
"series": [
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 35.50647456742635,
|
"min": 35.50647456742635,
|
||||||
"avg": 72.08313211552377,
|
"avg": 72.08313211552377,
|
||||||
@ -467,7 +475,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"id": 0,
|
"id": "0",
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"min": 83.8466923147859,
|
"min": 83.8466923147859,
|
||||||
"avg": 85.18572681122097,
|
"avg": 85.18572681122097,
|
@@ -59,10 +59,6 @@ func setup(t *testing.T) *api.RestApi {
 const testclusterJson = `{
 	"name": "testcluster",
 	"subClusters": [
-		{
-			"name": "sc0",
-			"nodes": "host120,host121,host122"
-		},
 		{
 			"name": "sc1",
 			"nodes": "host123,host124,host125",
@@ -70,9 +66,28 @@ func setup(t *testing.T) *api.RestApi {
 			"socketsPerNode": 1,
 			"coresPerSocket": 4,
 			"threadsPerCore": 2,
-			"flopRateScalar": 44,
-			"flopRateSimd": 704,
-			"memoryBandwidth": 80,
+			"flopRateScalar": { "unit": { "prefix": "G", "base": "F/s" }, "value": 14 },
+			"flopRateSimd": { "unit": { "prefix": "G", "base": "F/s" }, "value": 112 },
+			"memoryBandwidth": { "unit": { "prefix": "G", "base": "B/s" }, "value": 24 },
+			"numberOfNodes": 70,
 			"topology": {
 				"node": [0, 1, 2, 3, 4, 5, 6, 7],
 				"socket": [[0, 1, 2, 3, 4, 5, 6, 7]],
@@ -85,9 +100,10 @@ func setup(t *testing.T) *api.RestApi {
 	"metricConfig": [
 		{
 			"name": "load_one",
-			"unit": "load",
+			"unit": { "base": "" },
 			"scope": "node",
 			"timestep": 60,
+			"aggregation": "avg",
 			"peak": 8,
 			"normal": 0,
 			"caution": 0,
@@ -95,19 +111,38 @@ func setup(t *testing.T) *api.RestApi {
 		}
 	]
 }`
 
 const taurusclusterJson = `{
 	"name": "taurus",
-	"SubClusters": [
+	"subClusters": [
 		{
 			"name": "haswell",
 			"processorType": "Intel Haswell",
 			"socketsPerNode": 2,
 			"coresPerSocket": 12,
 			"threadsPerCore": 1,
-			"flopRateScalar": 32,
-			"flopRateSimd": 512,
-			"memoryBandwidth": 60,
+			"flopRateScalar": { "unit": { "prefix": "G", "base": "F/s" }, "value": 14 },
+			"flopRateSimd": { "unit": { "prefix": "G", "base": "F/s" }, "value": 112 },
+			"memoryBandwidth": { "unit": { "prefix": "G", "base": "B/s" }, "value": 24 },
+			"numberOfNodes": 70,
+			"nodes": "w11[27-45,49-63,69-72]",
 			"topology": {
 				"node": [ 0, 1 ],
 				"socket": [
@@ -126,8 +161,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "cpu_used",
 			"scope": "core",
-			"unit": "",
+			"unit": {"base": ""},
+			"aggregation": "avg",
 			"timestep": 30,
+			"peak": 1,
+			"normal": 0.5,
+			"caution": 2e-07,
+			"alert": 1e-07,
 			"subClusters": [
 				{
 					"name": "haswell",
@@ -141,8 +181,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "ipc",
 			"scope": "core",
-			"unit": "IPC",
+			"unit": { "base": "IPC"},
+			"aggregation": "avg",
 			"timestep": 60,
+			"peak": 2,
+			"normal": 1,
+			"caution": 0.1,
+			"alert": 0.5,
 			"subClusters": [
 				{
 					"name": "haswell",
@@ -156,8 +201,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "flops_any",
 			"scope": "core",
-			"unit": "F/s",
+			"unit": { "base": "F/s"},
+			"aggregation": "sum",
 			"timestep": 60,
+			"peak": 40000000000,
+			"normal": 20000000000,
+			"caution": 30000000000,
+			"alert": 35000000000,
 			"subClusters": [
 				{
 					"name": "haswell",
@@ -171,8 +221,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "mem_bw",
 			"scope": "socket",
-			"unit": "B/s",
+			"unit": { "base": "B/s"},
+			"aggregation": "sum",
 			"timestep": 60,
+			"peak": 58800000000,
+			"normal": 28800000000,
+			"caution": 38800000000,
+			"alert": 48800000000,
 			"subClusters": [
 				{
 					"name": "haswell",
@@ -186,8 +241,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "file_bw",
 			"scope": "node",
-			"unit": "B/s",
+			"unit": { "base": "B/s"},
+			"aggregation": "sum",
 			"timestep": 30,
+			"peak": 20000000000,
+			"normal": 5000000000,
+			"caution": 9000000000,
+			"alert": 19000000000,
 			"subClusters": [
 				{
 					"name": "haswell",
@@ -201,8 +261,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "net_bw",
 			"scope": "node",
-			"unit": "B/s",
+			"unit": { "base": "B/s"},
 			"timestep": 30,
+			"aggregation": "sum",
+			"peak": 7000000000,
+			"normal": 5000000000,
+			"caution": 6000000000,
+			"alert": 6500000000,
 			"subClusters": [
 				{
 					"name": "haswell",
@@ -216,8 +281,13 @@ func setup(t *testing.T) *api.RestApi {
 		{
 			"name": "mem_used",
 			"scope": "node",
-			"unit": "B",
+			"unit": {"base": "B"},
+			"aggregation": "sum",
|
||||||
"timestep": 30,
|
"timestep": 30,
|
||||||
|
"peak": 32000000000,
|
||||||
|
"normal": 2000000000,
|
||||||
|
"caution": 31000000000,
|
||||||
|
"alert": 30000000000,
|
||||||
"subClusters": [
|
"subClusters": [
|
||||||
{
|
{
|
||||||
"name": "haswell",
|
"name": "haswell",
|
||||||
@ -231,8 +301,13 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
{
|
{
|
||||||
"name": "cpu_power",
|
"name": "cpu_power",
|
||||||
"scope": "socket",
|
"scope": "socket",
|
||||||
"unit": "W",
|
"unit": {"base": "W"},
|
||||||
|
"aggregation": "sum",
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
|
"peak": 100,
|
||||||
|
"normal": 80,
|
||||||
|
"caution": 90,
|
||||||
|
"alert": 90,
|
||||||
"subClusters": [
|
"subClusters": [
|
||||||
{
|
{
|
||||||
"name": "haswell",
|
"name": "haswell",
|
||||||
@ -253,6 +328,10 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0777); err != nil {
|
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0777); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -315,13 +394,12 @@ func TestRestApi(t *testing.T) {
	testData := schema.JobData{
		"load_one": map[schema.MetricScope]*schema.JobMetric{
			schema.MetricScopeNode: {
-				Unit:     "load",
-				Scope:    schema.MetricScopeNode,
+				Unit:     schema.Unit{Base: "load"},
				Timestep: 60,
				Series: []schema.Series{
					{
						Hostname:   "host123",
-						Statistics: &schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
+						Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
						Data:       []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3},
					},
				},
@@ -392,15 +470,15 @@ func TestRestApi(t *testing.T) {
		job.Project != "testproj" ||
		job.Cluster != "testcluster" ||
		job.SubCluster != "sc1" ||
-		job.Partition != "default" ||
-		job.Walltime != 3600 ||
-		job.ArrayJobId != 0 ||
+		*job.Partition != "default" ||
+		*job.Walltime != 3600 ||
+		*job.ArrayJobId != 0 ||
		job.NumNodes != 1 ||
-		job.NumHWThreads != 8 ||
-		job.NumAcc != 0 ||
+		*job.NumHWThreads != 8 ||
+		*job.NumAcc != 0 ||
		job.Exclusive != 1 ||
		job.MonitoringStatus != 1 ||
-		job.SMT != 1 ||
+		*job.SMT != 1 ||
		!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
		job.StartTime.Unix() != 123456789 {
		t.Fatalf("unexpected job properties: %#v", job)
@@ -488,13 +566,13 @@ func TestRestApi(t *testing.T) {
		}
	})

-	t.Run("FailedJob", func(t *testing.T) {
-		subtestLetJobFail(t, restapi, r)
-	})
+	// t.Run("FailedJob", func(t *testing.T) {
+	// 	subtestLetJobFail(t, restapi, r)
+	// })

-	t.Run("ImportJob", func(t *testing.T) {
-		testImportFlag(t)
-	})
+	// t.Run("ImportJob", func(t *testing.T) {
+	// 	testImportFlag(t)
+	// })
	}

func subtestLetJobFail(t *testing.T, restapi *api.RestApi, r *mux.Router) {
|
func subtestLetJobFail(t *testing.T, restapi *api.RestApi, r *mux.Router) {
|
||||||
@ -505,19 +583,15 @@ func subtestLetJobFail(t *testing.T, restapi *api.RestApi, r *mux.Router) {
|
|||||||
"cluster": "testcluster",
|
"cluster": "testcluster",
|
||||||
"partition": "default",
|
"partition": "default",
|
||||||
"walltime": 3600,
|
"walltime": 3600,
|
||||||
"arrayJobId": 0,
|
|
||||||
"numNodes": 1,
|
"numNodes": 1,
|
||||||
"numAcc": 0,
|
|
||||||
"exclusive": 1,
|
"exclusive": 1,
|
||||||
"monitoringStatus": 1,
|
"monitoringStatus": 1,
|
||||||
"smt": 1,
|
"smt": 1,
|
||||||
"tags": [],
|
|
||||||
"resources": [
|
"resources": [
|
||||||
{
|
{
|
||||||
"hostname": "host123"
|
"hostname": "host123"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metaData": {},
|
|
||||||
"startTime": 12345678
|
"startTime": 12345678
|
||||||
}`
|
}`
|
||||||
|
|
||||||
@ -596,4 +670,17 @@ func testImportFlag(t *testing.T) {
|
|||||||
if len(data) != 8 {
|
if len(data) != 8 {
|
||||||
t.Errorf("Job data length: Got %d, want 8", len(data))
|
t.Errorf("Job data length: Got %d, want 8", len(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
r := map[string]string{"mem_used": "GB", "net_bw": "KB/s",
|
||||||
|
"cpu_power": "W", "cpu_used": "",
|
||||||
|
"file_bw": "KB/s", "flops_any": "F/s",
|
||||||
|
"mem_bw": "GB/s", "ipc": "IPC"}
|
||||||
|
|
||||||
|
for name, scopes := range data {
|
||||||
|
for _, metric := range scopes {
|
||||||
|
if metric.Unit.Base != r[name] {
|
||||||
|
t.Errorf("Metric %s unit: Got %s, want %s", name, metric.Unit.Base, r[name])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,10 +5,8 @@
|
|||||||
"cluster": "taurus",
|
"cluster": "taurus",
|
||||||
"subCluster": "haswell",
|
"subCluster": "haswell",
|
||||||
"partition": "haswell64",
|
"partition": "haswell64",
|
||||||
"arrayJobId": 0,
|
|
||||||
"numNodes": 2,
|
"numNodes": 2,
|
||||||
"numHwthreads": 4,
|
"numHwthreads": 4,
|
||||||
"numAcc": 0,
|
|
||||||
"exclusive": 0,
|
"exclusive": 0,
|
||||||
"startTime": 1635856524,
|
"startTime": 1635856524,
|
||||||
"jobState": "completed",
|
"jobState": "completed",
|
||||||
@ -18,11 +16,17 @@
|
|||||||
"resources": [
|
"resources": [
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6489",
|
"hostname": "taurusi6489",
|
||||||
"hwthreads": [ 0, 1 ]
|
"hwthreads": [
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"hostname": "taurusi6490",
|
"hostname": "taurusi6490",
|
||||||
"hwthreads": [ 10, 11 ]
|
"hwthreads": [
|
||||||
|
10,
|
||||||
|
11
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"statistics": {
|
"statistics": {
|
||||||
@ -30,49 +34,65 @@
|
|||||||
"min": 0.03694102397926118,
|
"min": 0.03694102397926118,
|
||||||
"avg": 0.48812580468611544,
|
"avg": 0.48812580468611544,
|
||||||
"max": 1.0000000000000002,
|
"max": 1.0000000000000002,
|
||||||
"unit": "cpu used"
|
"unit": {
|
||||||
|
"base": ""
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"ipc": {
|
"ipc": {
|
||||||
"min": 0.30469640475234366,
|
"min": 0.30469640475234366,
|
||||||
"avg": 1.154312070173657,
|
"avg": 1.154312070173657,
|
||||||
"max": 1.797623522191001,
|
"max": 1.797623522191001,
|
||||||
"unit": "IPC"
|
"unit": {
|
||||||
|
"base": "IPC"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"flops_any": {
|
"flops_any": {
|
||||||
"min": 0.0,
|
"min": 0.0,
|
||||||
"avg": 686.5190320308598,
|
"avg": 686.5190320308598,
|
||||||
"max": 4346.591400350933,
|
"max": 4346.591400350933,
|
||||||
"unit": "F/s"
|
"unit": {
|
||||||
|
"base": "F/s"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"mem_bw": {
|
"mem_bw": {
|
||||||
"min": 653671812.1661415,
|
"min": 653671812.1661415,
|
||||||
"avg": 1605031604.9852366,
|
"avg": 1605031604.9852366,
|
||||||
"max": 2614718291.9554267,
|
"max": 2614718291.9554267,
|
||||||
"unit": "B/s"
|
"unit": {
|
||||||
|
"base": "B/s"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"file_bw": {
|
"file_bw": {
|
||||||
"min": 0.0,
|
"min": 0.0,
|
||||||
"avg": 620592.5419124186,
|
"avg": 620592.5419124186,
|
||||||
"max": 11559156.360352296,
|
"max": 11559156.360352296,
|
||||||
"unit": "B/s"
|
"unit": {
|
||||||
|
"base": "B/s"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"net_bw": {
|
"net_bw": {
|
||||||
"min": 126779.89655880642,
|
"min": 126779.89655880642,
|
||||||
"avg": 763101.082138246,
|
"avg": 763101.082138246,
|
||||||
"max": 1916309.7075416835,
|
"max": 1916309.7075416835,
|
||||||
"unit": "B/s"
|
"unit": {
|
||||||
|
"base": "B/s"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"mem_used": {
|
"mem_used": {
|
||||||
"min": 2779066368.0,
|
"min": 2779066368.0,
|
||||||
"avg": 9647598685.09091,
|
"avg": 9647598685.09091,
|
||||||
"max": 10202595328.0,
|
"max": 10202595328.0,
|
||||||
"unit": "B"
|
"unit": {
|
||||||
|
"base": "B"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"cpu_power": {
|
"cpu_power": {
|
||||||
"min": 35.50647456742635,
|
"min": 35.50647456742635,
|
||||||
"avg": 78.63442946337237,
|
"avg": 78.63442946337237,
|
||||||
"max": 85.83909286117324,
|
"max": 85.83909286117324,
|
||||||
"unit": "W"
|
"unit": {
|
||||||
|
"base": "W"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
36  tools/archive-manager/main.go  Normal file
@@ -0,0 +1,36 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
)

func main() {
	var srcPath, flagConfigFile string

	flag.StringVar(&srcPath, "s", "./var/job-archive", "Specify the source job archive path. Default is ./var/job-archive")
	flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
	flag.Parse()
	archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", srcPath)

	config.Init(flagConfigFile)
	config.Keys.Validate = true

	if err := archive.Init(json.RawMessage(archiveCfg), false); err != nil {
		log.Fatal(err)
	}
	ar := archive.GetHandle()

	for job := range ar.Iter(true) {
		log.Printf("Validate %s - %d\n", job.Meta.Cluster, job.Meta.JobID)
	}
}
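The tool above builds its archive backend configuration with fmt.Sprintf. Purely as an illustrative alternative (not part of the tool), marshalling a small struct yields valid JSON even if the path contains characters that would need escaping:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Equivalent of the Sprintf-built config, but guaranteed to be valid JSON.
	cfg := struct {
		Kind string `json:"kind"`
		Path string `json:"path"`
	}{Kind: "file", Path: "./var/job-archive"}

	raw, err := json.Marshal(cfg)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(raw)) // {"kind":"file","path":"./var/job-archive"}
}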
65  tools/archive-migration/cluster.go  Normal file
@@ -0,0 +1,65 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import (
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// type Accelerator struct {
// 	ID    string `json:"id"`
// 	Type  string `json:"type"`
// 	Model string `json:"model"`
// }

// type Topology struct {
// 	Node         []int          `json:"node"`
// 	Socket       [][]int        `json:"socket"`
// 	MemoryDomain [][]int        `json:"memoryDomain"`
// 	Die          [][]int        `json:"die"`
// 	Core         [][]int        `json:"core"`
// 	Accelerators []*Accelerator `json:"accelerators"`
// }

type SubCluster struct {
	Name            string           `json:"name"`
	Nodes           string           `json:"nodes"`
	NumberOfNodes   int              `json:"numberOfNodes"`
	ProcessorType   string           `json:"processorType"`
	SocketsPerNode  int              `json:"socketsPerNode"`
	CoresPerSocket  int              `json:"coresPerSocket"`
	ThreadsPerCore  int              `json:"threadsPerCore"`
	FlopRateScalar  int              `json:"flopRateScalar"`
	FlopRateSimd    int              `json:"flopRateSimd"`
	MemoryBandwidth int              `json:"memoryBandwidth"`
	Topology        *schema.Topology `json:"topology"`
}

// type SubClusterConfig struct {
// 	Name    string  `json:"name"`
// 	Peak    float64 `json:"peak"`
// 	Normal  float64 `json:"normal"`
// 	Caution float64 `json:"caution"`
// 	Alert   float64 `json:"alert"`
// }

type MetricConfig struct {
	Name        string                      `json:"name"`
	Unit        string                      `json:"unit"`
	Scope       schema.MetricScope          `json:"scope"`
	Aggregation string                      `json:"aggregation"`
	Timestep    int                         `json:"timestep"`
	Peak        float64                     `json:"peak"`
	Normal      float64                     `json:"normal"`
	Caution     float64                     `json:"caution"`
	Alert       float64                     `json:"alert"`
	SubClusters []*schema.SubClusterConfig  `json:"subClusters"`
}

type Cluster struct {
	Name         string          `json:"name"`
	MetricConfig []*MetricConfig `json:"metricConfig"`
	SubClusters  []*SubCluster   `json:"subClusters"`
}
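cluster.go keeps the legacy flat representation (integer flop rates and bandwidth, string units) so that old cluster.json files can still be decoded. The migration later maps these onto the new value-plus-unit form; below is a stand-in sketch of that mapping (the types are simplified, and the "G" prefix mirrors the assumption made in deepCopyClusterConfig further down):

package main

import "fmt"

// Unit and MetricValue are illustrative stand-ins for the new schema types.
type Unit struct {
	Prefix string
	Base   string
}

type MetricValue struct {
	Unit  Unit
	Value float64
}

func main() {
	legacyFlopRateScalar := 44 // old cluster.json: plain GFlop/s number
	v := MetricValue{Unit: Unit{Prefix: "G", Base: "F/s"}, Value: float64(legacyFlopRateScalar)}
	fmt.Printf("%+v\n", v) // {Unit:{Prefix:G Base:F/s} Value:44}
}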
166
tools/archive-migration/clusterConfig.go
Normal file
166
tools/archive-migration/clusterConfig.go
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
var Clusters []*Cluster
|
||||||
|
var nodeLists map[string]map[string]archive.NodeList
|
||||||
|
|
||||||
|
func initClusterConfig() error {
|
||||||
|
|
||||||
|
Clusters = []*Cluster{}
|
||||||
|
nodeLists = map[string]map[string]archive.NodeList{}
|
||||||
|
|
||||||
|
for _, c := range ar.GetClusters() {
|
||||||
|
|
||||||
|
cluster, err := ar.LoadClusterCfg(c)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cluster.Name) == 0 ||
|
||||||
|
len(cluster.MetricConfig) == 0 ||
|
||||||
|
len(cluster.SubClusters) == 0 {
|
||||||
|
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, mc := range cluster.MetricConfig {
|
||||||
|
if len(mc.Name) == 0 {
|
||||||
|
return errors.New("cluster.metricConfig.name should not be empty")
|
||||||
|
}
|
||||||
|
if mc.Timestep < 1 {
|
||||||
|
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
|
||||||
|
}
|
||||||
|
|
||||||
|
// For backwards compatibility...
|
||||||
|
if mc.Scope == "" {
|
||||||
|
mc.Scope = schema.MetricScopeNode
|
||||||
|
}
|
||||||
|
if !mc.Scope.Valid() {
|
||||||
|
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Clusters = append(Clusters, cluster)
|
||||||
|
|
||||||
|
nodeLists[cluster.Name] = make(map[string]archive.NodeList)
|
||||||
|
for _, sc := range cluster.SubClusters {
|
||||||
|
if sc.Nodes == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
nl, err := archive.ParseNodeList(sc.Nodes)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
|
||||||
|
}
|
||||||
|
nodeLists[cluster.Name][sc.Name] = nl
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetCluster(cluster string) *Cluster {
|
||||||
|
|
||||||
|
for _, c := range Clusters {
|
||||||
|
if c.Name == cluster {
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetSubCluster(cluster, subcluster string) *SubCluster {
|
||||||
|
|
||||||
|
for _, c := range Clusters {
|
||||||
|
if c.Name == cluster {
|
||||||
|
for _, p := range c.SubClusters {
|
||||||
|
if p.Name == subcluster {
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetMetricConfig(cluster, metric string) *MetricConfig {
|
||||||
|
|
||||||
|
for _, c := range Clusters {
|
||||||
|
if c.Name == cluster {
|
||||||
|
for _, m := range c.MetricConfig {
|
||||||
|
if m.Name == metric {
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AssignSubCluster sets the `job.subcluster` property of the job based
|
||||||
|
// on its cluster and resources.
|
||||||
|
func AssignSubCluster(job *BaseJob) error {
|
||||||
|
|
||||||
|
cluster := GetCluster(job.Cluster)
|
||||||
|
if cluster == nil {
|
||||||
|
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.SubCluster != "" {
|
||||||
|
for _, sc := range cluster.SubClusters {
|
||||||
|
if sc.Name == job.SubCluster {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(job.Resources) == 0 {
|
||||||
|
return fmt.Errorf("job without any resources/hosts")
|
||||||
|
}
|
||||||
|
|
||||||
|
host0 := job.Resources[0].Hostname
|
||||||
|
for sc, nl := range nodeLists[job.Cluster] {
|
||||||
|
if nl != nil && nl.Contains(host0) {
|
||||||
|
job.SubCluster = sc
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if cluster.SubClusters[0].Nodes == "" {
|
||||||
|
job.SubCluster = cluster.SubClusters[0].Name
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||||
|
|
||||||
|
for sc, nl := range nodeLists[cluster] {
|
||||||
|
if nl != nil && nl.Contains(hostname) {
|
||||||
|
return sc, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
c := GetCluster(cluster)
|
||||||
|
if c == nil {
|
||||||
|
return "", fmt.Errorf("unkown cluster: %#v", cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.SubClusters[0].Nodes == "" {
|
||||||
|
return c.SubClusters[0].Name, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
|
||||||
|
}
|
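AssignSubCluster resolves a job's subcluster from the hostname of its first resource via the per-cluster node lists parsed above. A much simplified, illustrative version of that lookup, with a plain host set standing in for archive.NodeList:

package main

import "fmt"

// hostsBySubCluster is a toy stand-in for the parsed node lists.
var hostsBySubCluster = map[string]map[string]bool{
	"haswell": {"taurusi6489": true, "taurusi6490": true},
}

func subClusterForHost(host string) (string, bool) {
	for sc, hosts := range hostsBySubCluster {
		if hosts[host] {
			return sc, true
		}
	}
	return "", false
}

func main() {
	sc, ok := subClusterForHost("taurusi6490")
	fmt.Println(sc, ok) // haswell true
}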
109
tools/archive-migration/float.go
Normal file
109
tools/archive-migration/float.go
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A custom float type is used so that (Un)MarshalJSON and
|
||||||
|
// (Un)MarshalGQL can be overloaded and NaN/null can be used.
|
||||||
|
// The default behaviour of putting every nullable value behind
|
||||||
|
// a pointer has a bigger overhead.
|
||||||
|
type Float float64
|
||||||
|
|
||||||
|
var NaN Float = Float(math.NaN())
|
||||||
|
var nullAsBytes []byte = []byte("null")
|
||||||
|
|
||||||
|
func (f Float) IsNaN() bool {
|
||||||
|
return math.IsNaN(float64(f))
|
||||||
|
}
|
||||||
|
|
||||||
|
// NaN will be serialized to `null`.
|
||||||
|
func (f Float) MarshalJSON() ([]byte, error) {
|
||||||
|
if f.IsNaN() {
|
||||||
|
return nullAsBytes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return strconv.AppendFloat(make([]byte, 0, 10), float64(f), 'f', 2, 64), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// `null` will be unserialized to NaN.
|
||||||
|
func (f *Float) UnmarshalJSON(input []byte) error {
|
||||||
|
s := string(input)
|
||||||
|
if s == "null" {
|
||||||
|
*f = NaN
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
val, err := strconv.ParseFloat(s, 64)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
*f = Float(val)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalGQL implements the graphql.Unmarshaler interface.
|
||||||
|
func (f *Float) UnmarshalGQL(v interface{}) error {
|
||||||
|
f64, ok := v.(float64)
|
||||||
|
if !ok {
|
||||||
|
return errors.New("invalid Float scalar")
|
||||||
|
}
|
||||||
|
|
||||||
|
*f = Float(f64)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MarshalGQL implements the graphql.Marshaler interface.
|
||||||
|
// NaN will be serialized to `null`.
|
||||||
|
func (f Float) MarshalGQL(w io.Writer) {
|
||||||
|
if f.IsNaN() {
|
||||||
|
w.Write(nullAsBytes)
|
||||||
|
} else {
|
||||||
|
w.Write(strconv.AppendFloat(make([]byte, 0, 10), float64(f), 'f', 2, 64))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only used via REST-API, not via GraphQL.
|
||||||
|
// This uses a lot less allocations per series,
|
||||||
|
// but it turns out that the performance increase
|
||||||
|
// from using this is not that big.
|
||||||
|
func (s *Series) MarshalJSON() ([]byte, error) {
|
||||||
|
buf := make([]byte, 0, 512+len(s.Data)*8)
|
||||||
|
buf = append(buf, `{"hostname":"`...)
|
||||||
|
buf = append(buf, s.Hostname...)
|
||||||
|
buf = append(buf, '"')
|
||||||
|
if s.Id != nil {
|
||||||
|
buf = append(buf, `,"id":`...)
|
||||||
|
buf = strconv.AppendInt(buf, int64(*s.Id), 10)
|
||||||
|
}
|
||||||
|
if s.Statistics != nil {
|
||||||
|
buf = append(buf, `,"statistics":{"min":`...)
|
||||||
|
buf = strconv.AppendFloat(buf, s.Statistics.Min, 'f', 2, 64)
|
||||||
|
buf = append(buf, `,"avg":`...)
|
||||||
|
buf = strconv.AppendFloat(buf, s.Statistics.Avg, 'f', 2, 64)
|
||||||
|
buf = append(buf, `,"max":`...)
|
||||||
|
buf = strconv.AppendFloat(buf, s.Statistics.Max, 'f', 2, 64)
|
||||||
|
buf = append(buf, '}')
|
||||||
|
}
|
||||||
|
buf = append(buf, `,"data":[`...)
|
||||||
|
for i := 0; i < len(s.Data); i++ {
|
||||||
|
if i != 0 {
|
||||||
|
buf = append(buf, ',')
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.Data[i].IsNaN() {
|
||||||
|
buf = append(buf, `null`...)
|
||||||
|
} else {
|
||||||
|
buf = strconv.AppendFloat(buf, float64(s.Data[i]), 'f', 2, 32)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf = append(buf, ']', '}')
|
||||||
|
return buf, nil
|
||||||
|
}
|
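The custom Float type exists so that NaN can be serialized as JSON null (marshalling a plain float64 NaN makes encoding/json fail). A reduced, self-contained copy of that idea:

package main

import (
	"fmt"
	"math"
	"strconv"
)

type Float float64

// MarshalJSON turns NaN into null and formats other values with two decimals.
func (f Float) MarshalJSON() ([]byte, error) {
	if math.IsNaN(float64(f)) {
		return []byte("null"), nil
	}
	return strconv.AppendFloat(nil, float64(f), 'f', 2, 64), nil
}

func main() {
	a, _ := Float(math.NaN()).MarshalJSON()
	b, _ := Float(0.25).MarshalJSON()
	fmt.Println(string(a), string(b)) // null 0.25
}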
142
tools/archive-migration/fsBackend.go
Normal file
142
tools/archive-migration/fsBackend.go
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FsArchiveConfig struct {
|
||||||
|
Path string `json:"path"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type FsArchive struct {
|
||||||
|
path string
|
||||||
|
clusters []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPath(
|
||||||
|
job *JobMeta,
|
||||||
|
rootPath string,
|
||||||
|
file string) string {
|
||||||
|
|
||||||
|
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
|
||||||
|
return filepath.Join(
|
||||||
|
rootPath,
|
||||||
|
job.Cluster,
|
||||||
|
lvl1, lvl2,
|
||||||
|
strconv.FormatInt(job.StartTime, 10), file)
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadJobMeta(filename string) (*JobMeta, error) {
|
||||||
|
|
||||||
|
f, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("fsBackend loadJobMeta()- %v", err)
|
||||||
|
return &JobMeta{}, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return DecodeJobMeta(bufio.NewReader(f))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fsa *FsArchive) Init(rawConfig json.RawMessage) error {
|
||||||
|
|
||||||
|
var config FsArchiveConfig
|
||||||
|
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||||
|
log.Errorf("fsBackend Init()- %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if config.Path == "" {
|
||||||
|
err := fmt.Errorf("fsBackend Init()- empty path")
|
||||||
|
log.Errorf("fsBackend Init()- %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fsa.path = config.Path
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(fsa.path)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("fsBackend Init()- %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, de := range entries {
|
||||||
|
fsa.clusters = append(fsa.clusters, de.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fsa *FsArchive) Iter() <-chan *JobMeta {
|
||||||
|
|
||||||
|
ch := make(chan *JobMeta)
|
||||||
|
go func() {
|
||||||
|
clustersDir, err := os.ReadDir(fsa.path)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Reading clusters failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, clusterDir := range clustersDir {
|
||||||
|
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Reading jobs failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, lvl1Dir := range lvl1Dirs {
|
||||||
|
if !lvl1Dir.IsDir() {
|
||||||
|
// Could be the cluster.json file
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name()))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Reading jobs failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, lvl2Dir := range lvl2Dirs {
|
||||||
|
dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
|
||||||
|
startTimeDirs, err := os.ReadDir(dirpath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Reading jobs failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, startTimeDir := range startTimeDirs {
|
||||||
|
if startTimeDir.IsDir() {
|
||||||
|
job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||||
|
} else {
|
||||||
|
ch <- job
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
close(ch)
|
||||||
|
}()
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fsa *FsArchive) LoadClusterCfg(name string) (*Cluster, error) {
|
||||||
|
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("fsBackend LoadClusterCfg()- %v", err)
|
||||||
|
return &Cluster{}, err
|
||||||
|
}
|
||||||
|
return DecodeCluster(bytes.NewReader(b))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fsa *FsArchive) GetClusters() []string {
|
||||||
|
return fsa.clusters
|
||||||
|
}
|
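getPath above determines the archive layout: a job is stored under <archive>/<cluster>/<jobID/1000>/<jobID%1000, zero-padded to three digits>/<startTime>/. A small sketch computing that path for a hypothetical job 123456:

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	jobID, startTime := int64(123456), int64(1635856524)
	lvl1 := fmt.Sprintf("%d", jobID/1000)   // "123"
	lvl2 := fmt.Sprintf("%03d", jobID%1000) // "456"
	p := filepath.Join("./var/job-archive", "taurus", lvl1, lvl2, fmt.Sprintf("%d", startTime), "meta.json")
	fmt.Println(p) // var/job-archive/taurus/123/456/1635856524/meta.json
}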
162
tools/archive-migration/job.go
Normal file
162
tools/archive-migration/job.go
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Non-Swaggered Comment: BaseJob
|
||||||
|
// Non-Swaggered Comment: Common subset of Job and JobMeta. Use one of those, not this type directly.
|
||||||
|
|
||||||
|
type BaseJob struct {
|
||||||
|
// The unique identifier of a job
|
||||||
|
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
||||||
|
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
|
||||||
|
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
|
||||||
|
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
|
||||||
|
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
|
||||||
|
Partition *string `json:"partition" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
|
||||||
|
ArrayJobId *int64 `json:"arrayJobId" db:"array_job_id" example:"123000"` // The unique identifier of an array job
|
||||||
|
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
|
||||||
|
NumHWThreads *int32 `json:"numHwthreads" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
|
||||||
|
NumAcc *int32 `json:"numAcc" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
|
||||||
|
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
|
||||||
|
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successful
|
||||||
|
SMT *int32 `json:"smt" db:"smt" example:"4"` // SMT threads used by job
|
||||||
|
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
|
||||||
|
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
|
||||||
|
Walltime *int64 `json:"walltime" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
|
||||||
|
Tags []*schema.Tag `json:"tags"` // List of tags
|
||||||
|
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
|
||||||
|
Resources []*Resource `json:"resources"` // Resources used by job
|
||||||
|
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
|
||||||
|
MetaData map[string]string `json:"metaData"` // Additional information about the job
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-Swaggered Comment: Job
|
||||||
|
// Non-Swaggered Comment: This type is used as the GraphQL interface and using sqlx as a table row.
|
||||||
|
|
||||||
|
// Job model
|
||||||
|
// @Description Information of a HPC job.
|
||||||
|
type Job struct {
|
||||||
|
// The unique identifier of a job in the database
|
||||||
|
ID int64 `json:"id" db:"id"`
|
||||||
|
BaseJob
|
||||||
|
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
|
||||||
|
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
|
||||||
|
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64
|
||||||
|
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64
|
||||||
|
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64
|
||||||
|
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64
|
||||||
|
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
|
||||||
|
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
|
||||||
|
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64
|
||||||
|
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-Swaggered Comment: JobMeta
|
||||||
|
// Non-Swaggered Comment: When reading from the database or sending data via GraphQL, the start time can be in the much more
|
||||||
|
// Non-Swaggered Comment: convenient time.Time type. In the `meta.json` files, the start time is encoded as a unix epoch timestamp.
|
||||||
|
// Non-Swaggered Comment: This is why there is this struct, which contains all fields from the regular job struct, but "overwrites"
|
||||||
|
// Non-Swaggered Comment: the StartTime field with one of type int64.
|
||||||
|
// Non-Swaggered Comment: ID *int64 `json:"id,omitempty"` >> never used in the job-archive, only available via REST-API
|
||||||
|
|
||||||
|
// JobMeta model
|
||||||
|
// @Description Meta data information of a HPC job.
|
||||||
|
type JobMeta struct {
|
||||||
|
// The unique identifier of a job in the database
|
||||||
|
ID *int64 `json:"id,omitempty"`
|
||||||
|
BaseJob
|
||||||
|
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
|
||||||
|
Statistics map[string]JobStatistics `json:"statistics,omitempty"` // Metric statistics of job
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
MonitoringStatusDisabled int32 = 0
|
||||||
|
MonitoringStatusRunningOrArchiving int32 = 1
|
||||||
|
MonitoringStatusArchivingFailed int32 = 2
|
||||||
|
MonitoringStatusArchivingSuccessful int32 = 3
|
||||||
|
)
|
||||||
|
|
||||||
|
var JobDefaults BaseJob = BaseJob{
|
||||||
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
|
||||||
|
// JobStatistics model
|
||||||
|
// @Description Specification for job metric statistics.
|
||||||
|
type JobStatistics struct {
|
||||||
|
// Metric unit (see schema/unit.schema.json)
|
||||||
|
Unit string `json:"unit" example:"GHz"`
|
||||||
|
Avg float64 `json:"avg" example:"2500" minimum:"0"` // Job metric average
|
||||||
|
Min float64 `json:"min" example:"2000" minimum:"0"` // Job metric minimum
|
||||||
|
Max float64 `json:"max" example:"3000" minimum:"0"` // Job metric maximum
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tag model
|
||||||
|
// @Description Defines a tag using name and type.
|
||||||
|
type Tag struct {
|
||||||
|
// The unique DB identifier of a tag
|
||||||
|
ID int64 `json:"id" db:"id"`
|
||||||
|
Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type
|
||||||
|
Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resource model
|
||||||
|
// @Description A resource used by a job
|
||||||
|
type Resource struct {
|
||||||
|
Hostname string `json:"hostname"` // Name of the host (= node)
|
||||||
|
HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids
|
||||||
|
Accelerators []string `json:"accelerators,omitempty"` // List of accelerator device ids
|
||||||
|
Configuration string `json:"configuration,omitempty"` // The configuration options of the node
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobState string
|
||||||
|
|
||||||
|
const (
|
||||||
|
JobStateRunning JobState = "running"
|
||||||
|
JobStateCompleted JobState = "completed"
|
||||||
|
JobStateFailed JobState = "failed"
|
||||||
|
JobStateCancelled JobState = "cancelled"
|
||||||
|
JobStateStopped JobState = "stopped"
|
||||||
|
JobStateTimeout JobState = "timeout"
|
||||||
|
JobStatePreempted JobState = "preempted"
|
||||||
|
JobStateOutOfMemory JobState = "out_of_memory"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (e *JobState) UnmarshalGQL(v interface{}) error {
|
||||||
|
str, ok := v.(string)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("enums must be strings")
|
||||||
|
}
|
||||||
|
|
||||||
|
*e = JobState(str)
|
||||||
|
if !e.Valid() {
|
||||||
|
return errors.New("invalid job state")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e JobState) MarshalGQL(w io.Writer) {
|
||||||
|
fmt.Fprintf(w, "\"%s\"", e)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e JobState) Valid() bool {
|
||||||
|
return e == JobStateRunning ||
|
||||||
|
e == JobStateCompleted ||
|
||||||
|
e == JobStateFailed ||
|
||||||
|
e == JobStateCancelled ||
|
||||||
|
e == JobStateStopped ||
|
||||||
|
e == JobStateTimeout ||
|
||||||
|
e == JobStatePreempted ||
|
||||||
|
e == JobStateOutOfMemory
|
||||||
|
}
|
66
tools/archive-migration/json.go
Normal file
66
tools/archive-migration/json.go
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func DecodeJobData(r io.Reader) (*JobData, error) {
|
||||||
|
var d JobData
|
||||||
|
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &d, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func DecodeJobMeta(r io.Reader) (*JobMeta, error) {
|
||||||
|
var d JobMeta
|
||||||
|
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &d, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func DecodeCluster(r io.Reader) (*Cluster, error) {
|
||||||
|
var c Cluster
|
||||||
|
if err := json.NewDecoder(r).Decode(&c); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func EncodeJobData(w io.Writer, d *schema.JobData) error {
|
||||||
|
// Sanitize parameters
|
||||||
|
if err := json.NewEncoder(w).Encode(d); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
|
||||||
|
// Sanitize parameters
|
||||||
|
if err := json.NewEncoder(w).Encode(d); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func EncodeCluster(w io.Writer, c *schema.Cluster) error {
|
||||||
|
// Sanitize parameters
|
||||||
|
if err := json.NewEncoder(w).Encode(c); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
318
tools/archive-migration/main.go
Normal file
318
tools/archive-migration/main.go
Normal file
@ -0,0 +1,318 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/units"
|
||||||
|
)
|
||||||
|
|
||||||
|
const Version = 1
|
||||||
|
|
||||||
|
var ar FsArchive
|
||||||
|
|
||||||
|
func loadJobData(filename string) (*JobData, error) {
|
||||||
|
|
||||||
|
f, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return &JobData{}, fmt.Errorf("fsBackend loadJobData()- %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return DecodeJobData(bufio.NewReader(f))
|
||||||
|
}
|
||||||
|
|
||||||
|
func deepCopyJobMeta(j *JobMeta) schema.JobMeta {
|
||||||
|
var jn schema.JobMeta
|
||||||
|
|
||||||
|
//required properties
|
||||||
|
jn.JobID = j.JobID
|
||||||
|
jn.User = j.User
|
||||||
|
jn.Project = j.Project
|
||||||
|
jn.Cluster = j.Cluster
|
||||||
|
jn.SubCluster = j.SubCluster
|
||||||
|
jn.NumNodes = j.NumNodes
|
||||||
|
jn.Exclusive = j.Exclusive
|
||||||
|
jn.StartTime = j.StartTime
|
||||||
|
jn.State = schema.JobState(j.State)
|
||||||
|
jn.Duration = j.Duration
|
||||||
|
|
||||||
|
for _, ro := range j.Resources {
|
||||||
|
var rn schema.Resource
|
||||||
|
rn.Hostname = ro.Hostname
|
||||||
|
rn.Configuration = ro.Configuration
|
||||||
|
hwt := make([]int, len(ro.HWThreads))
|
||||||
|
if ro.HWThreads != nil {
|
||||||
|
copy(hwt, ro.HWThreads)
|
||||||
|
}
|
||||||
|
rn.HWThreads = hwt
|
||||||
|
acc := make([]string, len(ro.Accelerators))
|
||||||
|
if ro.Accelerators != nil {
|
||||||
|
copy(acc, ro.Accelerators)
|
||||||
|
}
|
||||||
|
rn.Accelerators = acc
|
||||||
|
jn.Resources = append(jn.Resources, &rn)
|
||||||
|
}
|
||||||
|
jn.MetaData = make(map[string]string)
|
||||||
|
|
||||||
|
for k, v := range j.MetaData {
|
||||||
|
jn.MetaData[k] = v
|
||||||
|
}
|
||||||
|
|
||||||
|
jn.Statistics = make(map[string]schema.JobStatistics)
|
||||||
|
for k, v := range j.Statistics {
|
||||||
|
var sn schema.JobStatistics
|
||||||
|
sn.Avg = v.Avg
|
||||||
|
sn.Max = v.Max
|
||||||
|
sn.Min = v.Min
|
||||||
|
tmpUnit := units.ConvertUnitString(v.Unit)
|
||||||
|
if tmpUnit.Base == "inval" {
|
||||||
|
sn.Unit = schema.Unit{Base: ""}
|
||||||
|
} else {
|
||||||
|
sn.Unit = tmpUnit
|
||||||
|
}
|
||||||
|
jn.Statistics[k] = sn
|
||||||
|
}
|
||||||
|
|
||||||
|
//optional properties
|
||||||
|
jn.Partition = j.Partition
|
||||||
|
jn.ArrayJobId = j.ArrayJobId
|
||||||
|
jn.NumHWThreads = j.NumHWThreads
|
||||||
|
jn.NumAcc = j.NumAcc
|
||||||
|
jn.MonitoringStatus = j.MonitoringStatus
|
||||||
|
jn.SMT = j.SMT
|
||||||
|
jn.Walltime = j.Walltime
|
||||||
|
|
||||||
|
for _, t := range j.Tags {
|
||||||
|
jn.Tags = append(jn.Tags, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
return jn
|
||||||
|
}
|
||||||
|
|
||||||
|
func deepCopyJobData(d *JobData, cluster string, subCluster string) *schema.JobData {
|
||||||
|
var dn = make(schema.JobData)
|
||||||
|
|
||||||
|
for k, v := range *d {
|
||||||
|
// fmt.Printf("Metric %s\n", k)
|
||||||
|
dn[k] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||||
|
|
||||||
|
for mk, mv := range v {
|
||||||
|
// fmt.Printf("Scope %s\n", mk)
|
||||||
|
var mn schema.JobMetric
|
||||||
|
tmpUnit := units.ConvertUnitString(mv.Unit)
|
||||||
|
if tmpUnit.Base == "inval" {
|
||||||
|
mn.Unit = schema.Unit{Base: ""}
|
||||||
|
} else {
|
||||||
|
mn.Unit = tmpUnit
|
||||||
|
}
|
||||||
|
|
||||||
|
mn.Timestep = mv.Timestep
|
||||||
|
|
||||||
|
for _, v := range mv.Series {
|
||||||
|
var sn schema.Series
|
||||||
|
sn.Hostname = v.Hostname
|
||||||
|
if v.Id != nil {
|
||||||
|
var id = new(string)
|
||||||
|
|
||||||
|
if mk == schema.MetricScopeAccelerator {
|
||||||
|
s := GetSubCluster(cluster, subCluster)
|
||||||
|
var err error
|
||||||
|
|
||||||
|
*id, err = s.Topology.GetAcceleratorID(*v.Id)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
*id = fmt.Sprint(*v.Id)
|
||||||
|
}
|
||||||
|
sn.Id = id
|
||||||
|
}
|
||||||
|
if v.Statistics != nil {
|
||||||
|
sn.Statistics = schema.MetricStatistics{
|
||||||
|
Avg: v.Statistics.Avg,
|
||||||
|
Min: v.Statistics.Min,
|
||||||
|
Max: v.Statistics.Max}
|
||||||
|
}
|
||||||
|
|
||||||
|
sn.Data = make([]schema.Float, len(v.Data))
|
||||||
|
copy(sn.Data, v.Data)
|
||||||
|
mn.Series = append(mn.Series, sn)
|
||||||
|
}
|
||||||
|
|
||||||
|
dn[k][mk] = &mn
|
||||||
|
}
|
||||||
|
// fmt.Printf("FINISH %s\n", k)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &dn
|
||||||
|
}
|
||||||
|
|
||||||
|
func deepCopyClusterConfig(co *Cluster) schema.Cluster {
|
||||||
|
var cn schema.Cluster
|
||||||
|
|
||||||
|
cn.Name = co.Name
|
||||||
|
for _, sco := range co.SubClusters {
|
||||||
|
var scn schema.SubCluster
|
||||||
|
scn.Name = sco.Name
|
||||||
|
scn.Nodes = sco.Nodes
|
||||||
|
scn.ProcessorType = sco.ProcessorType
|
||||||
|
scn.SocketsPerNode = sco.SocketsPerNode
|
||||||
|
scn.CoresPerSocket = sco.CoresPerSocket
|
||||||
|
scn.ThreadsPerCore = sco.ThreadsPerCore
|
||||||
|
var prefix = new(string)
|
||||||
|
*prefix = "G"
|
||||||
|
scn.FlopRateScalar = schema.MetricValue{
|
||||||
|
Unit: schema.Unit{Base: "F/s", Prefix: prefix},
|
||||||
|
Value: float64(sco.FlopRateScalar)}
|
||||||
|
scn.FlopRateSimd = schema.MetricValue{
|
||||||
|
Unit: schema.Unit{Base: "F/s", Prefix: prefix},
|
||||||
|
Value: float64(sco.FlopRateSimd)}
|
||||||
|
scn.MemoryBandwidth = schema.MetricValue{
|
||||||
|
Unit: schema.Unit{Base: "B/s", Prefix: prefix},
|
||||||
|
Value: float64(sco.MemoryBandwidth)}
|
||||||
|
scn.Topology = *sco.Topology
|
||||||
|
cn.SubClusters = append(cn.SubClusters, &scn)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, mco := range co.MetricConfig {
|
||||||
|
var mcn schema.MetricConfig
|
||||||
|
mcn.Name = mco.Name
|
||||||
|
mcn.Scope = mco.Scope
|
||||||
|
if mco.Aggregation == "" {
|
||||||
|
fmt.Println("Property aggregation missing! Please review file!")
|
||||||
|
mcn.Aggregation = "sum"
|
||||||
|
} else {
|
||||||
|
mcn.Aggregation = mco.Aggregation
|
||||||
|
}
|
||||||
|
mcn.Timestep = mco.Timestep
|
||||||
|
tmpUnit := units.ConvertUnitString(mco.Unit)
|
||||||
|
if tmpUnit.Base == "inval" {
|
||||||
|
mcn.Unit = schema.Unit{Base: ""}
|
||||||
|
} else {
|
||||||
|
mcn.Unit = tmpUnit
|
||||||
|
}
|
||||||
|
mcn.Peak = mco.Peak
|
||||||
|
mcn.Normal = mco.Normal
|
||||||
|
mcn.Caution = mco.Caution
|
||||||
|
mcn.Alert = mco.Alert
|
||||||
|
mcn.SubClusters = mco.SubClusters
|
||||||
|
|
||||||
|
cn.MetricConfig = append(cn.MetricConfig, &mcn)
|
||||||
|
}
|
||||||
|
|
||||||
|
return cn
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var srcPath string
|
||||||
|
var dstPath string
|
||||||
|
|
||||||
|
flag.StringVar(&srcPath, "s", "./var/job-archive", "Specify the source job archive path. Default is ./var/job-archive")
|
||||||
|
flag.StringVar(&dstPath, "d", "./var/job-archive-new", "Specify the destination job archive path. Default is ./var/job-archive-new")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if _, err := os.Stat(filepath.Join(srcPath, "version.txt")); !errors.Is(err, os.ErrNotExist) {
|
||||||
|
log.Fatal("Archive version exists!")
|
||||||
|
}
|
||||||
|
|
||||||
|
srcConfig := fmt.Sprintf("{\"path\": \"%s\"}", srcPath)
|
||||||
|
err := ar.Init(json.RawMessage(srcConfig))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = initClusterConfig()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
// setup new job archive
|
||||||
|
err = os.Mkdir(dstPath, 0750)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range Clusters {
|
||||||
|
path := fmt.Sprintf("%s/%s", dstPath, c.Name)
|
||||||
|
fmt.Println(path)
|
||||||
|
err = os.Mkdir(path, 0750)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
cn := deepCopyClusterConfig(c)
|
||||||
|
|
||||||
|
f, err := os.Create(fmt.Sprintf("%s/%s/cluster.json", dstPath, c.Name))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := EncodeCluster(f, &cn); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := f.Close(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
for job := range ar.Iter() {
|
||||||
|
// fmt.Printf("Job %d\n", job.JobID)
|
||||||
|
job := job
|
||||||
|
wg.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
path := getPath(job, dstPath, "meta.json")
|
||||||
|
err = os.MkdirAll(filepath.Dir(path), 0750)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
f, err := os.Create(path)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
jmn := deepCopyJobMeta(job)
|
||||||
|
if err = EncodeJobMeta(f, &jmn); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
if err = f.Close(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err = os.Create(getPath(job, dstPath, "data.json"))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var jd *JobData
|
||||||
|
jd, err = loadJobData(getPath(job, srcPath, "data.json"))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
jdn := deepCopyJobData(jd, job.Cluster, job.SubCluster)
|
||||||
|
if err := EncodeJobData(f, jdn); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := f.Close(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
os.WriteFile(filepath.Join(dstPath, "version.txt"), []byte(fmt.Sprintf("%d", Version)), 0644)
|
||||||
|
}
|
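deepCopyJobData above converts the old integer series ids into strings, and for accelerator-scoped metrics it additionally translates the index into the accelerator's device id via the subcluster topology. A simplified, self-contained sketch of that decision (the device ids below are made up for illustration):

package main

import "fmt"

// newSeriesID mimics the id translation: accelerator-scoped ids are looked up
// in the topology's accelerator list, all other scopes are formatted as strings.
func newSeriesID(scope string, oldID int, acceleratorIDs []string) (string, error) {
	if scope == "accelerator" {
		if oldID < 0 || oldID >= len(acceleratorIDs) {
			return "", fmt.Errorf("no accelerator with index %d", oldID)
		}
		return acceleratorIDs[oldID], nil
	}
	return fmt.Sprint(oldID), nil
}

func main() {
	accs := []string{"00000000:0B:00.0", "00000000:13:00.0"} // hypothetical device ids
	id, _ := newSeriesID("accelerator", 1, accs)
	fmt.Println(id) // 00000000:13:00.0
	id, _ = newSeriesID("core", 7, nil)
	fmt.Println(id) // 7
}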
65
tools/archive-migration/metrics.go
Normal file
65
tools/archive-migration/metrics.go
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
type JobData map[string]map[schema.MetricScope]*JobMetric
|
||||||
|
|
||||||
|
type JobMetric struct {
|
||||||
|
Unit string `json:"unit"`
|
||||||
|
Scope schema.MetricScope `json:"scope"`
|
||||||
|
Timestep int `json:"timestep"`
|
||||||
|
Series []Series `json:"series"`
|
||||||
|
StatisticsSeries *StatsSeries `json:"statisticsSeries"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Series struct {
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
|
Id *int `json:"id,omitempty"`
|
||||||
|
Statistics *MetricStatistics `json:"statistics"`
|
||||||
|
Data []schema.Float `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricStatistics struct {
|
||||||
|
Avg float64 `json:"avg"`
|
||||||
|
Min float64 `json:"min"`
|
||||||
|
Max float64 `json:"max"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type StatsSeries struct {
|
||||||
|
Mean []Float `json:"mean"`
|
||||||
|
Min []Float `json:"min"`
|
||||||
|
Max []Float `json:"max"`
|
||||||
|
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// type MetricScope string
|
||||||
|
|
||||||
|
// const (
|
||||||
|
// MetricScopeInvalid MetricScope = "invalid_scope"
|
||||||
|
|
||||||
|
// MetricScopeNode MetricScope = "node"
|
||||||
|
// MetricScopeSocket MetricScope = "socket"
|
||||||
|
// MetricScopeMemoryDomain MetricScope = "memoryDomain"
|
||||||
|
// MetricScopeCore MetricScope = "core"
|
||||||
|
// MetricScopeHWThread MetricScope = "hwthread"
|
||||||
|
|
||||||
|
// MetricScopeAccelerator MetricScope = "accelerator"
|
||||||
|
// )
|
||||||
|
|
||||||
|
// var metricScopeGranularity map[MetricScope]int = map[MetricScope]int{
|
||||||
|
// MetricScopeNode: 10,
|
||||||
|
// MetricScopeSocket: 5,
|
||||||
|
// MetricScopeMemoryDomain: 3,
|
||||||
|
// MetricScopeCore: 2,
|
||||||
|
// MetricScopeHWThread: 1,
|
||||||
|
|
||||||
|
// MetricScopeAccelerator: 5, // Special/Randomly chosen
|
||||||
|
|
||||||
|
// MetricScopeInvalid: -1,
|
||||||
|
// }
|
@@ -1,9 +0,0 @@
-// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
-// Use of this source code is governed by a MIT-style
-// license that can be found in the LICENSE file.
-package main
-
-func main() {
-
-}
|
|||||||
let rooflineMaxY
|
let rooflineMaxY
|
||||||
let colWidth
|
let colWidth
|
||||||
let numBins = 50
|
let numBins = 50
|
||||||
const ccconfig = getContext('cc-config'),
|
const ccconfig = getContext('cc-config')
|
||||||
metricConfig = getContext('metrics')
|
const metricConfig = getContext('metrics')
|
||||||
|
|
||||||
let metricsInHistograms = ccconfig.analysis_view_histogramMetrics,
|
let metricsInHistograms = ccconfig.analysis_view_histogramMetrics,
|
||||||
metricsInScatterplots = ccconfig.analysis_view_scatterPlotMetrics
|
metricsInScatterplots = ccconfig.analysis_view_scatterPlotMetrics
|
||||||
@ -161,24 +161,29 @@
|
|||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth - 25} height={300 * 0.5}
|
width={colWidth - 25} height={300 * 0.5}
|
||||||
data={$statsQuery.data.topUsers.sort((a, b) => b.count - a.count).map(({ count }, idx) => ({ count, value: idx }))}
|
data={$statsQuery.data.topUsers.sort((a, b) => b.count - a.count).map(({ count }, idx) => ({ count, value: idx }))}
|
||||||
label={(x) => x < $statsQuery.data.topUsers.length ? $statsQuery.data.topUsers[Math.floor(x)].name : '0'} />
|
label={(x) => x < $statsQuery.data.topUsers.length ? $statsQuery.data.topUsers[Math.floor(x)].name : 'No Users'}
|
||||||
|
ylabel="Node Hours [h]"/>
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-3">
|
<div class="col-3">
|
||||||
{#key $statsQuery.data.stats[0].histDuration}
|
{#key $statsQuery.data.stats[0].histDuration}
|
||||||
<h4>Walltime Distribution</h4>
|
<h4>Duration Distribution</h4>
|
||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth - 25} height={300}
|
width={colWidth - 25}
|
||||||
data={$statsQuery.data.stats[0].histDuration} />
|
data={$statsQuery.data.stats[0].histDuration}
|
||||||
|
xlabel="Current Runtimes [h]"
|
||||||
|
ylabel="Number of Jobs"/>
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
<div class="col-3">
|
<div class="col-3">
|
||||||
{#key $statsQuery.data.stats[0].histNumNodes}
|
{#key $statsQuery.data.stats[0].histNumNodes}
|
||||||
<h4>Number of Nodes Distribution</h4>
|
<h4>Number of Nodes Distribution</h4>
|
||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth - 25} height={300}
|
width={colWidth - 25}
|
||||||
data={$statsQuery.data.stats[0].histNumNodes} />
|
data={$statsQuery.data.stats[0].histNumNodes}
|
||||||
|
xlabel="Allocated Nodes [#]"
|
||||||
|
ylabel="Number of Jobs" />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
<div class="col-3">
|
<div class="col-3">
|
||||||
@ -189,7 +194,7 @@
|
|||||||
{:else if $rooflineQuery.data && cluster}
|
{:else if $rooflineQuery.data && cluster}
|
||||||
{#key $rooflineQuery.data}
|
{#key $rooflineQuery.data}
|
||||||
<Roofline
|
<Roofline
|
||||||
width={colWidth - 25} height={300}
|
width={colWidth - 25}
|
||||||
tiles={$rooflineQuery.data.rooflineHeatmap}
|
tiles={$rooflineQuery.data.rooflineHeatmap}
|
||||||
cluster={cluster.subClusters.length == 1 ? cluster.subClusters[0] : null}
|
cluster={cluster.subClusters.length == 1 ? cluster.subClusters[0] : null}
|
||||||
maxY={rooflineMaxY} />
|
maxY={rooflineMaxY} />
|
||||||
@ -211,6 +216,7 @@
|
|||||||
<Col>
|
<Col>
|
||||||
<Card body>
|
<Card body>
|
||||||
These histograms show the distribution of the averages of all jobs matching the filters. Each job/average is weighted by its node hours.
|
These histograms show the distribution of the averages of all jobs matching the filters. Each job/average is weighted by its node hours.
|
||||||
|
Note that some metrics may be disabled for specific subclusters via metricConfig, which can affect the shown average values.
|
||||||
</Card>
|
</Card>
|
||||||
<br/>
|
<br/>
|
||||||
</Col>
|
</Col>
|
||||||
@ -224,12 +230,16 @@
|
|||||||
$footprintsQuery.data.footprints.nodehours,
|
$footprintsQuery.data.footprints.nodehours,
|
||||||
$footprintsQuery.data.footprints.metrics.find(f => f.metric == metric).data, numBins) }))}
|
$footprintsQuery.data.footprints.metrics.find(f => f.metric == metric).data, numBins) }))}
|
||||||
itemsPerRow={ccconfig.plot_view_plotsPerRow}>
|
itemsPerRow={ccconfig.plot_view_plotsPerRow}>
|
||||||
<h4>{item.metric} [{metricConfig(cluster.name, item.metric)?.unit}]</h4>
|
<h4>Average Distribution of '{item.metric}'</h4>
|
||||||
|
|
||||||
<Histogram
|
<Histogram
|
||||||
width={width} height={250}
|
width={width} height={250}
|
||||||
min={item.min} max={item.max}
|
min={item.min} max={item.max}
|
||||||
data={item.bins} label={item.label} />
|
data={item.bins}
|
||||||
|
label={item.label}
|
||||||
|
xlabel={`${item.metric} Average [${(metricConfig(cluster.name, item.metric)?.unit?.prefix ? metricConfig(cluster.name, item.metric)?.unit?.prefix : '') +
|
||||||
|
(metricConfig(cluster.name, item.metric)?.unit?.base ? metricConfig(cluster.name, item.metric)?.unit?.base : '')}]`}
|
||||||
|
ylabel="Node Hours [h]" />
|
||||||
</PlotTable>
|
</PlotTable>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
@ -238,6 +248,7 @@
|
|||||||
<Col>
|
<Col>
|
||||||
<Card body>
|
<Card body>
|
||||||
Each circle represents one job. The size of a circle is proportional to its node hours. Darker circles mean multiple jobs have the same averages for the respective metrics.
|
Each circle represents one job. The size of a circle is proportional to its node hours. Darker circles mean multiple jobs have the same averages for the respective metrics.
|
||||||
|
Note that some metrics may be disabled for specific subclusters via metricConfig, which can affect the shown average values.
|
||||||
</Card>
|
</Card>
|
||||||
<br/>
|
<br/>
|
||||||
</Col>
|
</Col>
|
||||||
@ -254,12 +265,18 @@
|
|||||||
|
|
||||||
<ScatterPlot
|
<ScatterPlot
|
||||||
width={width} height={250} color={"rgba(0, 102, 204, 0.33)"}
|
width={width} height={250} color={"rgba(0, 102, 204, 0.33)"}
|
||||||
xLabel={`${item.m1} [${metricConfig(cluster.name, item.m1)?.unit}]`}
|
xLabel={`${item.m1} [${(metricConfig(cluster.name, item.m1)?.unit?.prefix ? metricConfig(cluster.name, item.m1)?.unit?.prefix : '') +
|
||||||
yLabel={`${item.m2} [${metricConfig(cluster.name, item.m2)?.unit}]`}
|
(metricConfig(cluster.name, item.m1)?.unit?.base ? metricConfig(cluster.name, item.m1)?.unit?.base : '')}]`}
|
||||||
|
yLabel={`${item.m2} [${(metricConfig(cluster.name, item.m2)?.unit?.prefix ? metricConfig(cluster.name, item.m2)?.unit?.prefix : '') +
|
||||||
|
(metricConfig(cluster.name, item.m2)?.unit?.base ? metricConfig(cluster.name, item.m2)?.unit?.base : '')}]`}
|
||||||
X={item.f1} Y={item.f2} S={$footprintsQuery.data.footprints.nodehours} />
|
X={item.f1} Y={item.f2} S={$footprintsQuery.data.footprints.nodehours} />
|
||||||
</PlotTable>
|
</PlotTable>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
h4 {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
@ -81,7 +81,7 @@
|
|||||||
missingMetrics = metricNames.filter(metric => !metrics.some(jm => jm.name == metric))
|
missingMetrics = metricNames.filter(metric => !metrics.some(jm => jm.name == metric))
|
||||||
missingHosts = job.resources.map(({ hostname }) => ({
|
missingHosts = job.resources.map(({ hostname }) => ({
|
||||||
hostname: hostname,
|
hostname: hostname,
|
||||||
metrics: metricNames.filter(metric => !metrics.some(jm => jm.metric.scope == 'node' && jm.metric.series.some(series => series.hostname == hostname)))
|
metrics: metricNames.filter(metric => !metrics.some(jm => jm.scope == 'node' && jm.metric.series.some(series => series.hostname == hostname)))
|
||||||
})).filter(({ metrics }) => metrics.length > 0)
|
})).filter(({ metrics }) => metrics.length > 0)
|
||||||
somethingMissing = missingMetrics.length > 0 || missingHosts.length > 0
|
somethingMissing = missingMetrics.length > 0 || missingHosts.length > 0
|
||||||
}
|
}
|
||||||
@ -114,8 +114,8 @@
|
|||||||
cluster={clusters
|
cluster={clusters
|
||||||
.find(c => c.name == $initq.data.job.cluster).subClusters
|
.find(c => c.name == $initq.data.job.cluster).subClusters
|
||||||
.find(sc => sc.name == $initq.data.job.subCluster)}
|
.find(sc => sc.name == $initq.data.job.subCluster)}
|
||||||
flopsAny={$jobMetrics.data.jobMetrics.find(m => m.name == 'flops_any' && m.metric.scope == 'node')}
|
flopsAny={$jobMetrics.data.jobMetrics.find(m => m.name == 'flops_any' && m.scope == 'node').metric}
|
||||||
memBw={$jobMetrics.data.jobMetrics.find(m => m.name == 'mem_bw' && m.metric.scope == 'node')} />
|
memBw={$jobMetrics.data.jobMetrics.find(m => m.name == 'mem_bw' && m.scope == 'node').metric} />
|
||||||
</Col>
|
</Col>
|
||||||
{:else}
|
{:else}
|
||||||
<Col></Col>
|
<Col></Col>
|
||||||
@ -163,8 +163,9 @@
|
|||||||
bind:this={plots[item.metric]}
|
bind:this={plots[item.metric]}
|
||||||
on:more-loaded={({ detail }) => statsTable.moreLoaded(detail)}
|
on:more-loaded={({ detail }) => statsTable.moreLoaded(detail)}
|
||||||
job={$initq.data.job}
|
job={$initq.data.job}
|
||||||
metric={item.metric}
|
metricName={item.metric}
|
||||||
scopes={item.data.map(x => x.metric)}
|
rawData={item.data.map(x => x.metric)}
|
||||||
|
scopes={item.data.map(x => x.scope)}
|
||||||
width={width}/>
|
width={width}/>
|
||||||
{:else}
|
{:else}
|
||||||
<Card body color="warning">No data for <code>{item.metric}</code></Card>
|
<Card body color="warning">No data for <code>{item.metric}</code></Card>
|
||||||
|
@ -17,11 +17,15 @@
|
|||||||
export let authlevel
|
export let authlevel
|
||||||
export let roles
|
export let roles
|
||||||
|
|
||||||
let filters, jobList, matchedJobs = null
|
let filters = []
|
||||||
|
let jobList, matchedJobs = null
|
||||||
let sorting = { field: 'startTime', order: 'DESC' }, isSortingOpen = false, isMetricsSelectionOpen = false
|
let sorting = { field: 'startTime', order: 'DESC' }, isSortingOpen = false, isMetricsSelectionOpen = false
|
||||||
let metrics = filterPresets.cluster
|
let metrics = filterPresets.cluster
|
||||||
? ccconfig[`plot_list_selectedMetrics:${filterPresets.cluster}`] || ccconfig.plot_list_selectedMetrics
|
? ccconfig[`plot_list_selectedMetrics:${filterPresets.cluster}`] || ccconfig.plot_list_selectedMetrics
|
||||||
: ccconfig.plot_list_selectedMetrics
|
: ccconfig.plot_list_selectedMetrics
|
||||||
|
let selectedCluster = filterPresets?.cluster ? filterPresets.cluster : null
|
||||||
|
|
||||||
|
$: selectedCluster = filters[0]?.cluster ? filters[0].cluster.eq : null
|
||||||
|
|
||||||
// The filterPresets are handled by the Filters component,
|
// The filterPresets are handled by the Filters component,
|
||||||
// so we need to wait for it to be ready before we can start a query.
|
// so we need to wait for it to be ready before we can start a query.
|
||||||
@ -58,7 +62,10 @@
|
|||||||
<Filters
|
<Filters
|
||||||
filterPresets={filterPresets}
|
filterPresets={filterPresets}
|
||||||
bind:this={filters}
|
bind:this={filters}
|
||||||
on:update={({ detail }) => jobList.update(detail.filters)} />
|
on:update={({ detail }) => {
|
||||||
|
filters = detail.filters
|
||||||
|
jobList.update(detail.filters)}
|
||||||
|
} />
|
||||||
</Col>
|
</Col>
|
||||||
|
|
||||||
<Col xs="3" style="margin-left: auto;">
|
<Col xs="3" style="margin-left: auto;">
|
||||||
@ -84,7 +91,7 @@
|
|||||||
bind:isOpen={isSortingOpen} />
|
bind:isOpen={isSortingOpen} />
|
||||||
|
|
||||||
<MetricSelection
|
<MetricSelection
|
||||||
cluster={filterPresets.cluster}
|
bind:cluster={selectedCluster}
|
||||||
configName="plot_list_selectedMetrics"
|
configName="plot_list_selectedMetrics"
|
||||||
bind:metrics={metrics}
|
bind:metrics={metrics}
|
||||||
bind:isOpen={isMetricsSelectionOpen} />
|
bind:isOpen={isMetricsSelectionOpen} />
|
||||||
|
@ -5,19 +5,22 @@
|
|||||||
import { fetchMetrics, minScope } from './utils'
|
import { fetchMetrics, minScope } from './utils'
|
||||||
|
|
||||||
export let job
|
export let job
|
||||||
export let metric
|
export let metricName
|
||||||
export let scopes
|
export let scopes
|
||||||
export let width
|
export let width
|
||||||
|
export let rawData
|
||||||
|
|
||||||
const dispatch = createEventDispatcher()
|
const dispatch = createEventDispatcher()
|
||||||
const cluster = getContext('clusters').find(cluster => cluster.name == job.cluster)
|
const cluster = getContext('clusters').find(cluster => cluster.name == job.cluster)
|
||||||
const subCluster = cluster.subClusters.find(subCluster => subCluster.name == job.subCluster)
|
const subCluster = cluster.subClusters.find(subCluster => subCluster.name == job.subCluster)
|
||||||
const metricConfig = cluster.metricConfig.find(metricConfig => metricConfig.name == metric)
|
const metricConfig = cluster.metricConfig.find(metricConfig => metricConfig.name == metricName)
|
||||||
|
|
||||||
let selectedScope = minScope(scopes.map(s => s.scope)), selectedHost = null, plot, fetching = false, error = null
|
let selectedHost = null, plot, fetching = false, error = null
|
||||||
|
let selectedScope = minScope(scopes)
|
||||||
|
let selectedScopeIndex = scopes.findIndex(s => s == selectedScope)
|
||||||
|
|
||||||
$: avaliableScopes = scopes.map(metric => metric.scope)
|
$: avaliableScopes = scopes
|
||||||
$: data = scopes.find(metric => metric.scope == selectedScope)
|
$: data = rawData[selectedScopeIndex]
|
||||||
$: series = data?.series.filter(series => selectedHost == null || series.hostname == selectedHost)
|
$: series = data?.series.filter(series => selectedHost == null || series.hostname == selectedHost)
|
||||||
|
|
||||||
let from = null, to = null
|
let from = null, to = null
|
||||||
@ -29,7 +32,7 @@
|
|||||||
|
|
||||||
export async function loadMore() {
|
export async function loadMore() {
|
||||||
fetching = true
|
fetching = true
|
||||||
let response = await fetchMetrics(job, [metric], ["core"])
|
let response = await fetchMetrics(job, [metricName], ["core"])
|
||||||
fetching = false
|
fetching = false
|
||||||
|
|
||||||
if (response.error) {
|
if (response.error) {
|
||||||
@ -38,9 +41,9 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (let jm of response.data.jobMetrics) {
|
for (let jm of response.data.jobMetrics) {
|
||||||
if (jm.metric.scope != "node") {
|
if (jm.scope != "node") {
|
||||||
scopes.push(jm.metric)
|
scopes.push(jm.metric)
|
||||||
selectedScope = jm.metric.scope
|
selectedScope = jm.scope
|
||||||
dispatch('more-loaded', jm)
|
dispatch('more-loaded', jm)
|
||||||
if (!avaliableScopes.includes(selectedScope))
|
if (!avaliableScopes.includes(selectedScope))
|
||||||
avaliableScopes = [...avaliableScopes, selectedScope]
|
avaliableScopes = [...avaliableScopes, selectedScope]
|
||||||
@ -52,7 +55,8 @@
|
|||||||
</script>
|
</script>
|
||||||
<InputGroup>
|
<InputGroup>
|
||||||
<InputGroupText style="min-width: 150px;">
|
<InputGroupText style="min-width: 150px;">
|
||||||
{metric} ({metricConfig?.unit})
|
{metricName} ({(metricConfig?.unit?.prefix ? metricConfig.unit.prefix : '') +
|
||||||
|
(metricConfig?.unit?.base ? metricConfig.unit.base : '')})
|
||||||
</InputGroupText>
|
</InputGroupText>
|
||||||
<select class="form-select" bind:value={selectedScope}>
|
<select class="form-select" bind:value={selectedScope}>
|
||||||
{#each avaliableScopes as scope}
|
{#each avaliableScopes as scope}
|
||||||
@ -82,7 +86,7 @@
|
|||||||
width={width} height={300}
|
width={width} height={300}
|
||||||
cluster={cluster} subCluster={subCluster}
|
cluster={cluster} subCluster={subCluster}
|
||||||
timestep={data.timestep}
|
timestep={data.timestep}
|
||||||
scope={selectedScope} metric={metric}
|
scope={selectedScope} metric={metricName}
|
||||||
series={series} />
|
series={series} />
|
||||||
{/if}
|
{/if}
|
||||||
{/key}
|
{/key}
|
||||||
|
@ -95,7 +95,7 @@
|
|||||||
|
|
||||||
<Modal isOpen={isOpen} toggle={() => (isOpen = !isOpen)}>
|
<Modal isOpen={isOpen} toggle={() => (isOpen = !isOpen)}>
|
||||||
<ModalHeader>
|
<ModalHeader>
|
||||||
Configure columns
|
Configure columns (Metric availability shown)
|
||||||
</ModalHeader>
|
</ModalHeader>
|
||||||
<ModalBody>
|
<ModalBody>
|
||||||
<ListGroup>
|
<ListGroup>
|
||||||
@ -113,9 +113,26 @@
|
|||||||
{/if}
|
{/if}
|
||||||
{metric}
|
{metric}
|
||||||
<span style="float: right;">
|
<span style="float: right;">
|
||||||
{cluster == null ? clusters
|
{cluster == null ?
|
||||||
|
clusters // No single cluster specified: list clusters that provide the metric
|
||||||
.filter(cluster => cluster.metricConfig.find(m => m.name == metric) != null)
|
.filter(cluster => cluster.metricConfig.find(m => m.name == metric) != null)
|
||||||
.map(cluster => cluster.name).join(', ') : ''}
|
.map(cluster => cluster.name).join(', ') :
|
||||||
|
clusters // Single cluster requested: list subclusters that do not have the metric remove flag set
|
||||||
|
.filter(cluster => cluster.metricConfig.find(m => m.name == metric) != null)
|
||||||
|
.map(function(cluster) {
|
||||||
|
let scNames = cluster.subClusters.map(sc => sc.name)
|
||||||
|
scNames.forEach(function(scName){
|
||||||
|
let met = cluster.metricConfig.find(m => m.name == metric)
|
||||||
|
let msc = met.subClusters.find(msc => msc.name == scName)
|
||||||
|
if (msc != null) {
|
||||||
|
if (msc.remove == true) {
|
||||||
|
scNames = scNames.filter(scn => scn != msc.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return scNames
|
||||||
|
})
|
||||||
|
.join(', ')}
|
||||||
</span>
|
</span>
|
||||||
</li>
|
</li>
|
||||||
{/each}
|
{/each}
|
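The expression above lists, for a single selected cluster, only those subclusters whose metricConfig entry does not set the remove flag. A minimal standalone sketch of the same check (the function name availableSubClusters and the sample call are assumptions for illustration, not part of this change):

    // Return the names of subclusters in `cluster` for which `metricName` is not flagged for removal
    function availableSubClusters(cluster, metricName) {
        const conf = cluster.metricConfig.find(m => m.name == metricName)
        if (conf == null)
            return []                                   // metric not configured on this cluster at all
        return cluster.subClusters
            .map(sc => sc.name)
            .filter(scName => {
                const override = (conf.subClusters || []).find(msc => msc.name == scName)
                return !(override && override.remove == true)
            })
    }

    // e.g. availableSubClusters(clusters[0], 'mem_bw').join(', ')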
||||||
|
@ -20,16 +20,19 @@
|
|||||||
from.setMinutes(from.getMinutes() - 30)
|
from.setMinutes(from.getMinutes() - 30)
|
||||||
}
|
}
|
||||||
|
|
||||||
const ccconfig = getContext('cc-config'), clusters = getContext('clusters')
|
const ccconfig = getContext('cc-config')
|
||||||
|
const clusters = getContext('clusters')
|
||||||
|
|
||||||
const nodesQuery = operationStore(`query($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
|
const nodesQuery = operationStore(`query($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
|
||||||
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
|
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
|
||||||
host, subCluster
|
host
|
||||||
|
subCluster
|
||||||
metrics {
|
metrics {
|
||||||
name,
|
name
|
||||||
|
scope
|
||||||
metric {
|
metric {
|
||||||
timestep
|
timestep
|
||||||
scope
|
unit { base, prefix }
|
||||||
series {
|
series {
|
||||||
statistics { min, avg, max }
|
statistics { min, avg, max }
|
||||||
data
|
data
|
||||||
@ -46,6 +49,17 @@
|
|||||||
|
|
||||||
$: $nodesQuery.variables = { cluster, nodes: [hostname], from: from.toISOString(), to: to.toISOString() }
|
$: $nodesQuery.variables = { cluster, nodes: [hostname], from: from.toISOString(), to: to.toISOString() }
|
||||||
|
|
||||||
|
let metricUnits = {}
|
||||||
|
$: if ($nodesQuery.data) {
|
||||||
|
for (let metric of clusters.find(c => c.name == cluster).metricConfig) {
|
||||||
|
if (metric.unit.prefix || metric.unit.base) {
|
||||||
|
metricUnits[metric.name] = '(' + (metric.unit.prefix ? metric.unit.prefix : '') + (metric.unit.base ? metric.unit.base : '') + ')'
|
||||||
|
} else { // If no unit defined: Omit Unit Display
|
||||||
|
metricUnits[metric.name] = ''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
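The reactive block above concatenates an optional unit prefix and base and wraps them in parentheses, producing an empty string when no unit is configured. The same rule as a small helper (the name formatUnit is an assumption for illustration):

    // Build a display string such as "(GF/s)" from a unit object; return '' when neither part is set
    function formatUnit(unit) {
        if (!unit || (!unit.prefix && !unit.base))
            return ''
        return '(' + (unit.prefix || '') + (unit.base || '') + ')'
    }

    // formatUnit({ prefix: 'G', base: 'F/s' }) -> '(GF/s)'
    // formatUnit({})                           -> ''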
|
||||||
|
|
||||||
query(nodesQuery)
|
query(nodesQuery)
|
||||||
|
|
||||||
// $: console.log($nodesQuery?.data?.nodeMetrics[0].metrics)
|
// $: console.log($nodesQuery?.data?.nodeMetrics[0].metrics)
|
||||||
@ -83,7 +97,7 @@
|
|||||||
let:width
|
let:width
|
||||||
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
||||||
items={$nodesQuery.data.nodeMetrics[0].metrics.sort((a, b) => a.name.localeCompare(b.name))}>
|
items={$nodesQuery.data.nodeMetrics[0].metrics.sort((a, b) => a.name.localeCompare(b.name))}>
|
||||||
<h4 style="text-align: center;">{item.name}</h4>
|
<h4 style="text-align: center;">{item.name} {metricUnits[item.name]}</h4>
|
||||||
<MetricPlot
|
<MetricPlot
|
||||||
width={width} height={300} metric={item.name} timestep={item.metric.timestep}
|
width={width} height={300} metric={item.name} timestep={item.metric.timestep}
|
||||||
cluster={clusters.find(c => c.name == cluster)} subCluster={$nodesQuery.data.nodeMetrics[0].subCluster}
|
cluster={clusters.find(c => c.name == cluster)} subCluster={$nodesQuery.data.nodeMetrics[0].subCluster}
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
const allMetrics = [...new Set(jobMetrics.map(m => m.name))].sort(),
|
const allMetrics = [...new Set(jobMetrics.map(m => m.name))].sort(),
|
||||||
scopesForMetric = (metric) => jobMetrics
|
scopesForMetric = (metric) => jobMetrics
|
||||||
.filter(jm => jm.name == metric)
|
.filter(jm => jm.name == metric)
|
||||||
.map(jm => jm.metric.scope)
|
.map(jm => jm.scope)
|
||||||
|
|
||||||
let hosts = job.resources.map(r => r.hostname).sort(),
|
let hosts = job.resources.map(r => r.hostname).sort(),
|
||||||
selectedScopes = {},
|
selectedScopes = {},
|
||||||
@ -40,7 +40,7 @@
|
|||||||
s.active = true
|
s.active = true
|
||||||
}
|
}
|
||||||
|
|
||||||
let series = jobMetrics.find(jm => jm.name == metric && jm.metric.scope == 'node')?.metric.series
|
let series = jobMetrics.find(jm => jm.name == metric && jm.scope == 'node')?.metric.series
|
||||||
sorting = {...sorting}
|
sorting = {...sorting}
|
||||||
hosts = hosts.sort((h1, h2) => {
|
hosts = hosts.sort((h1, h2) => {
|
||||||
let s1 = series.find(s => s.hostname == h1)?.statistics
|
let s1 = series.find(s => s.hostname == h1)?.statistics
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
export let jobMetrics
|
export let jobMetrics
|
||||||
|
|
||||||
$: series = jobMetrics
|
$: series = jobMetrics
|
||||||
.find(jm => jm.name == metric && jm.metric.scope == scope)
|
.find(jm => jm.name == metric && jm.scope == scope)
|
||||||
?.metric.series.filter(s => s.hostname == host && s.statistics != null)
|
?.metric.series.filter(s => s.hostname == host && s.statistics != null)
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
import Refresher from './joblist/Refresher.svelte'
|
import Refresher from './joblist/Refresher.svelte'
|
||||||
import Roofline, { transformPerNodeData } from './plots/Roofline.svelte'
|
import Roofline, { transformPerNodeData } from './plots/Roofline.svelte'
|
||||||
import Histogram from './plots/Histogram.svelte'
|
import Histogram from './plots/Histogram.svelte'
|
||||||
import { Row, Col, Spinner, Card, Table, Progress } from 'sveltestrap'
|
import { Row, Col, Spinner, Card, CardHeader, CardTitle, CardBody, Table, Progress, Icon } from 'sveltestrap'
|
||||||
import { init } from './utils.js'
|
import { init, formatNumber } from './utils.js'
|
||||||
import { operationStore, query } from '@urql/svelte'
|
import { operationStore, query } from '@urql/svelte'
|
||||||
|
|
||||||
const { query: initq } = init()
|
const { query: initq } = init()
|
||||||
@ -15,13 +15,14 @@
|
|||||||
let from = new Date(Date.now() - 5 * 60 * 1000), to = new Date(Date.now())
|
let from = new Date(Date.now() - 5 * 60 * 1000), to = new Date(Date.now())
|
||||||
const mainQuery = operationStore(`query($cluster: String!, $filter: [JobFilter!]!, $metrics: [String!], $from: Time!, $to: Time!) {
|
const mainQuery = operationStore(`query($cluster: String!, $filter: [JobFilter!]!, $metrics: [String!], $from: Time!, $to: Time!) {
|
||||||
nodeMetrics(cluster: $cluster, metrics: $metrics, from: $from, to: $to) {
|
nodeMetrics(cluster: $cluster, metrics: $metrics, from: $from, to: $to) {
|
||||||
host,
|
host
|
||||||
subCluster,
|
subCluster
|
||||||
metrics {
|
metrics {
|
||||||
name,
|
name
|
||||||
metric {
|
|
||||||
scope
|
scope
|
||||||
timestep,
|
metric {
|
||||||
|
timestep
|
||||||
|
unit { base, prefix }
|
||||||
series { data }
|
series { data }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -47,20 +48,27 @@
|
|||||||
? sum + (node.metrics.find(m => m.name == metric)?.metric.series.reduce((sum, series) => sum + series.data[series.data.length - 1], 0) || 0)
|
? sum + (node.metrics.find(m => m.name == metric)?.metric.series.reduce((sum, series) => sum + series.data[series.data.length - 1], 0) || 0)
|
||||||
: sum, 0)
|
: sum, 0)
|
||||||
|
|
||||||
let allocatedNodes = {}, flopRate = {}, memBwRate = {}
|
let allocatedNodes = {}, flopRate = {}, flopRateUnit = {}, memBwRate = {}, memBwRateUnit = {}
|
||||||
$: if ($initq.data && $mainQuery.data) {
|
$: if ($initq.data && $mainQuery.data) {
|
||||||
let subClusters = $initq.data.clusters.find(c => c.name == cluster).subClusters
|
let subClusters = $initq.data.clusters.find(c => c.name == cluster).subClusters
|
||||||
for (let subCluster of subClusters) {
|
for (let subCluster of subClusters) {
|
||||||
allocatedNodes[subCluster.name] = $mainQuery.data.allocatedNodes.find(({ name }) => name == subCluster.name)?.count || 0
|
allocatedNodes[subCluster.name] = $mainQuery.data.allocatedNodes.find(({ name }) => name == subCluster.name)?.count || 0
|
||||||
flopRate[subCluster.name] = Math.floor(sumUp($mainQuery.data.nodeMetrics, subCluster.name, 'flops_any') * 100) / 100
|
flopRate[subCluster.name] = Math.floor(sumUp($mainQuery.data.nodeMetrics, subCluster.name, 'flops_any') * 100) / 100
|
||||||
|
flopRateUnit[subCluster.name] = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base
|
||||||
memBwRate[subCluster.name] = Math.floor(sumUp($mainQuery.data.nodeMetrics, subCluster.name, 'mem_bw') * 100) / 100
|
memBwRate[subCluster.name] = Math.floor(sumUp($mainQuery.data.nodeMetrics, subCluster.name, 'mem_bw') * 100) / 100
|
||||||
|
memBwRateUnit[subCluster.name] = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base
|
||||||
}
|
}
|
||||||
}
|
}
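Because flopRateSimd and memoryBandwidth are now objects carrying a unit and a value, the per-subcluster unit labels and Progress maxima are derived as above. An illustrative value of that shape and the derived quantities (the numbers and the 64-node count are made up):

    // Illustrative MetricValue-style object for a subcluster's SIMD flop rate
    const flopRateSimd = { unit: { prefix: 'G', base: 'F/s' }, value: 48 }
    const numberOfNodes = 64

    const flopRateUnit = flopRateSimd.unit.prefix + flopRateSimd.unit.base   // "GF/s"
    const maxFlopRate  = flopRateSimd.value * numberOfNodes                  // 3072, used as the Progress maximum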
|
||||||
|
|
||||||
query(mainQuery)
|
query(mainQuery)
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<!-- Loading indicator & Refresh -->
|
||||||
|
|
||||||
<Row>
|
<Row>
|
||||||
|
<Col xs="auto" style="align-self: flex-end;">
|
||||||
|
<h4 class="mb-0" >Current usage of cluster "{cluster}"</h4>
|
||||||
|
</Col>
|
||||||
<Col xs="auto">
|
<Col xs="auto">
|
||||||
{#if $initq.fetching || $mainQuery.fetching}
|
{#if $initq.fetching || $mainQuery.fetching}
|
||||||
<Spinner/>
|
<Spinner/>
|
||||||
@ -89,54 +97,72 @@
|
|||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<!-- Gauges & Roofline per Subcluster-->
|
||||||
|
|
||||||
{#if $initq.data && $mainQuery.data}
|
{#if $initq.data && $mainQuery.data}
|
||||||
{#each $initq.data.clusters.find(c => c.name == cluster).subClusters as subCluster, i}
|
{#each $initq.data.clusters.find(c => c.name == cluster).subClusters as subCluster, i}
|
||||||
<Row>
|
<Row cols={2} class="mb-3 justify-content-center">
|
||||||
<Col xs="3">
|
<Col xs="4" class="px-3">
|
||||||
|
<Card class="h-auto mt-1">
|
||||||
|
<CardHeader>
|
||||||
|
<CardTitle class="mb-0">SubCluster "{subCluster.name}"</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardBody>
|
||||||
<Table>
|
<Table>
|
||||||
<tr>
|
|
||||||
<th scope="col">SubCluster</th>
|
|
||||||
<td colspan="2">{subCluster.name}</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col">Allocated Nodes</th>
|
<th scope="col">Allocated Nodes</th>
|
||||||
<td style="min-width: 75px;"><div class="col"><Progress value={allocatedNodes[subCluster.name]} max={subCluster.numberOfNodes}/></div></td>
|
<td style="min-width: 100px;"><div class="col"><Progress value={allocatedNodes[subCluster.name]} max={subCluster.numberOfNodes}/></div></td>
|
||||||
<td>({allocatedNodes[subCluster.name]} / {subCluster.numberOfNodes})</td>
|
<td>({allocatedNodes[subCluster.name]} Nodes / {subCluster.numberOfNodes} Total Nodes)</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col">Flop Rate</th>
|
<th scope="col">Flop Rate (Any) <Icon name="info-circle" class="p-1" style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]"/></th>
|
||||||
<td style="min-width: 75px;"><div class="col"><Progress value={flopRate[subCluster.name]} max={subCluster.flopRateSimd * subCluster.numberOfNodes}/></div></td>
|
<td style="min-width: 100px;"><div class="col"><Progress value={flopRate[subCluster.name]} max={subCluster.flopRateSimd.value * subCluster.numberOfNodes}/></div></td>
|
||||||
<td>({flopRate[subCluster.name]} / {subCluster.flopRateSimd * subCluster.numberOfNodes})</td>
|
<td>({flopRate[subCluster.name]} {flopRateUnit[subCluster.name]} / {(subCluster.flopRateSimd.value * subCluster.numberOfNodes)} {flopRateUnit[subCluster.name]} [Max])</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col">MemBw Rate</th>
|
<th scope="col">MemBw Rate</th>
|
||||||
<td style="min-width: 75px;"><div class="col"><Progress value={memBwRate[subCluster.name]} max={subCluster.memoryBandwidth * subCluster.numberOfNodes}/></div></td>
|
<td style="min-width: 100px;"><div class="col"><Progress value={memBwRate[subCluster.name]} max={subCluster.memoryBandwidth.value * subCluster.numberOfNodes}/></div></td>
|
||||||
<td>({memBwRate[subCluster.name]} / {subCluster.memoryBandwidth * subCluster.numberOfNodes})</td>
|
<td>({memBwRate[subCluster.name]} {memBwRateUnit[subCluster.name]} / {(subCluster.memoryBandwidth.value * subCluster.numberOfNodes)} {memBwRateUnit[subCluster.name]} [Max])</td>
|
||||||
</tr>
|
</tr>
|
||||||
</Table>
|
</Table>
|
||||||
|
</CardBody>
|
||||||
|
</Card>
|
||||||
</Col>
|
</Col>
|
||||||
<div class="col-9" bind:clientWidth={plotWidths[i]}>
|
<Col class="px-3">
|
||||||
|
<div bind:clientWidth={plotWidths[i]}>
|
||||||
{#key $mainQuery.data.nodeMetrics}
|
{#key $mainQuery.data.nodeMetrics}
|
||||||
<Roofline
|
<Roofline
|
||||||
width={plotWidths[i] - 10} height={300} colorDots={false} cluster={subCluster}
|
width={plotWidths[i] - 10} height={300} colorDots={true} showTime={false} cluster={subCluster}
|
||||||
data={transformPerNodeData($mainQuery.data.nodeMetrics.filter(data => data.subCluster == subCluster.name))} />
|
data={transformPerNodeData($mainQuery.data.nodeMetrics.filter(data => data.subCluster == subCluster.name))} />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{/each}
|
{/each}
|
||||||
<Row>
|
|
||||||
<div class="col-4" bind:clientWidth={colWidth1}>
|
<hr style="margin-top: -1em;">
|
||||||
<h4>Top Users</h4>
|
|
||||||
|
<!-- Usage Stats as Histograms -->
|
||||||
|
|
||||||
|
<Row cols={4}>
|
||||||
|
<Col class="p-2">
|
||||||
|
<div bind:clientWidth={colWidth1}>
|
||||||
|
<h4 class="mb-3 text-center">Top Users</h4>
|
||||||
{#key $mainQuery.data}
|
{#key $mainQuery.data}
|
||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth1 - 25} height={300}
|
width={colWidth1 - 25}
|
||||||
data={$mainQuery.data.topUsers.sort((a, b) => b.count - a.count).map(({ count }, idx) => ({ count, value: idx }))}
|
data={$mainQuery.data.topUsers.sort((a, b) => b.count - a.count).map(({ count }, idx) => ({ count, value: idx }))}
|
||||||
label={(x) => x < $mainQuery.data.topUsers.length ? $mainQuery.data.topUsers[Math.floor(x)].name : '0'} />
|
label={(x) => x < $mainQuery.data.topUsers.length ? $mainQuery.data.topUsers[Math.floor(x)].name : '0'}
|
||||||
|
xlabel="User Name" ylabel="Number of Jobs" />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
<div class="col-2">
|
</Col>
|
||||||
|
<Col class="px-4 py-2">
|
||||||
<Table>
|
<Table>
|
||||||
<tr><th>Name</th><th>Number of Nodes</th></tr>
|
<tr class="mb-2"><th>User Name</th><th>Number of Nodes</th></tr>
|
||||||
{#each $mainQuery.data.topUsers.sort((a, b) => b.count - a.count) as { name, count }}
|
{#each $mainQuery.data.topUsers.sort((a, b) => b.count - a.count) as { name, count }}
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col"><a href="/monitoring/user/{name}">{name}</a></th>
|
<th scope="col"><a href="/monitoring/user/{name}">{name}</a></th>
|
||||||
@ -144,41 +170,48 @@
|
|||||||
</tr>
|
</tr>
|
||||||
{/each}
|
{/each}
|
||||||
</Table>
|
</Table>
|
||||||
</div>
|
</Col>
|
||||||
<div class="col-4">
|
<Col class="p-2">
|
||||||
<h4>Top Projects</h4>
|
<h4 class="mb-3 text-center">Top Projects</h4>
|
||||||
{#key $mainQuery.data}
|
{#key $mainQuery.data}
|
||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth1 - 25} height={300}
|
width={colWidth1 - 25}
|
||||||
data={$mainQuery.data.topProjects.sort((a, b) => b.count - a.count).map(({ count }, idx) => ({ count, value: idx }))}
|
data={$mainQuery.data.topProjects.sort((a, b) => b.count - a.count).map(({ count }, idx) => ({ count, value: idx }))}
|
||||||
label={(x) => x < $mainQuery.data.topProjects.length ? $mainQuery.data.topProjects[Math.floor(x)].name : '0'} />
|
label={(x) => x < $mainQuery.data.topProjects.length ? $mainQuery.data.topProjects[Math.floor(x)].name : '0'}
|
||||||
|
xlabel="Project Code" ylabel="Number of Jobs" />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</Col>
|
||||||
<div class="col-2">
|
<Col class="px-4 py-2">
|
||||||
<Table>
|
<Table>
|
||||||
<tr><th>Name</th><th>Number of Nodes</th></tr>
|
<tr class="mb-2"><th>Project Code</th><th>Number of Nodes</th></tr>
|
||||||
{#each $mainQuery.data.topProjects.sort((a, b) => b.count - a.count) as { name, count }}
|
{#each $mainQuery.data.topProjects.sort((a, b) => b.count - a.count) as { name, count }}
|
||||||
<tr><th scope="col">{name}</th><td>{count}</td></tr>
|
<tr><th scope="col">{name}</th><td>{count}</td></tr>
|
||||||
{/each}
|
{/each}
|
||||||
</Table>
|
</Table>
|
||||||
</div>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
<Row>
|
<Row cols={2} class="mt-3">
|
||||||
<div class="col" bind:clientWidth={colWidth2}>
|
<Col class="p-2">
|
||||||
<h4>Duration Distribution</h4>
|
<div bind:clientWidth={colWidth2}>
|
||||||
|
<h4 class="mb-3 text-center">Duration Distribution</h4>
|
||||||
{#key $mainQuery.data.stats}
|
{#key $mainQuery.data.stats}
|
||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth2 - 25} height={300}
|
width={colWidth2 - 25}
|
||||||
data={$mainQuery.data.stats[0].histDuration} />
|
data={$mainQuery.data.stats[0].histDuration}
|
||||||
|
xlabel="Current Runtimes [h]"
|
||||||
|
ylabel="Number of Jobs" />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
<div class="col">
|
</Col>
|
||||||
<h4>Number of Nodes Distribution</h4>
|
<Col class="p-2">
|
||||||
|
<h4 class="mb-3 text-center">Number of Nodes Distribution</h4>
|
||||||
{#key $mainQuery.data.stats}
|
{#key $mainQuery.data.stats}
|
||||||
<Histogram
|
<Histogram
|
||||||
width={colWidth2 - 25} height={300}
|
width={colWidth2 - 25}
|
||||||
data={$mainQuery.data.stats[0].histNumNodes} />
|
data={$mainQuery.data.stats[0].histNumNodes}
|
||||||
|
xlabel="Allocated Nodes [#]"
|
||||||
|
ylabel="Number of Jobs" />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{/if}
|
{/if}
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
const clusters = getContext('clusters')
|
const clusters = getContext('clusters')
|
||||||
const ccconfig = getContext('cc-config')
|
const ccconfig = getContext('cc-config')
|
||||||
|
const metricConfig = getContext('metrics')
|
||||||
|
|
||||||
let plotHeight = 300
|
let plotHeight = 300
|
||||||
let hostnameFilter = ''
|
let hostnameFilter = ''
|
||||||
@ -28,13 +29,14 @@
|
|||||||
|
|
||||||
const nodesQuery = operationStore(`query($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) {
|
const nodesQuery = operationStore(`query($cluster: String!, $metrics: [String!], $from: Time!, $to: Time!) {
|
||||||
nodeMetrics(cluster: $cluster, metrics: $metrics, from: $from, to: $to) {
|
nodeMetrics(cluster: $cluster, metrics: $metrics, from: $from, to: $to) {
|
||||||
host,
|
host
|
||||||
subCluster
|
subCluster
|
||||||
metrics {
|
metrics {
|
||||||
name,
|
name
|
||||||
metric {
|
|
||||||
scope
|
scope
|
||||||
timestep,
|
metric {
|
||||||
|
timestep
|
||||||
|
unit { base, prefix }
|
||||||
series {
|
series {
|
||||||
statistics { min, avg, max }
|
statistics { min, avg, max }
|
||||||
data
|
data
|
||||||
@ -49,6 +51,18 @@
|
|||||||
to: to.toISOString()
|
to: to.toISOString()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
let metricUnits = {}
|
||||||
|
$: if ($nodesQuery.data) {
|
||||||
|
let thisCluster = clusters.find(c => c.name == cluster)
|
||||||
|
for (let metric of thisCluster.metricConfig) {
|
||||||
|
if (metric.unit.prefix || metric.unit.base) {
|
||||||
|
metricUnits[metric.name] = '(' + (metric.unit.prefix ? metric.unit.prefix : '') + (metric.unit.base ? metric.unit.base : '') + ')'
|
||||||
|
} else { // If no unit defined: Omit Unit Display
|
||||||
|
metricUnits[metric.name] = ''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$: $nodesQuery.variables = { cluster, metrics: [selectedMetric], from: from.toISOString(), to: to.toISOString() }
|
$: $nodesQuery.variables = { cluster, metrics: [selectedMetric], from: from.toISOString(), to: to.toISOString() }
|
||||||
|
|
||||||
query(nodesQuery)
|
query(nodesQuery)
|
||||||
@ -71,7 +85,7 @@
|
|||||||
<InputGroupText>Metric</InputGroupText>
|
<InputGroupText>Metric</InputGroupText>
|
||||||
<select class="form-select" bind:value={selectedMetric}>
|
<select class="form-select" bind:value={selectedMetric}>
|
||||||
{#each clusters.find(c => c.name == cluster).metricConfig as metric}
|
{#each clusters.find(c => c.name == cluster).metricConfig as metric}
|
||||||
<option value={metric.name}>{metric.name} ({metric.unit})</option>
|
<option value={metric.name}>{metric.name} {metricUnits[metric.name]}</option>
|
||||||
{/each}
|
{/each}
|
||||||
</select>
|
</select>
|
||||||
</InputGroup>
|
</InputGroup>
|
||||||
@ -98,11 +112,23 @@
|
|||||||
let:width
|
let:width
|
||||||
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
||||||
items={$nodesQuery.data.nodeMetrics
|
items={$nodesQuery.data.nodeMetrics
|
||||||
.filter(h => h.host.includes(hostnameFilter) && h.metrics.some(m => m.name == selectedMetric && m.metric.scope == 'node'))
|
.filter(h => h.host.includes(hostnameFilter) && h.metrics.some(m => m.name == selectedMetric && m.scope == 'node'))
|
||||||
.map(h => ({ host: h.host, subCluster: h.subCluster, data: h.metrics.find(m => m.name == selectedMetric && m.metric.scope == 'node') }))
|
.map(function (h) {
|
||||||
|
let thisConfig = metricConfig(cluster, selectedMetric)
|
||||||
|
let thisSCIndex = thisConfig.subClusters.findIndex(sc => sc.name == h.subCluster)
|
||||||
|
// Check whether the metric carries remove == true for this node's subcluster
|
||||||
|
if (thisSCIndex >= 0) {
|
||||||
|
if (thisConfig.subClusters[thisSCIndex].remove == true) {
|
||||||
|
return { host: h.host, subCluster: h.subCluster, data: null, removed: true }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Otherwise: metric not removed for this subcluster, return its node-scope data
|
||||||
|
return { host: h.host, subCluster: h.subCluster, data: h.metrics.find(m => m.name == selectedMetric && m.scope == 'node'), removed: false }
|
||||||
|
})
|
||||||
.sort((a, b) => a.host.localeCompare(b.host))}>
|
.sort((a, b) => a.host.localeCompare(b.host))}>
|
||||||
|
|
||||||
<h4 style="width: 100%; text-align: center;"><a href="/monitoring/node/{cluster}/{item.host}">{item.host} ({item.subCluster})</a></h4>
|
<h4 style="width: 100%; text-align: center;"><a href="/monitoring/node/{cluster}/{item.host}">{item.host} ({item.subCluster})</a></h4>
|
||||||
|
{#if item.removed == false && item.data != null}
|
||||||
<MetricPlot
|
<MetricPlot
|
||||||
width={width}
|
width={width}
|
||||||
height={plotHeight}
|
height={plotHeight}
|
||||||
@ -111,6 +137,11 @@
|
|||||||
metric={item.data.name}
|
metric={item.data.name}
|
||||||
cluster={clusters.find(c => c.name == cluster)}
|
cluster={clusters.find(c => c.name == cluster)}
|
||||||
subCluster={item.subCluster} />
|
subCluster={item.subCluster} />
|
||||||
|
{:else if item.removed == true && item.data == null}
|
||||||
|
<Card body color="info">Metric '{ selectedMetric }' disabled for subcluster '{ item.subCluster }'</Card>
|
||||||
|
{:else}
|
||||||
|
<Card body color="warning">Missing Data</Card>
|
||||||
|
{/if}
|
||||||
</PlotTable>
|
</PlotTable>
|
||||||
{/if}
|
{/if}
|
||||||
</Col>
|
</Col>
|
||||||
|
@ -18,10 +18,12 @@
|
|||||||
export let user
|
export let user
|
||||||
export let filterPresets
|
export let filterPresets
|
||||||
|
|
||||||
let filters, jobList
|
let filters = []
|
||||||
|
let jobList
|
||||||
let sorting = { field: 'startTime', order: 'DESC' }, isSortingOpen = false
|
let sorting = { field: 'startTime', order: 'DESC' }, isSortingOpen = false
|
||||||
let metrics = ccconfig.plot_list_selectedMetrics, isMetricsSelectionOpen = false
|
let metrics = ccconfig.plot_list_selectedMetrics, isMetricsSelectionOpen = false
|
||||||
let w1, w2, histogramHeight = 250
|
let w1, w2, histogramHeight = 250
|
||||||
|
let selectedCluster = filterPresets?.cluster ? filterPresets.cluster : null
|
||||||
|
|
||||||
const stats = operationStore(`
|
const stats = operationStore(`
|
||||||
query($filter: [JobFilter!]!) {
|
query($filter: [JobFilter!]!) {
|
||||||
@ -40,6 +42,12 @@
|
|||||||
pause: true
|
pause: true
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// filters[filters.findIndex(filter => filter.cluster != null)] ?
|
||||||
|
// filters[filters.findIndex(filter => filter.cluster != null)].cluster.eq :
|
||||||
|
// null
|
||||||
|
// The cluster filter has to always be at the first index; the commented approach above would throw an error
|
||||||
|
$: selectedCluster = filters[0]?.cluster ? filters[0].cluster.eq : null
|
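For context, filters is the array of filter objects emitted by the Filters component, and the derivation above relies on the cluster filter sitting at index 0. A hedged example of such an array (names, dates, and the exact shape of the non-cluster entries are made up):

    // Example filter array; only the first entry is inspected for the selected cluster
    const filters = [
        { cluster: { eq: 'clusterA' } },
        { user: { eq: 'jdoe1' } },
        { startTime: { from: '2022-09-01T00:00:00Z', to: null } }
    ]
    // -> selectedCluster becomes 'clusterA'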
||||||
|
|
||||||
query(stats)
|
query(stats)
|
||||||
|
|
||||||
onMount(() => filters.update())
|
onMount(() => filters.update())
|
||||||
@ -75,11 +83,12 @@
|
|||||||
startTimeQuickSelect={true}
|
startTimeQuickSelect={true}
|
||||||
bind:this={filters}
|
bind:this={filters}
|
||||||
on:update={({ detail }) => {
|
on:update={({ detail }) => {
|
||||||
let filters = [...detail.filters, { user: { eq: user.username } }]
|
let jobFilters = [...detail.filters, { user: { eq: user.username } }]
|
||||||
$stats.variables = { filter: filters }
|
$stats.variables = { filter: jobFilters }
|
||||||
$stats.context.pause = false
|
$stats.context.pause = false
|
||||||
$stats.reexecute()
|
$stats.reexecute()
|
||||||
jobList.update(filters)
|
filters = jobFilters
|
||||||
|
jobList.update(jobFilters)
|
||||||
}} />
|
}} />
|
||||||
</Col>
|
</Col>
|
||||||
<Col xs="auto" style="margin-left: auto;">
|
<Col xs="auto" style="margin-left: auto;">
|
||||||
@ -136,19 +145,23 @@
|
|||||||
</Table>
|
</Table>
|
||||||
</Col>
|
</Col>
|
||||||
<div class="col-4" style="text-align: center;" bind:clientWidth={w1}>
|
<div class="col-4" style="text-align: center;" bind:clientWidth={w1}>
|
||||||
<b>Walltime</b>
|
<b>Duration Distribution</b>
|
||||||
{#key $stats.data.jobsStatistics[0].histDuration}
|
{#key $stats.data.jobsStatistics[0].histDuration}
|
||||||
<Histogram
|
<Histogram
|
||||||
data={$stats.data.jobsStatistics[0].histDuration}
|
data={$stats.data.jobsStatistics[0].histDuration}
|
||||||
width={w1 - 25} height={histogramHeight} />
|
width={w1 - 25} height={histogramHeight}
|
||||||
|
xlabel="Current Runtimes [h]"
|
||||||
|
ylabel="Number of Jobs"/>
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
<div class="col-4" style="text-align: center;" bind:clientWidth={w2}>
|
<div class="col-4" style="text-align: center;" bind:clientWidth={w2}>
|
||||||
<b>Number of Nodes</b>
|
<b>Number of Nodes Distribution</b>
|
||||||
{#key $stats.data.jobsStatistics[0].histNumNodes}
|
{#key $stats.data.jobsStatistics[0].histNumNodes}
|
||||||
<Histogram
|
<Histogram
|
||||||
data={$stats.data.jobsStatistics[0].histNumNodes}
|
data={$stats.data.jobsStatistics[0].histNumNodes}
|
||||||
width={w2 - 25} height={histogramHeight} />
|
width={w2 - 25} height={histogramHeight}
|
||||||
|
xlabel="Allocated Nodes [#]"
|
||||||
|
ylabel="Number of Jobs" />
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
@ -167,6 +180,8 @@
|
|||||||
bind:sorting={sorting}
|
bind:sorting={sorting}
|
||||||
bind:isOpen={isSortingOpen} />
|
bind:isOpen={isSortingOpen} />
|
||||||
|
|
||||||
<MetricSelection configName="plot_list_selectedMetrics"
|
<MetricSelection
|
||||||
|
bind:cluster={selectedCluster}
|
||||||
|
configName="plot_list_selectedMetrics"
|
||||||
bind:metrics={metrics}
|
bind:metrics={metrics}
|
||||||
bind:isOpen={isMetricsSelectionOpen} />
|
bind:isOpen={isMetricsSelectionOpen} />
|
@ -20,6 +20,7 @@
|
|||||||
let text = await res.text()
|
let text = await res.text()
|
||||||
popMessage(text, '#048109')
|
popMessage(text, '#048109')
|
||||||
reloadUserList()
|
reloadUserList()
|
||||||
|
form.reset()
|
||||||
} else {
|
} else {
|
||||||
let text = await res.text()
|
let text = await res.text()
|
||||||
// console.log(res.statusText)
|
// console.log(res.statusText)
|
||||||
@ -79,7 +80,12 @@
|
|||||||
{#if i == 0}
|
{#if i == 0}
|
||||||
<div>
|
<div>
|
||||||
<input type="radio" id={role} name="role" value={role} checked/>
|
<input type="radio" id={role} name="role" value={role} checked/>
|
||||||
<label for={role}>{role.charAt(0).toUpperCase() + role.slice(1)} (regular user, same as if created via LDAP sync.)</label>
|
<label for={role}>{role.toUpperCase()} (Allowed to interact with the REST API.)</label>
|
||||||
|
</div>
|
||||||
|
{:else if i == 1}
|
||||||
|
<div>
|
||||||
|
<input type="radio" id={role} name="role" value={role} checked/>
|
||||||
|
<label for={role}>{role.charAt(0).toUpperCase() + role.slice(1)} (Same as if created via LDAP sync.)</label>
|
||||||
</div>
|
</div>
|
||||||
{:else}
|
{:else}
|
||||||
<div>
|
<div>
|
||||||
|
@ -102,9 +102,11 @@
|
|||||||
{#if $initialized}
|
{#if $initialized}
|
||||||
({clusters
|
({clusters
|
||||||
.map(cluster => cluster.metricConfig.find(m => m.name == metric))
|
.map(cluster => cluster.metricConfig.find(m => m.name == metric))
|
||||||
.filter(m => m != null).map(m => m.unit)
|
.filter(m => m != null)
|
||||||
.reduce((arr, unit) => arr.includes(unit) ? arr : [...arr, unit], [])
|
.map(m => (m.unit?.prefix?m.unit?.prefix:'') + (m.unit?.base?m.unit?.base:'')) // Build unitStr
|
||||||
.join(', ')})
|
.reduce((arr, unitStr) => arr.includes(unitStr) ? arr : [...arr, unitStr], []) // w/o this, output would be [unitStr, unitStr]
|
||||||
|
.join(', ')
|
||||||
|
})
|
||||||
{/if}
|
{/if}
|
||||||
</th>
|
</th>
|
||||||
{/each}
|
{/each}
|
||||||
|
@ -24,12 +24,14 @@
|
|||||||
let scopes = [job.numNodes == 1 ? 'core' : 'node']
|
let scopes = [job.numNodes == 1 ? 'core' : 'node']
|
||||||
|
|
||||||
const cluster = getContext('clusters').find(c => c.name == job.cluster)
|
const cluster = getContext('clusters').find(c => c.name == job.cluster)
|
||||||
|
// Get all MetricConfs which include subCluster-specific settings for this job
|
||||||
|
const metricConfig = getContext('metrics')
|
||||||
const metricsQuery = operationStore(`query($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!) {
|
const metricsQuery = operationStore(`query($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!) {
|
||||||
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes) {
|
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes) {
|
||||||
name
|
name
|
||||||
|
scope
|
||||||
metric {
|
metric {
|
||||||
unit, scope, timestep
|
unit { prefix, base }, timestep
|
||||||
statisticsSeries { min, mean, max }
|
statisticsSeries { min, mean, max }
|
||||||
series {
|
series {
|
||||||
hostname, id, data
|
hostname, id, data
|
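With scope now selected next to name and unit split into prefix and base, a single entry of the jobMetrics response has roughly the following shape; every value below, including the id, is illustrative:

    // Illustrative shape of one jobMetrics entry as requested by the query above
    const exampleJobMetric = {
        name: 'flops_any',
        scope: 'node',
        metric: {
            unit: { prefix: 'G', base: 'F/s' },
            timestep: 60,
            statisticsSeries: null,
            series: [
                { hostname: 'node0042', id: '0', data: [40.2, 41.0, 43.7] }
            ]
        }
    }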
||||||
@ -44,13 +46,47 @@
|
|||||||
})
|
})
|
||||||
|
|
||||||
const selectScope = (jobMetrics) => jobMetrics.reduce(
|
const selectScope = (jobMetrics) => jobMetrics.reduce(
|
||||||
(a, b) => maxScope([a.metric.scope, b.metric.scope]) == a.metric.scope
|
(a, b) => maxScope([a.scope, b.scope]) == a.scope
|
||||||
? (job.numNodes > 1 ? a : b)
|
? (job.numNodes > 1 ? a : b)
|
||||||
: (job.numNodes > 1 ? b : a), jobMetrics[0])
|
: (job.numNodes > 1 ? b : a), jobMetrics[0])
|
||||||
|
|
||||||
const sortAndSelectScope = (jobMetrics) => metrics
|
const sortAndSelectScope = (jobMetrics) => metrics
|
||||||
.map(name => jobMetrics.filter(jobMetric => jobMetric.name == name))
|
.map(function(name) {
|
||||||
.map(jobMetrics => jobMetrics.length > 0 ? selectScope(jobMetrics) : null)
|
// Get MetricConf for this selected/requested metric
|
||||||
|
let thisConfig = metricConfig(cluster, name)
|
||||||
|
let thisSCIndex = thisConfig.subClusters.findIndex(sc => sc.name == job.subCluster)
|
||||||
|
// Check if Subcluster has MetricConf: If not found (index == -1), no further remove flag check required
|
||||||
|
if (thisSCIndex >= 0) {
|
||||||
|
// SubCluster Config present: Check if remove flag is set
|
||||||
|
if (thisConfig.subClusters[thisSCIndex].remove == true) {
|
||||||
|
// Return null data and informational flag
|
||||||
|
return {removed: true, data: null}
|
||||||
|
} else {
|
||||||
|
// load and return metric, if data available
|
||||||
|
let thisMetric = jobMetrics.filter(jobMetric => jobMetric.name == name) // Returns Array
|
||||||
|
if (thisMetric.length > 0) {
|
||||||
|
return {removed: false, data: thisMetric}
|
||||||
|
} else {
|
||||||
|
return {removed: false, data: null}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No specific subCluster config: 'remove' flag not set, deemed false -> load and return metric, if data available
|
||||||
|
let thisMetric = jobMetrics.filter(jobMetric => jobMetric.name == name) // Returns Array
|
||||||
|
if (thisMetric.length > 0) {
|
||||||
|
return {removed: false, data: thisMetric}
|
||||||
|
} else {
|
||||||
|
return {removed: false, data: null}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map(function(jobMetrics) {
|
||||||
|
if (jobMetrics.data != null && jobMetrics.data.length > 0) {
|
||||||
|
return {removed: jobMetrics.removed, data: selectScope(jobMetrics.data)}
|
||||||
|
} else {
|
||||||
|
return jobMetrics
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
$: metricsQuery.variables = { id: job.id, metrics, scopes }
|
$: metricsQuery.variables = { id: job.id, metrics, scopes }
|
||||||
|
|
||||||
@ -81,17 +117,20 @@
|
|||||||
{:else}
|
{:else}
|
||||||
{#each sortAndSelectScope($metricsQuery.data.jobMetrics) as metric, i (metric || i)}
|
{#each sortAndSelectScope($metricsQuery.data.jobMetrics) as metric, i (metric || i)}
|
||||||
<td>
|
<td>
|
||||||
{#if metric != null}
|
<!-- SubCluster MetricConfig 'remove' keyword for job tables (main job list, user job list, project job list) is handled here as the top-level case -->
|
||||||
|
{#if metric.removed == false && metric.data != null}
|
||||||
<MetricPlot
|
<MetricPlot
|
||||||
width={plotWidth}
|
width={plotWidth}
|
||||||
height={plotHeight}
|
height={plotHeight}
|
||||||
timestep={metric.metric.timestep}
|
timestep={metric.data.metric.timestep}
|
||||||
scope={metric.metric.scope}
|
scope={metric.data.scope}
|
||||||
series={metric.metric.series}
|
series={metric.data.metric.series}
|
||||||
statisticsSeries={metric.metric.statisticsSeries}
|
statisticsSeries={metric.data.metric.statisticsSeries}
|
||||||
metric={metric.name}
|
metric={metric.data.name}
|
||||||
cluster={cluster}
|
cluster={cluster}
|
||||||
subCluster={job.subCluster} />
|
subCluster={job.subCluster} />
|
||||||
|
{:else if metric.removed == true && metric.data == null}
|
||||||
|
<Card body color="info">Metric disabled for subcluster '{ job.subCluster }'</Card>
|
||||||
{:else}
|
{:else}
|
||||||
<Card body color="warning">Missing Data</Card>
|
<Card body color="warning">Missing Data</Card>
|
||||||
{/if}
|
{/if}
|
||||||
|
@ -18,15 +18,17 @@
|
|||||||
import { onMount } from 'svelte'
|
import { onMount } from 'svelte'
|
||||||
|
|
||||||
export let data
|
export let data
|
||||||
export let width
|
export let width = 500
|
||||||
export let height
|
export let height = 300
|
||||||
|
export let xlabel = ''
|
||||||
|
export let ylabel = ''
|
||||||
export let min = null
|
export let min = null
|
||||||
export let max = null
|
export let max = null
|
||||||
export let label = formatNumber
|
export let label = formatNumber
|
||||||
|
|
||||||
const fontSize = 12
|
const fontSize = 12
|
||||||
const fontFamily = 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"'
|
const fontFamily = 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"'
|
||||||
const paddingLeft = 35, paddingRight = 20, paddingTop = 20, paddingBottom = 20
|
const paddingLeft = 50, paddingRight = 20, paddingTop = 20, paddingBottom = 20
|
||||||
|
|
||||||
let ctx, canvasElement
|
let ctx, canvasElement
|
||||||
|
|
||||||
@ -72,9 +74,11 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
function render() {
|
function render() {
|
||||||
const h = height - paddingTop - paddingBottom
|
const labelOffset = Math.floor(height * 0.1)
|
||||||
|
const h = height - paddingTop - paddingBottom - labelOffset
|
||||||
const w = width - paddingLeft - paddingRight
|
const w = width - paddingLeft - paddingRight
|
||||||
const barWidth = Math.ceil(w / (maxValue + 1))
|
const barGap = 5
|
||||||
|
const barWidth = Math.ceil(w / (maxValue + 1)) - barGap
|
||||||
|
|
||||||
if (Number.isNaN(barWidth))
|
if (Number.isNaN(barWidth))
|
||||||
return
|
return
|
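The geometry above reserves roughly ten percent of the plot height for the x-axis label and separates bars by a fixed gap. A quick worked example under assumed dimensions (the defaults introduced above, with 50 bins assumed):

    const width = 500, height = 300                      // new default plot size
    const paddingLeft = 50, paddingRight = 20, paddingTop = 20, paddingBottom = 20
    const maxValue = 49, barGap = 5                       // 50 bins assumed, indexed 0..49

    const labelOffset = Math.floor(height * 0.1)          // 30 px reserved for the x label
    const h = height - paddingTop - paddingBottom - labelOffset   // 230 px of drawable height
    const w = width - paddingLeft - paddingRight                   // 430 px of drawable width
    const barWidth = Math.ceil(w / (maxValue + 1)) - barGap        // ceil(430 / 50) - 5 = 4 px per bar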
||||||
@ -83,9 +87,14 @@
|
|||||||
const getCanvasY = (count) => (h - (count / maxCount) * h) + paddingTop
|
const getCanvasY = (count) => (h - (count / maxCount) * h) + paddingTop
|
||||||
|
|
||||||
// X Axis
|
// X Axis
|
||||||
ctx.font = `${fontSize}px ${fontFamily}`
|
ctx.font = `bold ${fontSize}px ${fontFamily}`
|
||||||
ctx.fillStyle = 'black'
|
ctx.fillStyle = 'black'
|
||||||
|
if (xlabel != '') {
|
||||||
|
let textWidth = ctx.measureText(xlabel).width
|
||||||
|
ctx.fillText(xlabel, Math.floor((width / 2) - (textWidth / 2) + barGap), height - Math.floor(labelOffset / 2))
|
||||||
|
}
|
||||||
ctx.textAlign = 'center'
|
ctx.textAlign = 'center'
|
||||||
|
ctx.font = `${fontSize}px ${fontFamily}`
|
||||||
if (min != null && max != null) {
|
if (min != null && max != null) {
|
||||||
const stepsizeX = getStepSize(max - min, w, 75)
|
const stepsizeX = getStepSize(max - min, w, 75)
|
||||||
let startX = 0
|
let startX = 0
|
||||||
@ -94,19 +103,28 @@
|
|||||||
|
|
||||||
for (let x = startX; x < max; x += stepsizeX) {
|
for (let x = startX; x < max; x += stepsizeX) {
|
||||||
let px = ((x - min) / (max - min)) * (w - barWidth) + paddingLeft + (barWidth / 2.)
|
let px = ((x - min) / (max - min)) * (w - barWidth) + paddingLeft + (barWidth / 2.)
|
||||||
ctx.fillText(`${formatNumber(x)}`, px, height - paddingBottom + 15)
|
ctx.fillText(`${formatNumber(x)}`, px, height - paddingBottom - Math.floor(labelOffset / 2))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const stepsizeX = getStepSize(maxValue, w, 120)
|
const stepsizeX = getStepSize(maxValue, w, 120)
|
||||||
for (let x = 0; x <= maxValue; x += stepsizeX) {
|
for (let x = 0; x <= maxValue; x += stepsizeX) {
|
||||||
ctx.fillText(label(x), getCanvasX(x), height - paddingBottom + 15)
|
ctx.fillText(label(x), getCanvasX(x), height - paddingBottom - Math.floor(labelOffset / 2))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Y Axis
|
// Y Axis
|
||||||
ctx.fillStyle = 'black'
|
ctx.fillStyle = 'black'
|
||||||
ctx.strokeStyle = '#bbbbbb'
|
ctx.strokeStyle = '#bbbbbb'
|
||||||
|
ctx.font = `bold ${fontSize}px ${fontFamily}`
|
||||||
|
if (ylabel != '') {
|
||||||
|
ctx.save()
|
||||||
|
ctx.translate(15, Math.floor(h / 2))
|
||||||
|
ctx.rotate(-Math.PI / 2)
|
||||||
|
ctx.fillText(ylabel, 0, 0)
|
||||||
|
ctx.restore()
|
||||||
|
}
|
||||||
ctx.textAlign = 'right'
|
ctx.textAlign = 'right'
|
||||||
|
ctx.font = `${fontSize}px ${fontFamily}`
|
||||||
ctx.beginPath()
|
ctx.beginPath()
|
||||||
const stepsizeY = getStepSize(maxCount, h, 50)
|
const stepsizeY = getStepSize(maxCount, h, 50)
|
||||||
for (let y = stepsizeY; y <= maxCount; y += stepsizeY) {
|
for (let y = stepsizeY; y <= maxCount; y += stepsizeY) {
|
||||||
@ -118,7 +136,7 @@
|
|||||||
ctx.stroke()
|
ctx.stroke()
|
||||||
|
|
||||||
// Draw bars
|
// Draw bars
|
||||||
ctx.fillStyle = '#0066cc'
|
ctx.fillStyle = '#85abce'
|
||||||
for (let p of data) {
|
for (let p of data) {
|
||||||
ctx.fillRect(
|
ctx.fillRect(
|
||||||
getCanvasX(p.value) - (barWidth / 2.),
|
getCanvasX(p.value) - (barWidth / 2.),
|
||||||
@ -130,10 +148,10 @@
|
|||||||
// Fat lines left and below plotting area
|
// Fat lines left and below plotting area
|
||||||
ctx.strokeStyle = 'black'
|
ctx.strokeStyle = 'black'
|
||||||
ctx.beginPath()
|
ctx.beginPath()
|
||||||
ctx.moveTo(0, height - paddingBottom)
|
ctx.moveTo(0, height - paddingBottom - labelOffset)
|
||||||
ctx.lineTo(width, height - paddingBottom)
|
ctx.lineTo(width, height - paddingBottom - labelOffset)
|
||||||
ctx.moveTo(paddingLeft, 0)
|
ctx.moveTo(paddingLeft, 0)
|
||||||
ctx.lineTo(paddingLeft, height- paddingBottom)
|
ctx.lineTo(paddingLeft, height - Math.floor(labelOffset / 2))
|
||||||
ctx.stroke()
|
ctx.stroke()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
let ctx, canvasElement
|
let ctx, canvasElement
|
||||||
|
|
||||||
const labels = metrics.filter(name => {
|
const labels = metrics.filter(name => {
|
||||||
if (!jobMetrics.find(m => m.name == name && m.metric.scope == "node")) {
|
if (!jobMetrics.find(m => m.name == name && m.scope == "node")) {
|
||||||
console.warn(`PolarPlot: No metric data for '${name}'`)
|
console.warn(`PolarPlot: No metric data for '${name}'`)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -27,7 +27,7 @@
|
|||||||
|
|
||||||
const getValuesForStat = (getStat) => labels.map(name => {
|
const getValuesForStat = (getStat) => labels.map(name => {
|
||||||
const peak = metricConfig(cluster, name).peak
|
const peak = metricConfig(cluster, name).peak
|
||||||
const metric = jobMetrics.find(m => m.name == name && m.metric.scope == "node")
|
const metric = jobMetrics.find(m => m.name == name && m.scope == "node")
|
||||||
const value = getStat(metric.metric) / peak
|
const value = getStat(metric.metric) / peak
|
||||||
return value <= 1. ? value : 1.
|
return value <= 1. ? value : 1.
|
||||||
})
|
})
|
||||||
@@ -4,7 +4,8 @@

<script context="module">
const axesColor = '#aaaaaa'
- const fontSize = 12
+ const tickFontSize = 10
+ const labelFontSize = 12
const fontFamily = 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"'
const paddingLeft = 40,
paddingRight = 10,
@@ -67,11 +68,11 @@
return 2
}

- function render(ctx, data, cluster, width, height, colorDots, defaultMaxY) {
+ function render(ctx, data, cluster, width, height, colorDots, showTime, defaultMaxY) {
if (width <= 0)
return

- const [minX, maxX, minY, maxY] = [0.01, 1000, 1., cluster?.flopRateSimd || defaultMaxY]
+ const [minX, maxX, minY, maxY] = [0.01, 1000, 1., cluster?.flopRateSimd?.value || defaultMaxY]
const w = width - paddingLeft - paddingRight
const h = height - paddingTop - paddingBottom

@@ -95,7 +96,7 @@
// Axes
ctx.fillStyle = 'black'
ctx.strokeStyle = axesColor
- ctx.font = `${fontSize}px ${fontFamily}`
+ ctx.font = `${tickFontSize}px ${fontFamily}`
ctx.beginPath()
for (let x = minX, i = 0; x <= maxX; i++) {
let px = getCanvasX(x)
@@ -103,18 +104,20 @@
let textWidth = ctx.measureText(text).width
ctx.fillText(text,
Math.floor(px - (textWidth / 2)),
- height - paddingBottom + fontSize + 5)
+ height - paddingBottom + tickFontSize + 5)
ctx.moveTo(px, paddingTop - 5)
ctx.lineTo(px, height - paddingBottom + 5)

x *= axisStepFactor(i, w)
}
if (data.xLabel) {
+ ctx.font = `${labelFontSize}px ${fontFamily}`
let textWidth = ctx.measureText(data.xLabel).width
ctx.fillText(data.xLabel, Math.floor((width / 2) - (textWidth / 2)), height - 20)
}

ctx.textAlign = 'center'
+ ctx.font = `${tickFontSize}px ${fontFamily}`
for (let y = minY, i = 0; y <= maxY; i++) {
let py = getCanvasY(y)
ctx.moveTo(paddingLeft - 5, py)
@@ -129,6 +132,7 @@
y *= axisStepFactor(i)
}
if (data.yLabel) {
+ ctx.font = `${labelFontSize}px ${fontFamily}`
ctx.save()
ctx.translate(15, Math.floor(height / 2))
ctx.rotate(-Math.PI / 2)
@@ -185,13 +189,13 @@
ctx.lineWidth = 2
ctx.beginPath()
if (cluster != null) {
- const ycut = 0.01 * cluster.memoryBandwidth
+ const ycut = 0.01 * cluster.memoryBandwidth.value
- const scalarKnee = (cluster.flopRateScalar - ycut) / cluster.memoryBandwidth
+ const scalarKnee = (cluster.flopRateScalar.value - ycut) / cluster.memoryBandwidth.value
- const simdKnee = (cluster.flopRateSimd - ycut) / cluster.memoryBandwidth
+ const simdKnee = (cluster.flopRateSimd.value - ycut) / cluster.memoryBandwidth.value
const scalarKneeX = getCanvasX(scalarKnee),
simdKneeX = getCanvasX(simdKnee),
- flopRateScalarY = getCanvasY(cluster.flopRateScalar),
+ flopRateScalarY = getCanvasY(cluster.flopRateScalar.value),
- flopRateSimdY = getCanvasY(cluster.flopRateSimd)
+ flopRateSimdY = getCanvasY(cluster.flopRateSimd.value)

if (scalarKneeX < width - paddingRight) {
ctx.moveTo(scalarKneeX, flopRateScalarY)
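Note: with the new MetricValue type, the subcluster peak figures arrive as { unit, value } objects, so the roofline knees are computed from `.value`. An illustrative calculation with made-up numbers:

    // Illustrative only: a subcluster with MetricValue-style fields (numbers are invented).
    const cluster = {
        flopRateScalar:  { unit: { base: 'F/s', prefix: 'G' }, value: 48 },
        flopRateSimd:    { unit: { base: 'F/s', prefix: 'G' }, value: 672 },
        memoryBandwidth: { unit: { base: 'B/s', prefix: 'G' }, value: 128 },
    }

    // Knee: the intensity where the bandwidth ramp meets the compute ceiling.
    const ycut = 0.01 * cluster.memoryBandwidth.value
    const scalarKnee = (cluster.flopRateScalar.value - ycut) / cluster.memoryBandwidth.value
    const simdKnee = (cluster.flopRateSimd.value - ycut) / cluster.memoryBandwidth.value
    console.log(scalarKnee.toFixed(3), simdKnee.toFixed(3)) // prints: 0.365 5.240
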
@@ -222,8 +226,8 @@
}
ctx.stroke()

- if (colorDots && data.x && data.y) {
+ if (colorDots && showTime && data.x && data.y) {
- // The Color Scale
+ // The Color Scale For Time Information
ctx.fillStyle = 'black'
ctx.fillText('Time:', 17, height - 5)
const start = paddingLeft + 5
@@ -237,7 +241,7 @@
}
}

- function transformData(flopsAny, memBw, colorDots) {
+ function transformData(flopsAny, memBw, colorDots) { // Uses Metric Object
const nodes = flopsAny.series.length
const timesteps = flopsAny.series[0].data.length

@@ -308,17 +312,18 @@
export let memBw = null
export let cluster = null
export let maxY = null
- export let width
+ export let width = 500
- export let height
+ export let height = 300
export let tiles = null
export let colorDots = true
+ export let showTime = true
export let data = null

console.assert(data || tiles || (flopsAny && memBw), "you must provide flopsAny and memBw or tiles!")

let ctx, canvasElement, prevWidth = width, prevHeight = height
data = data != null ? data : (flopsAny && memBw
- ? transformData(flopsAny.metric, memBw.metric, colorDots)
+ ? transformData(flopsAny, memBw, colorDots) // Use Metric Object from Parent
: {
tiles: tiles,
xLabel: 'Intensity [FLOPS/byte]',
@@ -334,7 +339,7 @@

canvasElement.width = width
canvasElement.height = height
- render(ctx, data, cluster, width, height, colorDots, maxY)
+ render(ctx, data, cluster, width, height, colorDots, showTime, maxY)
})

let timeoutId = null
@@ -354,7 +359,7 @@
timeoutId = null
canvasElement.width = width
canvasElement.height = height
- render(ctx, data, cluster, width, height, colorDots, maxY)
+ render(ctx, data, cluster, width, height, colorDots, showTime, maxY)
}, 250)
}

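Note: the new `showTime` prop only gates the time color-scale legend; dot coloring itself is still controlled by `colorDots`. A minimal sketch of the gating condition, with the helper name being an assumption for illustration:

    // The time legend is drawn only when dots are colored AND the caller asked for the time scale.
    function shouldDrawTimeScale(colorDots, showTime, data) {
        return Boolean(colorDots && showTime && data.x && data.y)
    }

    console.log(shouldDrawTimeScale(true, true,  { x: [1], y: [2] })) // true
    console.log(shouldDrawTimeScale(true, false, { x: [1], y: [2] })) // false
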
@@ -37,11 +37,11 @@ export function init(extraInitQuery = '') {
clusters {
name,
metricConfig {
- name, unit, peak,
+ name, unit { base, prefix }, peak,
normal, caution, alert,
timestep, scope,
aggregation,
- subClusters { name, peak, normal, caution, alert }
+ subClusters { name, peak, normal, caution, alert, remove }
}
partitions
subClusters {
@@ -49,9 +49,9 @@ export function init(extraInitQuery = '') {
socketsPerNode
coresPerSocket
threadsPerCore
- flopRateScalar
+ flopRateScalar { unit { base, prefix }, value }
- flopRateSimd
+ flopRateSimd { unit { base, prefix }, value }
- memoryBandwidth
+ memoryBandwidth { unit { base, prefix }, value }
numberOfNodes
topology {
node, socket, core
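Note: after this query change, `flopRateScalar`, `flopRateSimd` and `memoryBandwidth` come back as { unit { base, prefix }, value } objects, and metric units carry a base plus an optional prefix. A possible formatting helper; the name and prefix handling are assumptions, not part of this PR:

    // Turn a MetricValue-shaped object into a display string, e.g. "672 GF/s".
    function formatMetricValue(mv) {
        const prefix = mv.unit.prefix || ''
        return `${mv.value} ${prefix}${mv.unit.base}`
    }

    console.log(formatMetricValue({ unit: { base: 'F/s', prefix: 'G' }, value: 672 })) // "672 GF/s"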