Extract importer package and restructure tests

This commit is contained in:
2023-04-28 08:49:58 +02:00
parent c090b18628
commit ebba4371eb
19 changed files with 605 additions and 554 deletions

View File

@@ -0,0 +1,131 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// HandleImportFlag imports all jobs specified as
// `<path-to-meta.json>:<path-to-data.json>,...`.
//
// For every comma-separated pair the metadata file and the job data file are
// read, validated against the JSON schema when config.Keys.Validate is set,
// and decoded with unknown fields rejected. The job is then checked with
// SanityChecks, written to the job archive and inserted, together with its
// tags, into the job repository.
//
// Processing stops at the first error, which is returned; pairs handled
// before the failing one stay imported. A job whose jobId, cluster and
// startTime are already present in the repository is rejected.
func HandleImportFlag(flag string) error {
	r := repository.GetJobRepository()

	for _, pair := range strings.Split(flag, ",") {
		files := strings.Split(pair, ":")
		if len(files) != 2 {
			return fmt.Errorf("REPOSITORY/INIT > invalid import flag format")
		}

		// --- job metadata -------------------------------------------------
		raw, err := os.ReadFile(files[0])
		if err != nil {
			log.Warn("Error while reading metadata file for import")
			return err
		}

		if config.Keys.Validate {
			if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
				// %w keeps the validation error inspectable via errors.Is/As.
				return fmt.Errorf("REPOSITORY/INIT > validate job meta: %w", err)
			}
		}

		dec := json.NewDecoder(bytes.NewReader(raw))
		dec.DisallowUnknownFields()
		// Start from the defaults so fields missing in the file keep them.
		jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
		if err := dec.Decode(&jobMeta); err != nil {
			log.Warn("Error while decoding raw json metadata for import")
			return err
		}

		// --- job data -----------------------------------------------------
		raw, err = os.ReadFile(files[1])
		if err != nil {
			log.Warn("Error while reading jobdata file for import")
			return err
		}

		if config.Keys.Validate {
			if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil {
				return fmt.Errorf("REPOSITORY/INIT > validate job data: %w", err)
			}
		}

		dec = json.NewDecoder(bytes.NewReader(raw))
		dec.DisallowUnknownFields()
		jobData := schema.JobData{}
		if err := dec.Decode(&jobData); err != nil {
			log.Warn("Error while decoding raw json jobdata for import")
			return err
		}

		//checkJobData(&jobData)

		// SanityChecks(&jobMeta.BaseJob)
		jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful

		// Anything other than sql.ErrNoRows means the lookup either failed
		// or found an existing job; both abort the import.
		if job, err := r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
			if err != nil {
				log.Warn("Error while finding job in jobRepository")
				return err
			}

			return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
		}

		job := schema.Job{
			BaseJob:       jobMeta.BaseJob,
			StartTime:     time.Unix(jobMeta.StartTime, 0),
			StartTimeUnix: jobMeta.StartTime,
		}

		// TODO: Other metrics...
		job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
		job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
		job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
		job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")

		job.RawResources, err = json.Marshal(job.Resources)
		if err != nil {
			log.Warn("Error while marshaling job resources")
			return err
		}

		job.RawMetaData, err = json.Marshal(job.MetaData)
		if err != nil {
			log.Warn("Error while marshaling job metadata")
			return err
		}

		if err := SanityChecks(&job.BaseJob); err != nil {
			log.Warn("BaseJob SanityChecks failed")
			return err
		}

		// The archive is written before the database row is created.
		if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
			log.Error("Error while importing job")
			return err
		}

		id, err := r.InsertJob(&job)
		if err != nil {
			log.Warn("Error while job db insert")
			return err
		}

		for _, tag := range job.Tags {
			if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
				log.Error("Error while adding or creating tag")
				return err
			}
		}

		log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
	}

	return nil
}

View File

@@ -0,0 +1,173 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer_test
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
// copyFile copies the contents of the file at path s into the file at path
// d, creating d if it does not exist and truncating it otherwise.
//
// It returns the first error encountered while opening s, creating d,
// copying the data, or closing d. The close error matters because buffered
// data may only be written out when the destination is closed.
func copyFile(s string, d string) error {
	r, err := os.Open(s)
	if err != nil {
		return err
	}
	defer r.Close()

	w, err := os.Create(d)
	if err != nil {
		return err
	}

	if _, err := w.ReadFrom(r); err != nil {
		// The copy error is the one worth reporting; closing is best effort.
		w.Close()
		return err
	}

	return w.Close()
}
// setupRepo prepares an isolated environment for the importer tests and
// returns the job repository connected to it.
//
// Inside a per-test temporary directory it creates a file based job archive
// (version file plus the "fritz" cluster description copied from testdata),
// a migrated sqlite3 database and a config file, then initializes the
// config, archive and repository packages with them. The archive is
// initialized with the temporary archive path, not with the "archive" entry
// of the test config. Any setup failure aborts the calling test.
func setupRepo(t *testing.T) *repository.JobRepository {
	// Report setup failures at the caller's line, not inside this helper.
	t.Helper()

	const testconfig = `{
"addr": "0.0.0.0:8080",
"validate": false,
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "testcluster",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
},
{
"name": "fritz",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 944 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
},
{
"name": "taurus",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 4000 },
"duration": { "from": 0, "to": 604800 },
"startTime": { "from": "2010-01-01T00:00:00Z", "to": null }
}
}
]}`

	log.Init("info", true)
	tmpdir := t.TempDir()

	// Job archive skeleton: version file and one cluster directory.
	jobarchive := filepath.Join(tmpdir, "job-archive")
	if err := os.Mkdir(jobarchive, 0777); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
		t.Fatal(err)
	}

	fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
	if err := os.Mkdir(fritzArchive, 0777); err != nil {
		t.Fatal(err)
	}
	if err := copyFile(filepath.Join("testdata", "cluster-fritz.json"),
		filepath.Join(fritzArchive, "cluster.json")); err != nil {
		t.Fatal(err)
	}

	dbfilepath := filepath.Join(tmpdir, "test.db")
	err := repository.MigrateDB("sqlite3", dbfilepath)
	if err != nil {
		t.Fatal(err)
	}

	cfgFilePath := filepath.Join(tmpdir, "config.json")
	if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
		t.Fatal(err)
	}
	config.Init(cfgFilePath)

	// Marshal the archive config so that the temporary path is escaped
	// correctly even if it contains backslashes or quotes.
	archiveCfg, err := json.Marshal(map[string]string{"kind": "file", "path": jobarchive})
	if err != nil {
		t.Fatal(err)
	}
	if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
		t.Fatal(err)
	}

	repository.Connect("sqlite3", dbfilepath)
	return repository.GetJobRepository()
}
// Result holds the expected values for one imported job. It is filled from
// a golden JSON file; the first three fields are used to look the job up in
// the repository, Duration is compared against the stored job.
type Result struct {
	// JobId is the job id used for the repository lookup.
	JobId int64
	// Cluster is the cluster name used for the repository lookup.
	Cluster string
	// StartTime is the job start time used for the repository lookup.
	StartTime int64
	// Duration is the expected duration of the imported job.
	Duration int32
}
// readResult loads the expected result for the test called testname from
// testdata/<testname>-golden.json and decodes it into a Result.
// A missing or undecodable golden file aborts the calling test.
func readResult(t *testing.T, testname string) Result {
	// Attribute failures to the calling test, not to this helper.
	t.Helper()

	goldenPath := filepath.Join("testdata", fmt.Sprintf("%s-golden.json", testname))
	content, err := os.ReadFile(goldenPath)
	if err != nil {
		t.Fatal("Error when opening file: ", err)
	}

	var r Result
	if err := json.Unmarshal(content, &r); err != nil {
		t.Fatal("Error during Unmarshal(): ", err)
	}

	return r
}
// TestHandleImportFlag imports every job described by a
// testdata/meta-<name>.input file (paired with testdata/data-<name>.json)
// and checks that the job can afterwards be found in the repository with
// the duration recorded in testdata/<name>-golden.json.
func TestHandleImportFlag(t *testing.T) {
	r := setupRepo(t)

	// Only metadata inputs define a test case; the test name is whatever
	// sits between the "meta-" prefix and the ".input" suffix, so names
	// containing dashes are kept intact and unrelated files cannot cause an
	// out-of-range index.
	metaFiles, err := filepath.Glob(filepath.Join("testdata", "meta-*.input"))
	if err != nil {
		t.Fatal(err)
	}

	for _, path := range metaFiles {
		testname := strings.TrimSuffix(strings.TrimPrefix(filepath.Base(path), "meta-"), ".input")
		flag := fmt.Sprintf("%s:%s",
			filepath.Join("testdata", fmt.Sprintf("meta-%s.input", testname)),
			filepath.Join("testdata", fmt.Sprintf("data-%s.json", testname)))

		t.Run(testname, func(t *testing.T) {
			if err := importer.HandleImportFlag(flag); err != nil {
				t.Fatal(err)
			}

			result := readResult(t, testname)
			job, err := r.Find(&result.JobId, &result.Cluster, &result.StartTime)
			if err != nil {
				t.Fatal(err)
			}
			if job.Duration != result.Duration {
				t.Errorf("wrong duration for job\ngot: %d \nwant: %d", job.Duration, result.Duration)
			}
		})
	}
}

225
internal/importer/initDB.go Normal file
View File

@@ -0,0 +1,225 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
import (
"encoding/json"
"fmt"
"math"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/units"
)
// InitDB repopulates the job repository from the jobs found in the job
// archive: every job yielded by the archive iterator is converted into a
// schema.Job, checked with SanityChecks and added, together with its tags,
// through the repository's transaction API.
//
// Jobs that fail marshaling, the sanity checks or the insert are logged,
// counted and skipped; they do not abort the run. An error is returned only
// when the transaction cannot be initialized.
//
// NOTE(review): the previous comment stated that the tables "job", "tag" and
// "jobtag" are deleted first. Nothing in this function deletes them;
// presumably the caller provides empty tables — confirm.
func InitDB() error {
r := repository.GetJobRepository()
starttime := time.Now()
log.Print("Building job table...")
t, err := r.TransactionInit()
if err != nil {
log.Warn("Error while initializing SQL transactions")
return err
}
// Cache of already created tags, keyed by "<name>:<type>", mapping to the
// id returned by TransactionAddTag.
tags := make(map[string]int64)
// Not using log.Print because we want the line to end with `\r` and
// this function is only ever called when a special command line flag
// is passed anyways.
fmt.Printf("%d jobs inserted...\r", 0)
ar := archive.GetHandle()
// i counts successfully registered jobs, errorOccured counts failures.
i := 0
errorOccured := 0
// NOTE(review): the meaning of the `false` argument is defined by the
// archive backend (presumably "do not load job data") — confirm.
for jobContainer := range ar.Iter(false) {
jobMeta := jobContainer.Meta
// Bundle 100 inserts into one transaction for better performance.
// The condition is also true for i == 0 and stays true while jobs fail
// (i is not incremented for them), so a commit can happen repeatedly.
// NOTE(review): if TransactionCommit returns an error it is dropped here.
if i%100 == 0 {
r.TransactionCommit(t)
fmt.Printf("%d jobs inserted...\r", i)
}
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
if err := SanityChecks(&job.BaseJob); err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
// `:=` declares a new err scoped to the loop body; from here on it
// shadows the function-level err used above.
id, err := r.TransactionAdd(t, job)
if err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
for _, tag := range job.Tags {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
tagId, err = r.TransactionAddTag(t, tag)
if err != nil {
log.Errorf("Error adding tag: %v", err)
errorOccured++
// Skips only this tag; the remaining tags are still processed.
continue
}
tags[tagstr] = tagId
}
// NOTE(review): if TransactionSetTag returns an error it is dropped here.
r.TransactionSetTag(t, id, tagId)
}
// err holds the result of the most recent TransactionAddTag call, or of
// TransactionAdd when no tag had to be created, so a job is only counted
// when that last call succeeded.
if err == nil {
i += 1
}
}
if errorOccured > 0 {
log.Warnf("Error in import of %d jobs!", errorOccured)
}
// NOTE(review): if TransactionEnd returns an error it is dropped here.
r.TransactionEnd(t)
log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
return nil
}
// SanityChecks verifies that a job is consistent enough to be stored.
//
// It checks, in this order, that the job's cluster is known to the archive,
// that a subcluster can be assigned, that the job state is valid, that
// 'resources' and 'user' are not empty, that the node, accelerator and
// hardware thread counts are in range, and that the number of resources
// equals numNodes. The first failing check is returned as an error.
//
// This function also sets the subcluster if necessary (through
// archive.AssignSubCluster), so the job passed in may be modified.
func SanityChecks(job *schema.BaseJob) error {
	if c := archive.GetCluster(job.Cluster); c == nil {
		return fmt.Errorf("no such cluster: %v", job.Cluster)
	}

	if err := archive.AssignSubCluster(job); err != nil {
		log.Warn("Error while assigning subcluster to job")
		return err
	}

	if !job.State.Valid() {
		return fmt.Errorf("not a valid job state: %v", job.State)
	}

	if len(job.Resources) == 0 || len(job.User) == 0 {
		return fmt.Errorf("'resources' and 'user' should not be empty")
	}

	// NumAcc and NumHWThreads are pointers. A nil pointer means the value
	// was not provided; it is accepted instead of being dereferenced, which
	// would panic.
	if (job.NumAcc != nil && *job.NumAcc < 0) ||
		(job.NumHWThreads != nil && *job.NumHWThreads < 0) ||
		job.NumNodes < 1 {
		return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
	}

	if len(job.Resources) != int(job.NumNodes) {
		return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
	}

	return nil
}
// loadJobStat returns the average recorded for metric in the job's
// statistics, or 0.0 when the job has no statistics entry for that metric.
func loadJobStat(job *schema.JobMeta, metric string) float64 {
	entry, found := job.Statistics[metric]
	if !found {
		return 0.0
	}

	return entry.Avg
}
// getNormalizationFactor determines the power-of-1000 scaling that brings v
// into the range from 1 to 1000.
//
// It returns the scaling factor f, a power of ten such that v*f lies in
// that range, and the decimal exponent e of that factor (f == 10^e, with e
// a multiple of 3). Values above 1000 yield a negative exponent, values
// below 1 a positive one, and values already in range yield (1, 0).
//
// Zero, negative, NaN and infinite values cannot be brought into the range
// by repeated scaling (the loops below would never terminate for them), so
// they are reported as "no scaling": (1, 0).
func getNormalizationFactor(v float64) (float64, int) {
	if v <= 0 || math.IsNaN(v) || math.IsInf(v, 0) {
		return 1.0, 0
	}

	exponent := 0
	switch {
	case v > 1000.0:
		for v > 1000.0 {
			v *= 1e-3
			exponent -= 3
		}
	case v < 1.0:
		for v < 1.0 {
			v *= 1e3
			exponent += 3
		}
	}

	return math.Pow10(exponent), exponent
}
// normalize computes the scaling for avg via getNormalizationFactor and
// returns the scaling factor together with the unit prefix to use after
// scaling. When no scaling is needed (exponent 0) the prefix p is returned
// unchanged; otherwise the new prefix is derived from p and the exponent
// through units.NewPrefixFromFactor.
func normalize(avg float64, p string) (float64, string) {
	factor, exponent := getNormalizationFactor(avg)
	if exponent == 0 {
		return factor, p
	}

	scaled := units.NewPrefixFromFactor(units.NewPrefix(p), exponent)
	return factor, scaled.Prefix()
}
// checkJobData walks over all metrics of all scopes in d and, for metrics
// whose unit base contains "B/s", "F/s" or "B", computes the mean of the
// per-series averages and the prefix that normalize suggests for it. When
// that prefix differs from the metric's current one, the factor is printed
// and the metric's unit prefix is set to the new prefix. The data points
// themselves are not rescaled (that code is disabled below). It always
// returns nil.
func checkJobData(d *schema.JobData) error {
	for _, scopes := range *d {
		// var newUnit schema.Unit
		// TODO Add node scope if missing
		for _, metric := range scopes {
			scalable := strings.Contains(metric.Unit.Base, "B/s") ||
				strings.Contains(metric.Unit.Base, "F/s") ||
				strings.Contains(metric.Unit.Base, "B")
			if !scalable {
				continue
			}

			// Overall average: mean of the per-series averages.
			total := 0.0
			for _, series := range metric.Series {
				total += series.Statistics.Avg
			}
			mean := total / float64(len(metric.Series))

			factor, prefix := normalize(mean, metric.Unit.Prefix)
			if prefix == metric.Unit.Prefix {
				continue
			}

			fmt.Printf("Convert %e", factor)
			// Disabled conversion of the series data:
			// for _, s := range metric.Series {
			// fp := schema.ConvertFloatToFloat64(s.Data)
			//
			// for i := 0; i < len(fp); i++ {
			// fp[i] *= f
			// fp[i] = math.Ceil(fp[i])
			// }
			//
			// s.Data = schema.GetFloat64ToFloat(fp)
			// }
			metric.Unit.Prefix = prefix
		}
	}

	return nil
}

View File

@@ -0,0 +1,64 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
import (
"fmt"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/units"
)
// TestNormalizeFactor averages a set of large unprefixed values, asks
// getNormalizationFactor for the scaling exponent and expects that applying
// it to the empty prefix yields "G".
func TestNormalizeFactor(t *testing.T) {
	// Scaled down, the samples would read roughly 3, 24, 390 and 391.
	samples := []float64{2890031237, 23998994567, 389734042344, 390349424345}

	sum := 0.0
	for idx := range samples {
		sum += samples[idx]
	}
	avg := sum / float64(len(samples))
	fmt.Printf("AVG: %e\n", avg)

	factor, exponent := getNormalizationFactor(avg)
	fmt.Printf("Factor %e Count %d\n", factor, exponent)

	base := units.NewPrefix("")
	fmt.Printf("Prefix %e Short %s\n", float64(base), base.Prefix())

	got := units.NewPrefixFromFactor(base, exponent).Prefix()
	if got != "G" {
		t.Errorf("Failed Prefix or unit: Want G, Got %s", got)
	}
}
// TestNormalizeKeep averages values that already lie between 1 and 1000 and
// expects that applying the resulting exponent to the prefix "G" leaves the
// prefix at "G".
func TestNormalizeKeep(t *testing.T) {
	samples := []float64{3.0, 24.0, 390.0, 391.0}

	sum := 0.0
	for idx := range samples {
		sum += samples[idx]
	}
	avg := sum / float64(len(samples))
	fmt.Printf("AVG: %e\n", avg)

	factor, exponent := getNormalizationFactor(avg)
	fmt.Printf("Factor %e Count %d\n", factor, exponent)

	base := units.NewPrefix("G")
	fmt.Printf("Prefix %e Short %s\n", float64(base), base.Prefix())

	got := units.NewPrefixFromFactor(base, exponent).Prefix()
	if got != "G" {
		t.Errorf("Failed Prefix or unit: Want G, Got %s", got)
	}
}

View File

@@ -0,0 +1,746 @@
{
"name": "fritz",
"metricConfig": [
{
"name": "cpu_load",
"unit": {
"base": ""
},
"scope": "node",
"aggregation": "avg",
"timestep": 60,
"peak": 72,
"normal": 72,
"caution": 36,
"alert": 20
},
{
"name": "cpu_user",
"unit": {
"base": ""
},
"scope": "hwthread",
"aggregation": "avg",
"timestep": 60,
"peak": 100,
"normal": 50,
"caution": 20,
"alert": 10
},
{
"name": "mem_used",
"unit": {
"base": "B",
"prefix": "G"
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 256,
"normal": 128,
"caution": 200,
"alert": 240
},
{
"name": "flops_any",
"unit": {
"base": "F/s",
"prefix": "G"
},
"scope": "hwthread",
"aggregation": "sum",
"timestep": 60,
"peak": 5600,
"normal": 1000,
"caution": 200,
"alert": 50
},
{
"name": "flops_sp",
"unit": {
"base": "F/s",
"prefix": "G"
},
"scope": "hwthread",
"aggregation": "sum",
"timestep": 60,
"peak": 5600,
"normal": 1000,
"caution": 200,
"alert": 50
},
{
"name": "flops_dp",
"unit": {
"base": "F/s",
"prefix": "G"
},
"scope": "hwthread",
"aggregation": "sum",
"timestep": 60,
"peak": 2300,
"normal": 500,
"caution": 100,
"alert": 50
},
{
"name": "mem_bw",
"unit": {
"base": "B/s",
"prefix": "G"
},
"scope": "socket",
"aggregation": "sum",
"timestep": 60,
"peak": 350,
"normal": 100,
"caution": 50,
"alert": 10
},
{
"name": "clock",
"unit": {
"base": "Hz",
"prefix": "M"
},
"scope": "hwthread",
"aggregation": "avg",
"timestep": 60,
"peak": 3000,
"normal": 2400,
"caution": 1800,
"alert": 1200
},
{
"name": "cpu_power",
"unit": {
"base": "W"
},
"scope": "socket",
"aggregation": "sum",
"timestep": 60,
"peak": 500,
"normal": 250,
"caution": 100,
"alert": 50
},
{
"name": "mem_power",
"unit": {
"base": "W"
},
"scope": "socket",
"aggregation": "sum",
"timestep": 60,
"peak": 100,
"normal": 50,
"caution": 20,
"alert": 10
},
{
"name": "ipc",
"unit": {
"base": "IPC"
},
"scope": "hwthread",
"aggregation": "avg",
"timestep": 60,
"peak": 4,
"normal": 2,
"caution": 1,
"alert": 0.5
},
{
"name": "vectorization_ratio",
"unit": {
"base": ""
},
"scope": "hwthread",
"aggregation": "avg",
"timestep": 60,
"peak": 100,
"normal": 60,
"caution": 40,
"alert": 10
},
{
"name": "ib_recv",
"unit": {
"base": "B/s"
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 1250000,
"normal": 6000000,
"caution": 200,
"alert": 1
},
{
"name": "ib_xmit",
"unit": {
"base": "B/s"
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 1250000,
"normal": 6000000,
"caution": 200,
"alert": 1
},
{
"name": "ib_recv_pkts",
"unit": {
"base": ""
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1
},
{
"name": "ib_xmit_pkts",
"unit": {
"base": ""
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1
},
{
"name": "nfs4_read",
"unit": {
"base": "B/s",
"prefix": "M"
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1
},
{
"name": "nfs4_write",
"unit": {
"base": "B/s",
"prefix": "M"
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1
},
{
"name": "nfs4_total",
"unit": {
"base": "B/s",
"prefix": "M"
},
"scope": "node",
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1
}
],
"subClusters": [
{
"name": "main",
"nodes": "f01[01-88],f02[01-88],f03[01-88],f03[01-88],f04[01-88],f05[01-88],f06[01-88],f07[01-88],f08[01-88],f09[01-88],f10[01-88],f11[01-56],f12[01-56]",
"processorType": "Intel Icelake",
"socketsPerNode": 2,
"coresPerSocket": 36,
"threadsPerCore": 1,
"flopRateScalar": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"value": 432
},
"flopRateSimd": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"value": 9216
},
"memoryBandwidth": {
"unit": {
"base": "B/s",
"prefix": "G"
},
"value": 350
},
"topology": {
"node": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71
],
"socket": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35
],
[
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71
]
],
"memoryDomain": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17
],
[
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35
],
[
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53
],
[
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71
]
],
"core": [
[
0
],
[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
],
[
7
],
[
8
],
[
9
],
[
10
],
[
11
],
[
12
],
[
13
],
[
14
],
[
15
],
[
16
],
[
17
],
[
18
],
[
19
],
[
20
],
[
21
],
[
22
],
[
23
],
[
24
],
[
25
],
[
26
],
[
27
],
[
28
],
[
29
],
[
30
],
[
31
],
[
32
],
[
33
],
[
34
],
[
35
],
[
36
],
[
37
],
[
38
],
[
39
],
[
40
],
[
41
],
[
42
],
[
43
],
[
44
],
[
45
],
[
46
],
[
47
],
[
48
],
[
49
],
[
50
],
[
51
],
[
52
],
[
53
],
[
54
],
[
55
],
[
56
],
[
57
],
[
58
],
[
59
],
[
60
],
[
61
],
[
62
],
[
63
],
[
64
],
[
65
],
[
66
],
[
67
],
[
68
],
[
69
],
[
70
],
[
71
]
]
}
}
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,496 @@
{
"cpu_used": {
"core": {
"unit": {
"base": ""
},
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"id": "0",
"statistics": {
"min": 0.09090909090909093,
"avg": 0.9173553719008265,
"max": 1.0000000000000002
},
"data": [
0.09090909090909093,
0.9999999999999999,
1.0,
1.0000000000000002,
1.0,
1.0000000000000002,
0.9999999999999999,
1.0,
1.0,
1.0,
1.0
]
},
{
"hostname": "taurusi6489",
"id": "1",
"statistics": {
"min": 0.03694102397926118,
"avg": 0.045968409230268584,
"max": 0.08809840425531917
},
"data": [
0.08809840425531917,
0.05710659898477157,
0.04034861200774694,
0.037962362102530824,
0.03976721629485936,
0.04163976759199483,
0.03694102397926118,
0.03821243523316062,
0.03851132686084142,
0.044752092723760455,
0.04231266149870802
]
},
{
"hostname": "taurusi6490",
"id": "10",
"statistics": {
"min": 0.10505319148936171,
"avg": 0.9186411992263056,
"max": 1.0000000000000002
},
"data": [
0.10505319148936171,
1.0000000000000002,
1.0,
1.0,
1.0,
0.9999999999999999,
1.0,
0.9999999999999999,
1.0,
1.0,
1.0
]
},
{
"hostname": "taurusi6490",
"id": "11",
"statistics": {
"min": 0.05286048845767815,
"avg": 0.07053823838706144,
"max": 0.075148113501715
},
"data": [
0.05286048845767815,
0.06936597614563718,
0.07254534083802376,
0.075148113501715,
0.06909547738693468,
0.07372696032489846,
0.07077983088005012,
0.07082419304293325,
0.07424812030075188,
0.07285803627267043,
0.07446808510638298
]
}
],
"statisticsSeries": null
}
},
"ipc": {
"core": {
"unit": {
"base": "IPC"
},
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": "0",
"statistics": {
"min": 1.3808406263195592,
"avg": 1.3960848578375105,
"max": 1.4485575599350569
},
"data": [
1.4485575599350569,
1.3808406263195592,
1.3830284413690626,
1.3836692663348698,
1.3843283952290035
]
},
{
"hostname": "taurusi6489",
"id": "1",
"statistics": {
"min": 0.30469640475234366,
"avg": 0.8816944294664065,
"max": 1.797623522191001
},
"data": [
1.797623522191001,
0.954395633726228,
1.0019972349956185,
0.30469640475234366,
0.3497593516668412
]
},
{
"hostname": "taurusi6490",
"id": "10",
"statistics": {
"min": 1.3791232173760588,
"avg": 1.3850247295506815,
"max": 1.386710405495511
},
"data": [
1.3791232173760588,
1.38619977419787,
1.386397917938246,
1.3866923327457215,
1.386710405495511
]
},
{
"hostname": "taurusi6490",
"id": "11",
"statistics": {
"min": 0.6424094604392216,
"avg": 0.9544442638400293,
"max": 1.2706704244636826
},
"data": [
1.2706704244636826,
0.6424094604392216,
0.9249973908234796,
0.6940110823242276,
1.2401329611495353
]
}
],
"statisticsSeries": null
}
},
"flops_any": {
"core": {
"unit": {
"base": "F/s"
},
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": "0",
"statistics": {
"min": 0.0,
"avg": 184.2699002412084,
"max": 921.3495012060421
},
"data": [
921.3495012060421,
0.0,
0.0,
0.0,
0.0
]
},
{
"hostname": "taurusi6489",
"id": "1",
"statistics": {
"min": 0.13559227208748068,
"avg": 273.2997868356056,
"max": 1355.9227390817396
},
"data": [
1355.9227390817396,
8.94908797747172,
0.6779613312519499,
0.13559227208748068,
0.8135535154771758
]
},
{
"hostname": "taurusi6490",
"id": "10",
"statistics": {
"min": 0.0,
"avg": 1678.8419461262179,
"max": 4346.591400350933
},
"data": [
4346.591400350933,
0.0,
578.4248288199713,
0.0,
3469.193501460185
]
},
{
"hostname": "taurusi6490",
"id": "11",
"statistics": {
"min": 45.28689133054866,
"avg": 609.6644949204072,
"max": 2582.7080822873186
},
"data": [
2582.7080822873186,
45.28689133054866,
48.67663233623293,
47.591911855555026,
324.0589567923803
]
}
],
"statisticsSeries": null
}
},
"mem_bw": {
"socket": {
"unit": {
"base": "B/s"
},
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": "0",
"statistics": {
"min": 653671812.1661415,
"avg": 1637585527.5854635,
"max": 2614718291.9554267
},
"data": [
653671812.1661415,
2614718291.9554267,
1732453371.7073724,
1612865229.8704093,
1574218932.2279677
]
},
{
"hostname": "taurusi6490",
"id": "0",
"statistics": {
"min": 1520190251.61048,
"avg": 1572477682.3850098,
"max": 1688960732.2760606
},
"data": [
1688960732.2760606,
1580140679.8216474,
1520190251.61048,
1541841829.6250021,
1531254918.591859
]
}
],
"statisticsSeries": null
}
},
"file_bw": {
"node": {
"unit": {
"base": "B/s"
},
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"statistics": {
"min": 0.0,
"avg": 190352.6328851857,
"max": 2093878.361723524
},
"data": [
0.0,
0.0,
0.0,
0.6000135186380174,
0.0,
0.0,
2093878.361723524,
0.0,
0.0,
0.0,
0.0
]
},
{
"hostname": "taurusi6490",
"statistics": {
"min": 0.0,
"avg": 1050832.4509396513,
"max": 11559156.360352296
},
"data": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
11559156.360352296,
0.0,
0.5999838690326298,
0.0,
0.0
]
}
],
"statisticsSeries": null
}
},
"net_bw": {
"node": {
"unit": {
"base": "B/s"
},
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"statistics": {
"min": 126779.89655880642,
"avg": 653834.5091507058,
"max": 1285639.5107541133
},
"data": [
1158202.7403032137,
126779.89655880642,
419017.91939583793,
345766.3974972795,
645419.3296982117,
644667.7333333333,
1285639.5107541133,
643481.2108874657,
640025.3562553325,
643241.4875354709,
639938.0184386979
]
},
{
"hostname": "taurusi6490",
"statistics": {
"min": 640156.9862985397,
"avg": 872367.6551257868,
"max": 1916309.7075416835
},
"data": [
1774843.146788355,
643218.3646426039,
641681.1031071587,
644690.1512268113,
647183.5650609672,
644439.3303402043,
1916309.7075416835,
643748.3241006166,
757189.8273227927,
642583.6999539217,
640156.9862985397
]
}
],
"statisticsSeries": null
}
},
"mem_used": {
"node": {
"unit": {
"base": "B"
},
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"statistics": {
"min": 2779066368.0,
"avg": 9282117259.636364,
"max": 10202595328.0
},
"data": [
2779066368.0,
8518217728.0,
9852760064.0,
9979805696.0,
10039619584.0,
10087104512.0,
10136084480.0,
10202595328.0,
10154196992.0,
10177409024.0,
10176430080.0
]
},
{
"hostname": "taurusi6490",
"statistics": {
"min": 9993277440.0,
"avg": 10013080110.545454,
"max": 10039676928.0
},
"data": [
10001317888.0,
10013028352.0,
10006728704.0,
10039676928.0,
10035838976.0,
10033356800.0,
10006577152.0,
10005659648.0,
9993277440.0,
9993564160.0,
10014855168.0
]
}
],
"statisticsSeries": null
}
},
"cpu_power": {
"socket": {
"unit": {
"base": "W"
},
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": "0",
"statistics": {
"min": 35.50647456742635,
"avg": 72.08313211552377,
"max": 83.33799371150049
},
"data": [
35.50647456742635,
75.65022009482759,
83.33799371150049,
83.00405043233219,
82.9169217715322
]
},
{
"hostname": "taurusi6490",
"id": "0",
"statistics": {
"min": 83.8466923147859,
"avg": 85.18572681122097,
"max": 85.83909286117324
},
"data": [
83.8466923147859,
85.58816979864088,
85.31266819129794,
85.83909286117324,
85.34201089020692
]
}
],
"statisticsSeries": null
}
}
}

View File

@@ -0,0 +1 @@
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n 
WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}

View File

@@ -0,0 +1 @@
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":177,"walltime":86340,"resources":[{"hostname":"f0649"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/moreprototypesYury/gen_131_CuTe/cfg/Ni2Al2\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/moreprototypesYury/gen_131_CuTe/cfg/Ni2Al2\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/moreprototypesYury/gen_131_CuTe/cfg/Ni2Al2/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/moreprototypesYury/gen_131_CuTe/cfg/Ni2Al2/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398764 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:10:20\n Partition=singlenode \n NodeList=f0649\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/moreprototypesYury/gen_131_CuTe/cfg/Ni2Al2/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/moreprototypesYury/gen_131_CuTe/cfg/Ni2Al2\n 
StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}

View File

@@ -0,0 +1,98 @@
{
"jobId": 20639587,
"user": "s3804552",
"project": "p_speichersysteme",
"cluster": "taurus",
"subCluster": "haswell",
"partition": "haswell64",
"numNodes": 2,
"numHwthreads": 4,
"exclusive": 0,
"startTime": 1635856524,
"jobState": "completed",
"duration": 310,
"walltime": 3600,
"smt": 0,
"resources": [
{
"hostname": "taurusi6489",
"hwthreads": [
0,
1
]
},
{
"hostname": "taurusi6490",
"hwthreads": [
10,
11
]
}
],
"statistics": {
"cpu_used": {
"min": 0.03694102397926118,
"avg": 0.48812580468611544,
"max": 1.0000000000000002,
"unit": {
"base": ""
}
},
"ipc": {
"min": 0.30469640475234366,
"avg": 1.154312070173657,
"max": 1.797623522191001,
"unit": {
"base": "IPC"
}
},
"flops_any": {
"min": 0.0,
"avg": 686.5190320308598,
"max": 4346.591400350933,
"unit": {
"base": "F/s"
}
},
"mem_bw": {
"min": 653671812.1661415,
"avg": 1605031604.9852366,
"max": 2614718291.9554267,
"unit": {
"base": "B/s"
}
},
"file_bw": {
"min": 0.0,
"avg": 620592.5419124186,
"max": 11559156.360352296,
"unit": {
"base": "B/s"
}
},
"net_bw": {
"min": 126779.89655880642,
"avg": 763101.082138246,
"max": 1916309.7075416835,
"unit": {
"base": "B/s"
}
},
"mem_used": {
"min": 2779066368.0,
"avg": 9647598685.09091,
"max": 10202595328.0,
"unit": {
"base": "B"
}
},
"cpu_power": {
"min": 35.50647456742635,
"avg": 78.63442946337237,
"max": 85.83909286117324,
"unit": {
"base": "W"
}
}
}
}

View File

@@ -0,0 +1,6 @@
{
"jobId": 398764,
"cluster": "fritz",
"startTime": 1675954353,
"duration": 3400
}