move archive to internal

This commit is contained in:
Pay Giesselmann
2024-07-16 08:56:32 +02:00
parent e348ec74fd
commit 93c515098c
35 changed files with 19 additions and 19 deletions

View File

@@ -1,180 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"fmt"
"sync"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
const Version uint64 = 1
type ArchiveBackend interface {
Init(rawConfig json.RawMessage) (uint64, error)
Info()
Exists(job *schema.Job) bool
LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
LoadJobData(job *schema.Job) (schema.JobData, error)
LoadClusterCfg(name string) (*schema.Cluster, error)
StoreJobMeta(jobMeta *schema.JobMeta) error
ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) error
GetClusters() []string
CleanUp(jobs []*schema.Job)
Move(jobs []*schema.Job, path string)
Clean(before int64, after int64)
Compress(jobs []*schema.Job)
CompressLast(starttime int64) int64
Iter(loadMetricData bool) <-chan JobContainer
}
type JobContainer struct {
Meta *schema.JobMeta
Data *schema.JobData
}
var (
initOnce sync.Once
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
ar ArchiveBackend
useArchive bool
)
func Init(rawConfig json.RawMessage, disableArchive bool) error {
var err error
initOnce.Do(func() {
useArchive = !disableArchive
var cfg struct {
Kind string `json:"kind"`
}
if err = json.Unmarshal(rawConfig, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
return
}
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
default:
err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
}
if err != nil {
return
}
var version uint64
version, err = ar.Init(rawConfig)
if err != nil {
log.Error("Error while initializing archiveBackend")
return
}
log.Infof("Load archive version %d", version)
err = initClusterConfig()
})
return err
}
func GetHandle() ArchiveBackend {
return ar
}
// Helper for metricdata.LoadAverages().
func LoadAveragesFromArchive(
job *schema.Job,
metrics []string,
data [][]schema.Float,
) error {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return err
}
for i, m := range metrics {
if stat, ok := metaFile.Statistics[m]; ok {
data[i] = append(data[i], schema.Float(stat.Avg))
} else {
data[i] = append(data[i], schema.NaN)
}
}
return nil
}
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return nil, err
}
return metaFile.Statistics, nil
}
// If the job is archived, find its `meta.json` file and override the Metadata
// in that JSON file. If the job is not archived, nothing is done.
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
if job.State == schema.JobStateRunning || !useArchive {
return nil
}
jobMeta, err := ar.LoadJobMeta(job)
if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return err
}
for k, v := range metadata {
jobMeta.MetaData[k] = v
}
return ar.StoreJobMeta(jobMeta)
}
// If the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
if job.State == schema.JobStateRunning || !useArchive {
return nil
}
jobMeta, err := ar.LoadJobMeta(job)
if err != nil {
log.Warn("Error while loading job metadata from archiveBackend")
return err
}
jobMeta.Tags = make([]*schema.Tag, 0)
for _, tag := range tags {
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
Name: tag.Name,
Type: tag.Type,
})
}
return ar.StoreJobMeta(jobMeta)
}
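
A minimal usage sketch of this package as it stood before the move (the file-backend config string and the pkg/archive import path are taken from the tests below; the archive path and error handling are illustrative, not part of this commit):

package main

import (
    "encoding/json"
    "fmt"
    "log"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
)

func main() {
    // "file" backend rooted at a job-archive directory, as used in the tests.
    cfg := json.RawMessage(`{"kind": "file", "path": "./var/job-archive"}`)
    if err := archive.Init(cfg, false); err != nil {
        log.Fatal(err)
    }
    ar := archive.GetHandle()
    // Iterate job metadata only (loadMetricData = false leaves Data nil).
    for job := range ar.Iter(false) {
        fmt.Println(job.Meta.JobID)
    }
}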

View File

@@ -1,69 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive_test
import (
"encoding/json"
"fmt"
"path/filepath"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var jobs []*schema.Job
func setup(t *testing.T) archive.ArchiveBackend {
tmpdir := t.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive")
util.CopyDir("./testdata/archive/", jobarchive)
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
if err := archive.Init(json.RawMessage(archiveCfg), false); err != nil {
t.Fatal(err)
}
jobs = make([]*schema.Job, 2)
jobs[0] = &schema.Job{}
jobs[0].JobID = 1403244
jobs[0].Cluster = "emmy"
jobs[0].StartTime = time.Unix(1608923076, 0)
jobs[1] = &schema.Job{}
jobs[1].JobID = 1404397
jobs[1].Cluster = "emmy"
jobs[1].StartTime = time.Unix(1609300556, 0)
return archive.GetHandle()
}
func TestCleanUp(t *testing.T) {
a := setup(t)
if !a.Exists(jobs[0]) {
t.Error("Job does not exist")
}
a.CleanUp(jobs)
if a.Exists(jobs[0]) || a.Exists(jobs[1]) {
t.Error("Jobs still exist")
}
}
// func TestCompress(t *testing.T) {
// a := setup(t)
// if !a.Exists(jobs[0]) {
// t.Error("Job does not exist")
// }
//
// a.Compress(jobs)
//
// if a.Exists(jobs[0]) || a.Exists(jobs[1]) {
// t.Error("Jobs still exist")
// }
// }

View File

@@ -1,223 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"errors"
"fmt"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var (
Clusters []*schema.Cluster
GlobalMetricList []*schema.GlobalMetricListItem
nodeLists map[string]map[string]NodeList
)
func initClusterConfig() error {
Clusters = []*schema.Cluster{}
nodeLists = map[string]map[string]NodeList{}
metricLookup := make(map[string]schema.GlobalMetricListItem)
for _, c := range ar.GetClusters() {
cluster, err := ar.LoadClusterCfg(c)
if err != nil {
log.Warnf("Error while loading cluster config for cluster '%v'", c)
return err
}
if len(cluster.Name) == 0 ||
len(cluster.MetricConfig) == 0 ||
len(cluster.SubClusters) == 0 {
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
}
for _, mc := range cluster.MetricConfig {
if len(mc.Name) == 0 {
return errors.New("cluster.metricConfig.name should not be empty")
}
if mc.Timestep < 1 {
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
}
// For backwards compatibility...
if mc.Scope == "" {
mc.Scope = schema.MetricScopeNode
}
if !mc.Scope.Valid() {
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
}
ml, ok := metricLookup[mc.Name]
if !ok {
metricLookup[mc.Name] = schema.GlobalMetricListItem{
Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
}
ml = metricLookup[mc.Name]
}
availability := schema.ClusterSupport{Cluster: cluster.Name}
scLookup := make(map[string]*schema.SubClusterConfig)
for _, scc := range mc.SubClusters {
scLookup[scc.Name] = scc
}
for _, sc := range cluster.SubClusters {
newMetric := mc
newMetric.SubClusters = nil
if cfg, ok := scLookup[sc.Name]; ok {
if !cfg.Remove {
availability.SubClusters = append(availability.SubClusters, sc.Name)
newMetric.Peak = cfg.Peak
newMetric.Normal = cfg.Normal
newMetric.Caution = cfg.Caution
newMetric.Alert = cfg.Alert
newMetric.Footprint = cfg.Footprint
newMetric.Energy = cfg.Energy
newMetric.LowerIsBetter = cfg.LowerIsBetter
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
if newMetric.Footprint {
sc.Footprint = append(sc.Footprint, newMetric.Name)
ml.Footprint = true
}
if newMetric.Energy {
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
}
}
} else {
availability.SubClusters = append(availability.SubClusters, sc.Name)
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
if newMetric.Footprint {
sc.Footprint = append(sc.Footprint, newMetric.Name)
}
if newMetric.Energy {
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
}
}
}
ml.Availability = append(metricLookup[mc.Name].Availability, availability)
metricLookup[mc.Name] = ml
}
Clusters = append(Clusters, cluster)
nodeLists[cluster.Name] = make(map[string]NodeList)
for _, sc := range cluster.SubClusters {
if sc.Nodes == "*" {
continue
}
nl, err := ParseNodeList(sc.Nodes)
if err != nil {
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
}
nodeLists[cluster.Name][sc.Name] = nl
}
}
for _, ml := range metricLookup {
GlobalMetricList = append(GlobalMetricList, &ml)
}
return nil
}
func GetCluster(cluster string) *schema.Cluster {
for _, c := range Clusters {
if c.Name == cluster {
return c
}
}
return nil
}
func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
for _, c := range Clusters {
if c.Name == cluster {
for _, p := range c.SubClusters {
if p.Name == subcluster {
return p, nil
}
}
}
}
return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
}
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
for _, c := range Clusters {
if c.Name == cluster {
for _, m := range c.MetricConfig {
if m.Name == metric {
return m
}
}
}
}
return nil
}
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
func AssignSubCluster(job *schema.BaseJob) error {
cluster := GetCluster(job.Cluster)
if cluster == nil {
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
}
if job.SubCluster != "" {
for _, sc := range cluster.SubClusters {
if sc.Name == job.SubCluster {
return nil
}
}
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > already assigned subcluster %v unkown (cluster: %v)", job.SubCluster, job.Cluster)
}
if len(job.Resources) == 0 {
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > job without any resources/hosts")
}
host0 := job.Resources[0].Hostname
for sc, nl := range nodeLists[job.Cluster] {
if nl != nil && nl.Contains(host0) {
job.SubCluster = sc
return nil
}
}
if cluster.SubClusters[0].Nodes == "*" {
job.SubCluster = cluster.SubClusters[0].Name
return nil
}
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", job.Cluster, host0)
}
func GetSubClusterByNode(cluster, hostname string) (string, error) {
for sc, nl := range nodeLists[cluster] {
if nl != nil && nl.Contains(hostname) {
return sc, nil
}
}
c := GetCluster(cluster)
if c == nil {
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", cluster)
}
if c.SubClusters[0].Nodes == "" {
return c.SubClusters[0].Name, nil
}
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
}
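
A short sketch of resolving topology information once the cluster configs are loaded (assumes archive.Init has succeeded; cluster "emmy", the haswell node range "w11[27-45,...]" and the metric "flops_any" come from the test cluster.json in this commit; the import path is the pre-move pkg/archive path):

package main

import (
    "encoding/json"
    "fmt"
    "log"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
)

func main() {
    cfg := json.RawMessage(`{"kind": "file", "path": "./var/job-archive"}`)
    if err := archive.Init(cfg, false); err != nil {
        log.Fatal(err)
    }
    // Map a hostname to its subcluster, then read the cluster's metric config.
    sub, err := archive.GetSubClusterByNode("emmy", "w1127")
    if err != nil {
        log.Fatal(err)
    }
    mc := archive.GetMetricConfig("emmy", "flops_any")
    fmt.Printf("w1127 -> subcluster %s, flops_any peak %.0f\n", sub, mc.Peak)
}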

View File

@@ -1,39 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive_test
import (
"encoding/json"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
)
func TestClusterConfig(t *testing.T) {
if err := archive.Init(json.RawMessage("{\"kind\": \"file\",\"path\": \"testdata/archive\"}"), false); err != nil {
t.Fatal(err)
}
sc, err := archive.GetSubCluster("fritz", "spr1tb")
if err != nil {
t.Fatal(err)
}
// spew.Dump(sc.MetricConfig)
if len(sc.Footprint) != 3 {
t.Fail()
}
if len(sc.MetricConfig) != 15 {
t.Fail()
}
for _, metric := range sc.MetricConfig {
if metric.LowerIsBetter && metric.Name != "mem_used" {
t.Fail()
}
}
// spew.Dump(archive.GlobalMetricList)
// t.Fail()
}

View File

@@ -1,575 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"bufio"
"bytes"
"compress/gzip"
"encoding/json"
"errors"
"fmt"
"math"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/santhosh-tekuri/jsonschema/v5"
)
type FsArchiveConfig struct {
Path string `json:"path"`
}
type FsArchive struct {
path string
clusters []string
}
type clusterInfo struct {
numJobs int
dateFirst int64
dateLast int64
diskSize float64
}
func getDirectory(
job *schema.Job,
rootPath string,
) string {
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
return filepath.Join(
rootPath,
job.Cluster,
lvl1, lvl2,
strconv.FormatInt(job.StartTime.Unix(), 10))
}
func getPath(
job *schema.Job,
rootPath string,
file string) string {
return filepath.Join(
getDirectory(job, rootPath), file)
}
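// Illustrative layout: for jobID 1403244 on cluster "emmy" with start time
// 1608923076 (the job used in the tests below), getDirectory() yields
//
//	<rootPath>/emmy/1403/244/1608923076
//
// and getPath(job, rootPath, "meta.json") appends the file name:
//
//	<rootPath>/emmy/1403/244/1608923076/meta.json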
func loadJobMeta(filename string) (*schema.JobMeta, error) {
b, err := os.ReadFile(filename)
if err != nil {
log.Errorf("loadJobMeta() > open file error: %v", err)
return &schema.JobMeta{}, err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err)
}
}
return DecodeJobMeta(bytes.NewReader(b))
}
func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
f, err := os.Open(filename)
if err != nil {
log.Errorf("fsBackend LoadJobData()- %v", err)
return nil, err
}
defer f.Close()
if isCompressed {
r, err := gzip.NewReader(f)
if err != nil {
log.Errorf(" %v", err)
return nil, err
}
defer r.Close()
if config.Keys.Validate {
if err := schema.Validate(schema.Data, r); err != nil {
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
}
}
return DecodeJobData(r, filename)
} else {
if config.Keys.Validate {
if err := schema.Validate(schema.Data, bufio.NewReader(f)); err != nil {
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
}
}
return DecodeJobData(bufio.NewReader(f), filename)
}
}
func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) {
var config FsArchiveConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warnf("Init() > Unmarshal error: %#v", err)
return 0, err
}
if config.Path == "" {
err := fmt.Errorf("Init() : empty config.Path")
log.Errorf("Init() > config.Path error: %v", err)
return 0, err
}
fsa.path = config.Path
b, err := os.ReadFile(filepath.Join(fsa.path, "version.txt"))
if err != nil {
log.Warnf("fsBackend Init() - %v", err)
return 0, err
}
version, err := strconv.ParseUint(strings.TrimSuffix(string(b), "\n"), 10, 64)
if err != nil {
log.Errorf("fsBackend Init()- %v", err)
return 0, err
}
if version != Version {
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
}
entries, err := os.ReadDir(fsa.path)
if err != nil {
log.Errorf("Init() > ReadDir() error: %v", err)
return 0, err
}
for _, de := range entries {
if !de.IsDir() {
continue
}
fsa.clusters = append(fsa.clusters, de.Name())
}
return version, nil
}
func (fsa *FsArchive) Info() {
fmt.Printf("Job archive %s\n", fsa.path)
clusters, err := os.ReadDir(fsa.path)
if err != nil {
log.Fatalf("Reading clusters failed: %s", err.Error())
}
ci := make(map[string]*clusterInfo)
for _, cluster := range clusters {
if !cluster.IsDir() {
continue
}
cc := cluster.Name()
ci[cc] = &clusterInfo{dateFirst: time.Now().Unix()}
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, cluster.Name()))
if err != nil {
log.Fatalf("Reading jobs failed @ lvl1 dirs: %s", err.Error())
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
continue
}
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, cluster.Name(), lvl1Dir.Name()))
if err != nil {
log.Fatalf("Reading jobs failed @ lvl2 dirs: %s", err.Error())
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(fsa.path, cluster.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
if err != nil {
log.Fatalf("Reading jobs failed @ starttime dirs: %s", err.Error())
}
for _, startTimeDir := range startTimeDirs {
if startTimeDir.IsDir() {
ci[cc].numJobs++
startTime, err := strconv.ParseInt(startTimeDir.Name(), 10, 64)
if err != nil {
log.Fatalf("Cannot parse starttime: %s", err.Error())
}
ci[cc].dateFirst = util.Min(ci[cc].dateFirst, startTime)
ci[cc].dateLast = util.Max(ci[cc].dateLast, startTime)
ci[cc].diskSize += util.DiskUsage(filepath.Join(dirpath, startTimeDir.Name()))
}
}
}
}
}
cit := clusterInfo{dateFirst: time.Now().Unix()}
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tdu (MB)")
for cluster, clusterInfo := range ci {
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
clusterInfo.numJobs,
time.Unix(clusterInfo.dateFirst, 0),
time.Unix(clusterInfo.dateLast, 0),
clusterInfo.diskSize)
cit.numJobs += clusterInfo.numJobs
cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
cit.diskSize += clusterInfo.diskSize
}
fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
w.Flush()
}
func (fsa *FsArchive) Exists(job *schema.Job) bool {
dir := getDirectory(job, fsa.path)
_, err := os.Stat(dir)
return !errors.Is(err, os.ErrNotExist)
}
func (fsa *FsArchive) Clean(before int64, after int64) {
if after == 0 {
after = math.MaxInt64
}
clusters, err := os.ReadDir(fsa.path)
if err != nil {
log.Fatalf("Reading clusters failed: %s", err.Error())
}
for _, cluster := range clusters {
if !cluster.IsDir() {
continue
}
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, cluster.Name()))
if err != nil {
log.Fatalf("Reading jobs failed @ lvl1 dirs: %s", err.Error())
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
continue
}
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, cluster.Name(), lvl1Dir.Name()))
if err != nil {
log.Fatalf("Reading jobs failed @ lvl2 dirs: %s", err.Error())
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(fsa.path, cluster.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
if err != nil {
log.Fatalf("Reading jobs failed @ starttime dirs: %s", err.Error())
}
for _, startTimeDir := range startTimeDirs {
if startTimeDir.IsDir() {
startTime, err := strconv.ParseInt(startTimeDir.Name(), 10, 64)
if err != nil {
log.Fatalf("Cannot parse starttime: %s", err.Error())
}
if startTime < before || startTime > after {
if err := os.RemoveAll(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
log.Errorf("JobArchive Cleanup() error: %v", err)
}
}
}
}
if util.GetFilecount(dirpath) == 0 {
if err := os.Remove(dirpath); err != nil {
log.Errorf("JobArchive Clean() error: %v", err)
}
}
}
}
}
}
func (fsa *FsArchive) Move(jobs []*schema.Job, path string) {
for _, job := range jobs {
source := getDirectory(job, fsa.path)
target := getDirectory(job, path)
if err := os.MkdirAll(filepath.Clean(filepath.Join(target, "..")), 0777); err != nil {
log.Errorf("JobArchive Move MkDir error: %v", err)
}
if err := os.Rename(source, target); err != nil {
log.Errorf("JobArchive Move() error: %v", err)
}
parent := filepath.Clean(filepath.Join(source, ".."))
if util.GetFilecount(parent) == 0 {
if err := os.Remove(parent); err != nil {
log.Errorf("JobArchive Move() error: %v", err)
}
}
}
}
func (fsa *FsArchive) CleanUp(jobs []*schema.Job) {
start := time.Now()
for _, job := range jobs {
dir := getDirectory(job, fsa.path)
if err := os.RemoveAll(dir); err != nil {
log.Errorf("JobArchive Cleanup() error: %v", err)
}
parent := filepath.Clean(filepath.Join(dir, ".."))
if util.GetFilecount(parent) == 0 {
if err := os.Remove(parent); err != nil {
log.Errorf("JobArchive Cleanup() error: %v", err)
}
}
}
log.Infof("Retention Service - Remove %d files in %s", len(jobs), time.Since(start))
}
func (fsa *FsArchive) Compress(jobs []*schema.Job) {
var cnt int
start := time.Now()
for _, job := range jobs {
fileIn := getPath(job, fsa.path, "data.json")
if util.CheckFileExists(fileIn) && util.GetFilesize(fileIn) > 2000 {
util.CompressFile(fileIn, getPath(job, fsa.path, "data.json.gz"))
cnt++
}
}
log.Infof("Compression Service - %d files took %s", cnt, time.Since(start))
}
func (fsa *FsArchive) CompressLast(starttime int64) int64 {
filename := filepath.Join(fsa.path, "compress.txt")
b, err := os.ReadFile(filename)
if err != nil {
log.Errorf("fsBackend Compress - %v", err)
os.WriteFile(filename, []byte(fmt.Sprintf("%d", starttime)), 0644)
return starttime
}
last, err := strconv.ParseInt(strings.TrimSuffix(string(b), "\n"), 10, 64)
if err != nil {
log.Errorf("fsBackend Compress - %v", err)
return starttime
}
log.Infof("fsBackend Compress - start %d last %d", starttime, last)
os.WriteFile(filename, []byte(fmt.Sprintf("%d", starttime)), 0644)
return last
}
func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
var isCompressed bool = true
filename := getPath(job, fsa.path, "data.json.gz")
if !util.CheckFileExists(filename) {
filename = getPath(job, fsa.path, "data.json")
isCompressed = false
}
return loadJobData(filename, isCompressed)
}
func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {
filename := getPath(job, fsa.path, "meta.json")
return loadJobMeta(filename)
}
func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
if err != nil {
log.Errorf("LoadClusterCfg() > open file error: %v", err)
return &schema.Cluster{}, err
}
// Validation is intentionally not gated by config.Keys.Validate here.
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
log.Warnf("Validate cluster config: %v\n", err)
return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
}
return DecodeCluster(bytes.NewReader(b))
}
func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
ch := make(chan JobContainer)
go func() {
clustersDir, err := os.ReadDir(fsa.path)
if err != nil {
log.Fatalf("Reading clusters failed @ cluster dirs: %s", err.Error())
}
for _, clusterDir := range clustersDir {
if !clusterDir.IsDir() {
continue
}
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
if err != nil {
log.Fatalf("Reading jobs failed @ lvl1 dirs: %s", err.Error())
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
// Could be the cluster.json file
continue
}
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name()))
if err != nil {
log.Fatalf("Reading jobs failed @ lvl2 dirs: %s", err.Error())
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
if err != nil {
log.Fatalf("Reading jobs failed @ starttime dirs: %s", err.Error())
}
for _, startTimeDir := range startTimeDirs {
if startTimeDir.IsDir() {
job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
if err != nil && !errors.Is(err, &jsonschema.ValidationError{}) {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
}
if loadMetricData {
var isCompressed bool = true
filename := filepath.Join(dirpath, startTimeDir.Name(), "data.json.gz")
if !util.CheckFileExists(filename) {
filename = filepath.Join(dirpath, startTimeDir.Name(), "data.json")
isCompressed = false
}
data, err := loadJobData(filename, isCompressed)
if err != nil && !errors.Is(err, &jsonschema.ValidationError{}) {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
}
ch <- JobContainer{Meta: job, Data: &data}
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
} else {
ch <- JobContainer{Meta: job, Data: nil}
}
}
}
}
}
}
close(ch)
}()
return ch
}
func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
if err != nil {
log.Error("Error while creating filepath for meta.json")
return err
}
if err := EncodeJobMeta(f, jobMeta); err != nil {
log.Error("Error while encoding job metadata to meta.json file")
return err
}
if err := f.Close(); err != nil {
log.Warn("Error while closing meta.json file")
return err
}
return nil
}
func (fsa *FsArchive) GetClusters() []string {
return fsa.clusters
}
func (fsa *FsArchive) ImportJob(
jobMeta *schema.JobMeta,
jobData *schema.JobData) error {
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
dir := getPath(&job, fsa.path, "")
if err := os.MkdirAll(dir, 0777); err != nil {
log.Error("Error while creating job archive path")
return err
}
f, err := os.Create(path.Join(dir, "meta.json"))
if err != nil {
log.Error("Error while creating filepath for meta.json")
return err
}
if err := EncodeJobMeta(f, jobMeta); err != nil {
log.Error("Error while encoding job metadata to meta.json file")
return err
}
if err := f.Close(); err != nil {
log.Warn("Error while closing meta.json file")
return err
}
// var isCompressed bool = true
// // TODO Use shortJob Config for check
// if jobMeta.Duration < 300 {
// isCompressed = false
// f, err = os.Create(path.Join(dir, "data.json"))
// } else {
// f, err = os.Create(path.Join(dir, "data.json.gz"))
// }
// if err != nil {
// return err
// }
//
// if isCompressed {
// if err := EncodeJobData(gzip.NewWriter(f), jobData); err != nil {
// return err
// }
// } else {
// if err := EncodeJobData(f, jobData); err != nil {
// return err
// }
// }
f, err = os.Create(path.Join(dir, "data.json"))
if err != nil {
log.Error("Error while creating filepath for data.json")
return err
}
if err := EncodeJobData(f, jobData); err != nil {
log.Error("Error while encoding job metricdata to data.json file")
return err
}
if err := f.Close(); err != nil {
log.Warn("Error while closing data.json file")
return err
}
return nil
}

View File

@@ -1,210 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"fmt"
"path/filepath"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func TestInitEmptyPath(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"kind\":\"testdata/archive\"}"))
if err == nil {
t.Fatal(err)
}
}
func TestInitNoJson(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("\"path\":\"testdata/archive\"}"))
if err == nil {
t.Fatal(err)
}
}
func TestInitNotExists(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/job-archive\"}"))
if err == nil {
t.Fatal(err)
}
}
func TestInit(t *testing.T) {
var fsa FsArchive
version, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/archive\"}"))
if err != nil {
t.Fatal(err)
}
if fsa.path != "testdata/archive" {
t.Fail()
}
if version != 1 {
t.Fail()
}
if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {
t.Fail()
}
}
func TestLoadJobMetaInternal(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/archive\"}"))
if err != nil {
t.Fatal(err)
}
job, err := loadJobMeta("testdata/archive/emmy/1404/397/1609300556/meta.json")
if err != nil {
t.Fatal(err)
}
if job.JobID != 1404397 {
t.Fail()
}
if int(job.NumNodes) != len(job.Resources) {
t.Fail()
}
if job.StartTime != 1609300556 {
t.Fail()
}
}
func TestLoadJobMeta(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/archive\"}"))
if err != nil {
t.Fatal(err)
}
jobIn := schema.Job{BaseJob: schema.JobDefaults}
jobIn.StartTime = time.Unix(1608923076, 0)
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
job, err := fsa.LoadJobMeta(&jobIn)
if err != nil {
t.Fatal(err)
}
if job.JobID != 1403244 {
t.Fail()
}
if int(job.NumNodes) != len(job.Resources) {
t.Fail()
}
if job.StartTime != 1608923076 {
t.Fail()
}
}
func TestLoadJobData(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\": \"testdata/archive\"}"))
if err != nil {
t.Fatal(err)
}
jobIn := schema.Job{BaseJob: schema.JobDefaults}
jobIn.StartTime = time.Unix(1608923076, 0)
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
data, err := fsa.LoadJobData(&jobIn)
if err != nil {
t.Fatal(err)
}
for _, scopes := range data {
// fmt.Printf("Metric name: %s\n", name)
if _, exists := scopes[schema.MetricScopeNode]; !exists {
t.Fail()
}
}
}
func BenchmarkLoadJobData(b *testing.B) {
tmpdir := b.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive")
util.CopyDir("./testdata/archive/", jobarchive)
archiveCfg := fmt.Sprintf("{\"path\": \"%s\"}", jobarchive)
var fsa FsArchive
fsa.Init(json.RawMessage(archiveCfg))
jobIn := schema.Job{BaseJob: schema.JobDefaults}
jobIn.StartTime = time.Unix(1608923076, 0)
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
util.UncompressFile(filepath.Join(jobarchive, "emmy/1403/244/1608923076/data.json.gz"),
filepath.Join(jobarchive, "emmy/1403/244/1608923076/data.json"))
b.ResetTimer()
for i := 0; i < b.N; i++ {
fsa.LoadJobData(&jobIn)
}
}
func BenchmarkLoadJobDataCompressed(b *testing.B) {
tmpdir := b.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive")
util.CopyDir("./testdata/archive/", jobarchive)
archiveCfg := fmt.Sprintf("{\"path\": \"%s\"}", jobarchive)
var fsa FsArchive
fsa.Init(json.RawMessage(archiveCfg))
jobIn := schema.Job{BaseJob: schema.JobDefaults}
jobIn.StartTime = time.Unix(1608923076, 0)
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
b.ResetTimer()
for i := 0; i < b.N; i++ {
fsa.LoadJobData(&jobIn)
}
}
func TestLoadCluster(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/archive\"}"))
if err != nil {
t.Fatal(err)
}
cfg, err := fsa.LoadClusterCfg("emmy")
if err != nil {
t.Fatal(err)
}
if cfg.SubClusters[0].CoresPerSocket != 4 {
t.Fail()
}
}
func TestIter(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/archive\"}"))
if err != nil {
t.Fatal(err)
}
for job := range fsa.Iter(false) {
fmt.Printf("Job %d\n", job.Meta.JobID)
if job.Meta.Cluster != "emmy" {
t.Fail()
}
}
}

View File

@@ -1,77 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"io"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
data := cache.Get(k, func() (value interface{}, ttl time.Duration, size int) {
var d schema.JobData
if err := json.NewDecoder(r).Decode(&d); err != nil {
log.Warn("Error while decoding raw job data json")
return err, 0, 1000
}
return d, 1 * time.Hour, d.Size()
})
if err, ok := data.(error); ok {
log.Warn("Error in decoded job data set")
return nil, err
}
return data.(schema.JobData), nil
}
func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
var d schema.JobMeta
if err := json.NewDecoder(r).Decode(&d); err != nil {
log.Warn("Error while decoding raw job meta json")
return &d, err
}
// Sanitize parameters
return &d, nil
}
func DecodeCluster(r io.Reader) (*schema.Cluster, error) {
var c schema.Cluster
if err := json.NewDecoder(r).Decode(&c); err != nil {
log.Warn("Error while decoding raw cluster json")
return &c, err
}
// Sanitize parameters
return &c, nil
}
func EncodeJobData(w io.Writer, d *schema.JobData) error {
// Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil {
log.Warn("Error while encoding new job data json")
return err
}
return nil
}
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
// Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil {
log.Warn("Error while encoding new job meta json")
return err
}
return nil
}
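
A small round-trip sketch for these helpers (field names follow the tests in this commit, the import path is the pre-move pkg/archive path, and the in-memory buffer stands in for a meta.json file):

package main

import (
    "bytes"
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
    in := schema.JobMeta{BaseJob: schema.JobDefaults, StartTime: 1608923076}
    in.JobID = 1403244
    in.Cluster = "emmy"

    // Encode to an in-memory buffer, then decode it back.
    var buf bytes.Buffer
    if err := archive.EncodeJobMeta(&buf, &in); err != nil {
        panic(err)
    }
    out, err := archive.DecodeJobMeta(&buf)
    if err != nil {
        panic(err)
    }
    fmt.Println(out.JobID, out.Cluster) // 1403244 emmy
}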

View File

@@ -1,270 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"fmt"
"strconv"
"strings"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
type NodeList [][]interface {
consume(input string) (next string, ok bool)
limits() []map[string]int
prefix() string
}
func (nl *NodeList) Contains(name string) bool {
var ok bool
for _, term := range *nl {
str := name
for _, expr := range term {
str, ok = expr.consume(str)
if !ok {
break
}
}
if ok && str == "" {
return true
}
}
return false
}
func (nl *NodeList) PrintList() []string {
var out []string
for _, term := range *nl {
// Get String-Part first
prefix := term[0].prefix()
if len(term) == 1 { // If only String-Part in Term: Single Node Name -> Use as provided
out = append(out, prefix)
} else { // Else: Numeric start-end definition with x digits zeroPadded
limitArr := term[1].limits()
for _, inner := range limitArr {
for i := inner["start"]; i < inner["end"]+1; i++ {
if inner["zeroPadded"] == 1 {
out = append(out, fmt.Sprintf("%s%0*d", prefix, inner["digits"], i))
} else {
log.Error("node list: only zero-padded ranges are allowed")
}
}
}
}
}
return out
}
func (nl *NodeList) NodeCount() int {
var out int = 0
for _, term := range *nl {
if len(term) == 1 { // If only String-Part in Term: Single Node Name -> add one
out += 1
} else { // Else: Numeric start-end definition -> add difference + 1
limitArr := term[1].limits()
for _, inner := range limitArr {
out += (inner["end"] - inner["start"]) + 1
}
}
}
return out
}
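
A usage sketch for the node-list parser (the expressions and expected results mirror the tests below; the import path is the pre-move pkg/archive path):

package main

import (
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
)

func main() {
    nl, err := archive.ParseNodeList("hallo,emmy[01-99]")
    if err != nil {
        panic(err)
    }
    fmt.Println(nl.Contains("emmy42"))  // true
    fmt.Println(nl.Contains("emmy100")) // false: outside the zero-padded 01-99 range
    fmt.Println(nl.NodeCount())         // 100: one literal name plus 99 nodes in the range
}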
type NLExprString string
func (nle NLExprString) consume(input string) (next string, ok bool) {
str := string(nle)
if strings.HasPrefix(input, str) {
return strings.TrimPrefix(input, str), true
}
return "", false
}
func (nle NLExprString) limits() []map[string]int {
// Null implementation to fulfill interface requirement
l := make([]map[string]int, 0)
return l
}
func (nle NLExprString) prefix() string {
return string(nle)
}
type NLExprIntRanges []NLExprIntRange
func (nles NLExprIntRanges) consume(input string) (next string, ok bool) {
for _, nle := range nles {
if next, ok := nle.consume(input); ok {
return next, ok
}
}
return "", false
}
func (nles NLExprIntRanges) limits() []map[string]int {
l := make([]map[string]int, 0)
for _, nle := range nles {
inner := nle.limits()
l = append(l, inner[0])
}
return l
}
func (nles NLExprIntRanges) prefix() string {
// Null implementation to fulfill interface requirement
var s string
return s
}
type NLExprIntRange struct {
start, end int64
zeroPadded bool
digits int
}
func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
if !nle.zeroPadded || nle.digits < 1 {
log.Error("only zero-padded ranges are allowed")
return "", false
}
if len(input) < nle.digits {
return "", false
}
numerals, rest := input[:nle.digits], input[nle.digits:]
for len(numerals) > 1 && numerals[0] == '0' {
numerals = numerals[1:]
}
x, err := strconv.ParseInt(numerals, 10, 32)
if err != nil {
return "", false
}
if nle.start <= x && x <= nle.end {
return rest, true
}
return "", false
}
func (nle NLExprIntRange) limits() []map[string]int {
l := make([]map[string]int, 0)
m := make(map[string]int)
m["start"] = int(nle.start)
m["end"] = int(nle.end)
m["digits"] = int(nle.digits)
if nle.zeroPadded {
m["zeroPadded"] = 1
} else {
m["zeroPadded"] = 0
}
l = append(l, m)
return l
}
func (nles NLExprIntRange) prefix() string {
// Null implementation to fulfill interface requirement
var s string
return s
}
func ParseNodeList(raw string) (NodeList, error) {
isLetter := func(r byte) bool { return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') }
isDigit := func(r byte) bool { return '0' <= r && r <= '9' }
isDash := func(r byte) bool { return r == '-' }
rawterms := []string{}
prevterm := 0
for i := 0; i < len(raw); i++ {
if raw[i] == '[' {
for i < len(raw) && raw[i] != ']' {
i++
}
if i == len(raw) {
return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
}
} else if raw[i] == ',' {
rawterms = append(rawterms, raw[prevterm:i])
prevterm = i + 1
}
}
if prevterm != len(raw) {
rawterms = append(rawterms, raw[prevterm:])
}
nl := NodeList{}
for _, rawterm := range rawterms {
exprs := []interface {
consume(input string) (next string, ok bool)
limits() []map[string]int
prefix() string
}{}
for i := 0; i < len(rawterm); i++ {
c := rawterm[i]
if isLetter(c) || isDigit(c) {
j := i
for j < len(rawterm) &&
(isLetter(rawterm[j]) ||
isDigit(rawterm[j]) ||
isDash(rawterm[j])) {
j++
}
exprs = append(exprs, NLExprString(rawterm[i:j]))
i = j - 1
} else if c == '[' {
end := strings.Index(rawterm[i:], "]")
if end == -1 {
return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
}
parts := strings.Split(rawterm[i+1:i+end], ",")
nles := NLExprIntRanges{}
for _, part := range parts {
minus := strings.Index(part, "-")
if minus == -1 {
return nil, fmt.Errorf("ARCHIVE/NODELIST > no '-' found inside '[...]'")
}
s1, s2 := part[0:minus], part[minus+1:]
if len(s1) != len(s2) || len(s1) == 0 {
return nil, fmt.Errorf("ARCHIVE/NODELIST > %v and %v are not of equal length or of length zero", s1, s2)
}
x1, err := strconv.ParseInt(s1, 10, 32)
if err != nil {
return nil, fmt.Errorf("ARCHIVE/NODELIST > could not parse int: %w", err)
}
x2, err := strconv.ParseInt(s2, 10, 32)
if err != nil {
return nil, fmt.Errorf("ARCHIVE/NODELIST > could not parse int: %w", err)
}
nles = append(nles, NLExprIntRange{
start: x1,
end: x2,
digits: len(s1),
zeroPadded: true,
})
}
exprs = append(exprs, nles)
i += end
} else {
return nil, fmt.Errorf("ARCHIVE/NODELIST > invalid character: %#v", rune(c))
}
}
nl = append(nl, exprs)
}
return nl, nil
}

View File

@@ -1,75 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"testing"
)
func TestNodeList(t *testing.T) {
nl, err := ParseNodeList("hallo,wel123t,emmy[01-99],fritz[005-500],woody[100-200]")
if err != nil {
t.Fatal(err)
}
if nl.Contains("hello") || nl.Contains("woody") {
t.Fail()
}
if nl.Contains("fritz1") || nl.Contains("fritz9") || nl.Contains("fritz004") || nl.Contains("woody201") {
t.Fail()
}
if !nl.Contains("hallo") || !nl.Contains("wel123t") {
t.Fail()
}
if !nl.Contains("emmy01") || !nl.Contains("emmy42") || !nl.Contains("emmy99") {
t.Fail()
}
if !nl.Contains("woody100") || !nl.Contains("woody199") {
t.Fail()
}
}
func TestNodeListCommasInBrackets(t *testing.T) {
nl, err := ParseNodeList("a[1000-2000,2010-2090,3000-5000]")
if err != nil {
t.Fatal(err)
}
if nl.Contains("hello") || nl.Contains("woody") {
t.Fatal("1")
}
if nl.Contains("a0") || nl.Contains("a0000") || nl.Contains("a5001") || nl.Contains("a2005") {
t.Fatal("2")
}
if !nl.Contains("a1001") || !nl.Contains("a2000") {
t.Fatal("3")
}
if !nl.Contains("a2042") || !nl.Contains("a4321") || !nl.Contains("a3000") {
t.Fatal("4")
}
}
func TestNodeListCommasOutsideBrackets(t *testing.T) {
nl, err := ParseNodeList("cn-0010,cn0011,cn-00[13-18,22-24]")
if err != nil {
t.Fatal(err)
}
if !nl.Contains("cn-0010") || !nl.Contains("cn0011") {
t.Fatal("1")
}
if !nl.Contains("cn-0013") ||
!nl.Contains("cn-0015") ||
!nl.Contains("cn-0022") ||
!nl.Contains("cn-0018") {
t.Fatal("2")
}
}

View File

@@ -1,13 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
type S3ArchiveConfig struct {
Path string `json:"filePath"`
}
type S3Archive struct {
path string
}

File diff suppressed because it is too large

View File

@@ -1,194 +0,0 @@
{
"exclusive": 1,
"jobId": 1403244,
"statistics": {
"mem_bw": {
"avg": 63.57,
"min": 0,
"unit": {
"base": "B/s",
"prefix": "G"
},
"max": 74.5
},
"rapl_power": {
"avg": 228.07,
"min": 0,
"unit": {
"base": "W"
},
"max": 258.56
},
"ipc": {
"unit": {
"base": "IPC"
},
"max": 0.510204081632653,
"avg": 1.53846153846154,
"min": 0.0
},
"clock": {
"min": 1380.32,
"avg": 2599.39,
"unit": {
"base": "Hz",
"prefix": "M"
},
"max": 2634.46
},
"cpu_load": {
"avg": 18.4,
"min": 0,
"max": 23.58,
"unit": {
"base": "load"
}
},
"flops_any": {
"max": 404.62,
"unit": {
"base": "F/s",
"prefix": "G"
},
"avg": 225.59,
"min": 0
},
"flops_dp": {
"max": 0.24,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"mem_used": {
"min": 1.55,
"avg": 27.84,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.5
},
"flops_sp": {
"min": 0,
"avg": 225.59,
"max": 404.62,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
},
"resources": [
{
"hostname": "e0102"
},
{
"hostname": "e0103"
},
{
"hostname": "e0105"
},
{
"hostname": "e0106"
},
{
"hostname": "e0107"
},
{
"hostname": "e0108"
},
{
"hostname": "e0114"
},
{
"hostname": "e0320"
},
{
"hostname": "e0321"
},
{
"hostname": "e0325"
},
{
"hostname": "e0404"
},
{
"hostname": "e0415"
},
{
"hostname": "e0433"
},
{
"hostname": "e0437"
},
{
"hostname": "e0439"
},
{
"hostname": "e0501"
},
{
"hostname": "e0503"
},
{
"hostname": "e0505"
},
{
"hostname": "e0506"
},
{
"hostname": "e0512"
},
{
"hostname": "e0513"
},
{
"hostname": "e0514"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0716"
},
{
"hostname": "e0727"
},
{
"hostname": "e0728"
},
{
"hostname": "e0925"
},
{
"hostname": "e0926"
},
{
"hostname": "e0929"
},
{
"hostname": "e0934"
},
{
"hostname": "e0951"
}
],
"walltime": 10,
"jobState": "completed",
"cluster": "emmy",
"subCluster": "haswell",
"stopTime": 1609009562,
"user": "emmyUser6",
"startTime": 1608923076,
"partition": "work",
"tags": [],
"project": "no project",
"numNodes": 32,
"duration": 86486
}

View File

@@ -1,194 +0,0 @@
{
"stopTime": 1609387081,
"resources": [
{
"hostname": "e0151"
},
{
"hostname": "e0152"
},
{
"hostname": "e0153"
},
{
"hostname": "e0232"
},
{
"hostname": "e0303"
},
{
"hostname": "e0314"
},
{
"hostname": "e0344"
},
{
"hostname": "e0345"
},
{
"hostname": "e0348"
},
{
"hostname": "e0507"
},
{
"hostname": "e0518"
},
{
"hostname": "e0520"
},
{
"hostname": "e0522"
},
{
"hostname": "e0526"
},
{
"hostname": "e0527"
},
{
"hostname": "e0528"
},
{
"hostname": "e0530"
},
{
"hostname": "e0551"
},
{
"hostname": "e0604"
},
{
"hostname": "e0613"
},
{
"hostname": "e0634"
},
{
"hostname": "e0639"
},
{
"hostname": "e0640"
},
{
"hostname": "e0651"
},
{
"hostname": "e0653"
},
{
"hostname": "e0701"
},
{
"hostname": "e0704"
},
{
"hostname": "e0751"
},
{
"hostname": "e0809"
},
{
"hostname": "e0814"
},
{
"hostname": "e0819"
},
{
"hostname": "e0908"
}
],
"walltime": 10,
"cluster": "emmy",
"subCluster": "haswell",
"jobState": "completed",
"statistics": {
"clock": {
"max": 2634.9,
"unit": {
"base": "Hz",
"prefix": "M"
},
"min": 0,
"avg": 2597.8
},
"cpu_load": {
"max": 27.41,
"min": 0,
"avg": 18.39,
"unit": {
"base": "load"
}
},
"mem_bw": {
"min": 0,
"avg": 63.23,
"unit": {
"base": "B/s",
"prefix": "G"
},
"max": 75.06
},
"ipc": {
"min": 0.0,
"avg": 1.53846153846154,
"unit": {
"base": "IPC"
},
"max": 0.490196078431373
},
"rapl_power": {
"min": 0,
"avg": 227.32,
"unit": {
"base": "W"
},
"max": 256.22
},
"mem_used": {
"min": 1.5,
"avg": 27.77,
"unit": {
"base": "B",
"prefix": "G"
},
"max": 37.43
},
"flops_sp": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"max": 413.21,
"min": 0,
"avg": 224.41
},
"flops_dp": {
"max": 5.72,
"unit": {
"base": "F/s",
"prefix": "G"
},
"min": 0,
"avg": 0
},
"flops_any": {
"min": 0,
"avg": 224.42,
"max": 413.21,
"unit": {
"base": "F/s",
"prefix": "G"
}
}
},
"exclusive": 1,
"jobId": 1404397,
"tags": [],
"partition": "work",
"project": "no project",
"user": "emmyUser6",
"startTime": 1609300556,
"duration": 86525,
"numNodes": 32
}

View File

@@ -1,974 +0,0 @@
{
"name": "emmy",
"subClusters": [
{
"name": "haswell",
"processorType": "Intel Xeon E3-1240 v3",
"socketsPerNode": 1,
"coresPerSocket": 4,
"threadsPerCore": 1,
"flopRateScalar": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 14
},
"flopRateSimd": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 112
},
"memoryBandwidth": {
"unit": {
"prefix": "G",
"base": "B/s"
},
"value": 24
},
"nodes": "w11[27-45,49-63,69-72]",
"topology": {
"node": [
0,
1,
2,
3
],
"socket": [
[
0,
1,
2,
3
]
],
"memoryDomain": [
[
0,
1,
2,
3
]
],
"core": [
[
0
],
[
1
],
[
2
],
[
3
]
]
}
},
{
"name": "skylake",
"processorType": "Intel Xeon E3-1240 v5 ",
"socketsPerNode": 1,
"coresPerSocket": 4,
"threadsPerCore": 1,
"flopRateScalar": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 14
},
"flopRateSimd": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 112
},
"memoryBandwidth": {
"unit": {
"prefix": "G",
"base": "B/s"
},
"value": 64
},
"nodes": "w12[01-08],w13[01-31,33-56]",
"topology": {
"node": [
0,
1,
2,
3
],
"socket": [
[
0,
1,
2,
3
]
],
"memoryDomain": [
[
0,
1,
2,
3
]
],
"core": [
[
0
],
[
1
],
[
2
],
[
3
]
]
}
},
{
"name": "kabylake",
"processorType": "Intel Xeon E3-1240 v6",
"socketsPerNode": 1,
"coresPerSocket": 4,
"threadsPerCore": 1,
"flopRateScalar": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 14
},
"flopRateSimd": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 112
},
"memoryBandwidth": {
"unit": {
"prefix": "G",
"base": "B/s"
},
"value": 24
},
"nodes": "w14[01-56],w15[01-05,07-56]",
"topology": {
"node": [
0,
1,
2,
3
],
"socket": [
[
0,
1,
2,
3
]
],
"memoryDomain": [
[
0,
1,
2,
3
]
],
"core": [
[
0
],
[
1
],
[
2
],
[
3
]
]
}
},
{
"name": "icelake",
"processorType": "Intel Xeon Gold 6326",
"socketsPerNode": 2,
"coresPerSocket": 16,
"threadsPerCore": 1,
"flopRateScalar": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 432
},
"flopRateSimd": {
"unit": {
"prefix": "G",
"base": "F/s"
},
"value": 9216
},
"memoryBandwidth": {
"unit": {
"prefix": "G",
"base": "B/s"
},
"value": 350
},
"nodes": "w22[01-35],w23[01-35]",
"topology": {
"node": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71
],
"socket": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35
],
[
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71
]
],
"memoryDomain": [
[
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17
],
[
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35
],
[
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53
],
[
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71
]
],
"core": [
[
0
],
[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
],
[
7
],
[
8
],
[
9
],
[
10
],
[
11
],
[
12
],
[
13
],
[
14
],
[
15
],
[
16
],
[
17
],
[
18
],
[
19
],
[
20
],
[
21
],
[
22
],
[
23
],
[
24
],
[
25
],
[
26
],
[
27
],
[
28
],
[
29
],
[
30
],
[
31
],
[
32
],
[
33
],
[
34
],
[
35
],
[
36
],
[
37
],
[
38
],
[
39
],
[
40
],
[
41
],
[
42
],
[
43
],
[
44
],
[
45
],
[
46
],
[
47
],
[
48
],
[
49
],
[
50
],
[
51
],
[
52
],
[
53
],
[
54
],
[
55
],
[
56
],
[
57
],
[
58
],
[
59
],
[
60
],
[
61
],
[
62
],
[
63
],
[
64
],
[
65
],
[
66
],
[
67
],
[
68
],
[
69
],
[
70
],
[
71
]
]
}
}
],
"metricConfig": [
{
"name": "cpu_load",
"scope": "node",
"unit": {
"base": ""
},
"aggregation": "avg",
"timestep": 60,
"peak": 4,
"normal": 4,
"caution": 4,
"alert": 1,
"subClusters": [
{
"name": "icelake",
"peak": 32,
"normal": 32,
"caution": 16,
"alert": 1
}
]
},
{
"name": "cpu_user",
"scope": "hwthread",
"unit": {
"base": ""
},
"aggregation": "avg",
"timestep": 60,
"peak": 100,
"normal": 50,
"caution": 20,
"alert": 10,
"subClusters": [
{
"name": "haswell",
"remove": true
},
{
"name": "skylake",
"remove": true
}
]
},
{
"name": "ipc",
"scope": "hwthread",
"unit": {
"base": "IPC"
},
"aggregation": "avg",
"timestep": 60,
"peak": 4,
"normal": 2,
"caution": 1,
"alert": 0.25
},
{
"name": "mem_used",
"scope": "node",
"unit": {
"prefix": "G",
"base": "B"
},
"aggregation": "sum",
"timestep": 60,
"peak": 32,
"normal": 16,
"caution": 28,
"alert": 30,
"subClusters": [
{
"name": "icelake",
"peak": 256,
"normal": 128,
"caution": 245,
"alert": 255
}
]
},
{
"name": "flops_any",
"scope": "hwthread",
"unit": {
"prefix": "G",
"base": "F/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 112,
"normal": 50,
"caution": 20,
"alert": 10,
"subClusters": [
{
"name": "icelake",
"peak": 9216,
"normal": 432,
"caution": 100,
"alert": 50
}
]
},
{
"name": "flops_dp",
"scope": "hwthread",
"unit": {
"prefix": "G",
"base": "F/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 56,
"normal": 30,
"caution": 15,
"alert": 5,
"subClusters": [
{
"name": "icelake",
"peak": 4108,
"normal": 220,
"caution": 60,
"alert": 30
}
]
},
{
"name": "flops_sp",
"scope": "hwthread",
"unit": {
"prefix": "G",
"base": "F/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 112,
"normal": 50,
"caution": 20,
"alert": 10,
"subClusters": [
{
"name": "icelake",
"peak": 9216,
"normal": 432,
"caution": 100,
"alert": 50
}
]
},
{
"name": "mem_bw",
"scope": "socket",
"unit": {
"prefix": "G",
"base": "B/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 24,
"normal": 10,
"caution": 5,
"alert": 2,
"subClusters": [
{
"name": "icelake",
"peak": 350,
"normal": 100,
"caution": 50,
"alert": 25
}
]
},
{
"name": "clock",
"scope": "hwthread",
"unit": {
"prefix": "M",
"base": "Hz"
},
"aggregation": "avg",
"timestep": 60,
"peak": 2900,
"normal": 2900,
"caution": 1500,
"alert": 1200,
"subClusters": [
{
"name": "haswell",
"peak": 3500,
"normal": 3500,
"caution": 1500,
"alert": 1200
},
{
"name": "skylake",
"peak": 3500,
"normal": 3500,
"caution": 1500,
"alert": 1200
},
{
"name": "kabylake",
"peak": 3700,
"normal": 3700,
"caution": 1500,
"alert": 1200
}
]
},
{
"name": "vectorization_ratio",
"scope": "hwthread",
"unit": {
"base": ""
},
"aggregation": "avg",
"timestep": 60,
"peak": 100,
"normal": 60,
"caution": 40,
"alert": 10
},
{
"name": "nfs4_read",
"scope": "node",
"unit": {
"prefix": "M",
"base": "B/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1,
"subClusters": [
{
"name": "haswell",
"remove": true
}
]
},
{
"name": "nfs4_write",
"scope": "node",
"unit": {
"prefix": "M",
"base": "B/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1,
"subClusters": [
{
"name": "haswell",
"remove": true
}
]
},
{
"name": "nfs4_total",
"scope": "node",
"unit": {
"prefix": "M",
"base": "B/s"
},
"aggregation": "sum",
"timestep": 60,
"peak": 6,
"normal": 4,
"caution": 2,
"alert": 1,
"subClusters": [
{
"name": "haswell",
"remove": true
}
]
}
]
}

File diff suppressed because it is too large

View File

@@ -1 +0,0 @@
1