Merge pull request #398 from ClusterCockpit/Refactor-job-struct

Refactor job struct
Jan Eitzinger, 2025-06-02 12:13:43 +02:00 (committed by GitHub)
30 changed files with 259 additions and 803 deletions
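
In outline, the refactor folds the former BaseJob and JobMeta types into a single schema.Job (see the pkg/schema/job.go hunks at the end) and replaces the time.Time / StartTimeUnix pair with one Unix-epoch int64, which removes the conversion glue in the archive backends. A compressed sketch of the resulting type (fields abridged; JobStatistics is a placeholder stand-in here):

package schema

// JobStatistics stands in for the existing statistics type (unchanged by this PR).
type JobStatistics struct{ Avg, Min, Max float64 }

// Job after this PR: all former BaseJob fields plus the two fields that
// previously lived only on JobMeta (Statistics, optional database ID).
type Job struct {
	Cluster string `json:"cluster" db:"cluster"`
	JobID   int64  `json:"jobId" db:"job_id"`
	// ... remaining former BaseJob fields elided ...

	Statistics map[string]JobStatistics `json:"statistics"`
	ID         *int64                   `json:"id,omitempty" db:"id"`
	StartTime  int64                    `json:"startTime" db:"start_time"`
}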

View File: pkg/archive/archive.go

@@ -23,7 +23,7 @@ type ArchiveBackend interface {
Exists(job *schema.Job) bool
- LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
+ LoadJobMeta(job *schema.Job) (*schema.Job, error)
LoadJobData(job *schema.Job) (schema.JobData, error)
@@ -31,9 +31,9 @@ type ArchiveBackend interface {
LoadClusterCfg(name string) (*schema.Cluster, error)
- StoreJobMeta(jobMeta *schema.JobMeta) error
+ StoreJobMeta(jobMeta *schema.Job) error
- ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) error
+ ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
GetClusters() []string
@@ -51,7 +51,7 @@ type ArchiveBackend interface {
}
type JobContainer struct {
- Meta *schema.JobMeta
+ Meta *schema.Job
Data *schema.JobData
}
@@ -162,7 +162,6 @@ func LoadScopedStatsFromArchive(
metrics []string,
scopes []schema.MetricScope,
) (schema.ScopedJobStats, error) {
data, err := ar.LoadJobStats(job)
if err != nil {
log.Errorf("Error while loading job stats from archiveBackend: %s", err.Error())

View File: pkg/archive/archive_test.go

@@ -9,7 +9,6 @@ import (
"fmt"
"path/filepath"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -32,12 +31,12 @@ func setup(t *testing.T) archive.ArchiveBackend {
jobs[0] = &schema.Job{}
jobs[0].JobID = 1403244
jobs[0].Cluster = "emmy"
- jobs[0].StartTime = time.Unix(1608923076, 0)
+ jobs[0].StartTime = 1608923076
jobs[1] = &schema.Job{}
jobs[0].JobID = 1404397
jobs[0].Cluster = "emmy"
- jobs[0].StartTime = time.Unix(1609300556, 0)
+ jobs[0].StartTime = 1609300556
return archive.GetHandle()
}
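
With StartTime now holding the raw Unix timestamp, the time.Unix(...) wrapping moves from every assignment to the few places that actually need a time.Time. A runnable illustration (reduced stand-in for schema.Job):

package main

import (
	"fmt"
	"time"
)

// Reduced stand-in for schema.Job after this PR.
type Job struct{ StartTime int64 }

func main() {
	job := Job{StartTime: 1608923076} // was: time.Unix(1608923076, 0)

	// Convert on demand when formatting or doing calendar math.
	t := time.Unix(job.StartTime, 0)
	fmt.Println(job.StartTime, t.UTC().Format(time.RFC3339))
	// Output: 1608923076 2020-12-25T19:04:36Z
}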

View File: pkg/archive/clusterConfig.go

@@ -223,7 +223,7 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
- func AssignSubCluster(job *schema.BaseJob) error {
+ func AssignSubCluster(job *schema.Job) error {
cluster := GetCluster(job.Cluster)
if cluster == nil {
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)

View File: pkg/archive/fsBackend.go

@@ -53,7 +53,7 @@ func getDirectory(
rootPath,
job.Cluster,
lvl1, lvl2,
- strconv.FormatInt(job.StartTime.Unix(), 10))
+ strconv.FormatInt(job.StartTime, 10))
}
func getPath(
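
getDirectory keeps the same archive layout; only the final path segment is now formatted from the integer directly. A self-contained sketch (the lvl1/lvl2 split of the job ID is assumed here for illustration, not taken from this diff):

package main

import (
	"fmt"
	"path/filepath"
	"strconv"
)

type Job struct {
	Cluster   string
	JobID     int64
	StartTime int64 // Unix seconds after this PR
}

// getDirectory mirrors the path construction above; the lvl1/lvl2
// derivation is an assumption for this sketch.
func getDirectory(job *Job, rootPath string) string {
	lvl1 := strconv.FormatInt(job.JobID/1000, 10)
	lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
	return filepath.Join(
		rootPath,
		job.Cluster,
		lvl1, lvl2,
		strconv.FormatInt(job.StartTime, 10)) // was: job.StartTime.Unix()
}

func main() {
	job := &Job{Cluster: "emmy", JobID: 1403244, StartTime: 1608923076}
	fmt.Println(getDirectory(job, "/var/job-archive"))
	// /var/job-archive/emmy/1403/244/1608923076
}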
@@ -65,15 +65,15 @@ func getPath(
getDirectory(job, rootPath), file)
}
- func loadJobMeta(filename string) (*schema.JobMeta, error) {
+ func loadJobMeta(filename string) (*schema.Job, error) {
b, err := os.ReadFile(filename)
if err != nil {
log.Errorf("loadJobMeta() > open file error: %v", err)
- return &schema.JobMeta{}, err
+ return nil, err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
- return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err)
+ return nil, fmt.Errorf("validate job meta: %v", err)
}
}
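
Note the changed error contract: loadJobMeta now returns nil on failure instead of a pointer to an empty struct, so callers must check the error (or the pointer) before dereferencing. A minimal illustration with stand-in types:

package main

import (
	"errors"
	"fmt"
)

type Job struct{ JobID int64 }

// loadJobMeta stand-in mirroring the new contract: nil job on failure.
func loadJobMeta(filename string) (*Job, error) {
	return nil, errors.New("open file error")
}

func main() {
	job, err := loadJobMeta("meta.json")
	if err != nil {
		// Pre-PR code could still touch the returned empty struct here;
		// post-PR the pointer is nil, so return before using job.
		fmt.Println("load failed:", err)
		return
	}
	fmt.Println(job.JobID)
}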
@@ -429,7 +429,7 @@ func (fsa *FsArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, erro
return loadJobStats(filename, isCompressed)
}
- func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {
+ func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
filename := getPath(job, fsa.path, "meta.json")
return loadJobMeta(filename)
}
@@ -518,18 +518,13 @@ func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
return ch
}
- func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
- job := schema.Job{
- BaseJob: jobMeta.BaseJob,
- StartTime: time.Unix(jobMeta.StartTime, 0),
- StartTimeUnix: jobMeta.StartTime,
- }
- f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
+ func (fsa *FsArchive) StoreJobMeta(job *schema.Job) error {
+ f, err := os.Create(getPath(job, fsa.path, "meta.json"))
if err != nil {
log.Error("Error while creating filepath for meta.json")
return err
}
- if err := EncodeJobMeta(f, jobMeta); err != nil {
+ if err := EncodeJobMeta(f, job); err != nil {
log.Error("Error while encoding job metadata to meta.json file")
return err
}
@@ -546,15 +541,10 @@ func (fsa *FsArchive) GetClusters() []string {
}
func (fsa *FsArchive) ImportJob(
- jobMeta *schema.JobMeta,
+ jobMeta *schema.Job,
jobData *schema.JobData,
) error {
- job := schema.Job{
- BaseJob: jobMeta.BaseJob,
- StartTime: time.Unix(jobMeta.StartTime, 0),
- StartTimeUnix: jobMeta.StartTime,
- }
- dir := getPath(&job, fsa.path, "")
+ dir := getPath(jobMeta, fsa.path, "")
if err := os.MkdirAll(dir, 0777); err != nil {
log.Error("Error while creating job archive path")
return err

View File: pkg/archive/fsBackend_test.go

@@ -9,7 +9,6 @@ import (
"fmt"
"path/filepath"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -86,8 +85,11 @@ func TestLoadJobMeta(t *testing.T) {
t.Fatal(err)
}
- jobIn := schema.Job{BaseJob: schema.JobDefaults}
- jobIn.StartTime = time.Unix(1608923076, 0)
+ jobIn := schema.Job{
+ Exclusive: 1,
+ MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+ }
+ jobIn.StartTime = 1608923076
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
@@ -114,8 +116,11 @@ func TestLoadJobData(t *testing.T) {
t.Fatal(err)
}
- jobIn := schema.Job{BaseJob: schema.JobDefaults}
- jobIn.StartTime = time.Unix(1608923076, 0)
+ jobIn := schema.Job{
+ Exclusive: 1,
+ MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+ }
+ jobIn.StartTime = 1608923076
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
@@ -142,8 +147,11 @@ func BenchmarkLoadJobData(b *testing.B) {
var fsa FsArchive
fsa.Init(json.RawMessage(archiveCfg))
- jobIn := schema.Job{BaseJob: schema.JobDefaults}
- jobIn.StartTime = time.Unix(1608923076, 0)
+ jobIn := schema.Job{
+ Exclusive: 1,
+ MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+ }
+ jobIn.StartTime = 1608923076
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
@@ -165,8 +173,11 @@ func BenchmarkLoadJobDataCompressed(b *testing.B) {
var fsa FsArchive
fsa.Init(json.RawMessage(archiveCfg))
- jobIn := schema.Job{BaseJob: schema.JobDefaults}
- jobIn.StartTime = time.Unix(1608923076, 0)
+ jobIn := schema.Job{
+ Exclusive: 1,
+ MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+ }
+ jobIn.StartTime = 1608923076
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"

View File: pkg/archive/json.go

@@ -69,8 +69,8 @@ func DecodeJobStats(r io.Reader, k string) (schema.ScopedJobStats, error) {
return nil, err
}
- func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
- var d schema.JobMeta
+ func DecodeJobMeta(r io.Reader) (*schema.Job, error) {
+ var d schema.Job
if err := json.NewDecoder(r).Decode(&d); err != nil {
log.Warn("Error while decoding raw job meta json")
return &d, err
@@ -103,7 +103,7 @@ func EncodeJobData(w io.Writer, d *schema.JobData) error {
return nil
}
- func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
+ func EncodeJobMeta(w io.Writer, d *schema.Job) error {
// Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil {
log.Warn("Error while encoding new job meta json")

View File: pkg/schema/job.go

@@ -8,43 +8,8 @@ import (
"errors"
"fmt"
"io"
"time"
)
- // BaseJob is the common part of the job metadata structs
- //
- // Common subset of Job and JobMeta. Use one of those, not this type directly.
- type BaseJob struct {
- Cluster string `json:"cluster" db:"cluster" example:"fritz"`
- SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
- Partition string `json:"partition,omitempty" db:"cluster_partition" example:"main"`
- Project string `json:"project" db:"project" example:"abcd200"`
- User string `json:"user" db:"hpc_user" example:"abcd100h"`
- State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
- Tags []*Tag `json:"tags,omitempty"`
- RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
- RawFootprint []byte `json:"-" db:"footprint"`
- RawMetaData []byte `json:"-" db:"meta_data"`
- RawResources []byte `json:"-" db:"resources"`
- Resources []*Resource `json:"resources"`
- EnergyFootprint map[string]float64 `json:"energyFootprint"`
- Footprint map[string]float64 `json:"footprint"`
- MetaData map[string]string `json:"metaData"`
- ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
- Energy float64 `json:"energy" db:"energy"`
- ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
- Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
- JobID int64 `json:"jobId" db:"job_id" example:"123000"`
- Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
- SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
- MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
- Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
- NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
- NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
- NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
- }
// Job struct type
//
// This type is used as the GraphQL interface and using sqlx as a table row.
@@ -52,10 +17,36 @@ type BaseJob struct {
// Job model
// @Description Information of a HPC job.
type Job struct {
- StartTime time.Time `json:"startTime"`
- BaseJob
- ID int64 `json:"id" db:"id"`
- StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
+ Cluster string `json:"cluster" db:"cluster" example:"fritz"`
+ SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
+ Partition string `json:"partition,omitempty" db:"cluster_partition" example:"main"`
+ Project string `json:"project" db:"project" example:"abcd200"`
+ User string `json:"user" db:"hpc_user" example:"abcd100h"`
+ State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
+ Tags []*Tag `json:"tags,omitempty"`
+ RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
+ RawFootprint []byte `json:"-" db:"footprint"`
+ RawMetaData []byte `json:"-" db:"meta_data"`
+ RawResources []byte `json:"-" db:"resources"`
+ Resources []*Resource `json:"resources"`
+ EnergyFootprint map[string]float64 `json:"energyFootprint"`
+ Footprint map[string]float64 `json:"footprint"`
+ MetaData map[string]string `json:"metaData"`
+ ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
+ Energy float64 `json:"energy" db:"energy"`
+ ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
+ Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
+ JobID int64 `json:"jobId" db:"job_id" example:"123000"`
+ Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
+ SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
+ MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
+ Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
+ NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
+ NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
+ NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
+ Statistics map[string]JobStatistics `json:"statistics"`
+ ID *int64 `json:"id,omitempty" db:"id"`
+ StartTime int64 `json:"startTime" db:"start_time" example:"1649723812"`
}
// JobMeta struct type
@@ -70,12 +61,12 @@ type Job struct {
//
// JobMeta model
// @Description Meta data information of a HPC job.
- type JobMeta struct {
- ID *int64 `json:"id,omitempty"`
- Statistics map[string]JobStatistics `json:"statistics"`
- BaseJob
- StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
- }
+ // type JobMeta struct {
+ // ID *int64 `json:"id,omitempty"`
+ // BaseJob
+ // Statistics map[string]JobStatistics `json:"statistics"`
+ // StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
+ // }
type JobLink struct {
ID int64 `json:"id"`
@@ -94,10 +85,10 @@ const (
MonitoringStatusArchivingSuccessful int32 = 3
)
- var JobDefaults BaseJob = BaseJob{
- Exclusive: 1,
- MonitoringStatus: MonitoringStatusRunningOrArchiving,
- }
+ // var JobDefaults Job = Job{
+ // Exclusive: 1,
+ // MonitoringStatus: MonitoringStatusRunningOrArchiving,
+ // }
type Unit struct {
Base string `json:"base"`
@@ -144,9 +135,9 @@ const (
JobStateOutOfMemory JobState = "out_of_memory"
)
- func (j JobMeta) GoString() string {
- return fmt.Sprintf("JobMeta{ID:%d, StartTime:%d, JobID:%v, BaseJob:%v}",
- j.ID, j.StartTime, j.JobID, j.BaseJob)
+ func (j Job) GoString() string {
+ return fmt.Sprintf("Job{ID:%d, StartTime:%d, JobID:%v, BaseJob:%v}",
+ j.ID, j.StartTime, j.JobID, j)
}
func (e *JobState) UnmarshalGQL(v any) error {
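
One externally visible consequence of the merged struct: startTime serializes as an integer. Previously Job.StartTime was a time.Time (marshalled by encoding/json as an RFC 3339 string) while JobMeta.StartTime was already an int64; now there is a single integer wire format, and the former JobDefaults are set explicitly, as the updated tests above do. A small check, assuming the post-PR schema package:

package main

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
	job := schema.Job{
		Exclusive:        1, // former JobDefaults, now set explicitly
		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
	}
	job.JobID = 1403244
	job.Cluster = "emmy"
	job.StartTime = 1608923076

	b, err := json.Marshal(job)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(b)) // "startTime":1608923076, not an RFC 3339 string
}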