mirror of https://github.com/ClusterCockpit/cc-backend
synced 2024-12-26 13:29:05 +01:00

Commit 29d215fcea ("Intermediate Save commit"), parent 7251344d4a.
@@ -17,6 +17,7 @@ import (
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
 	"github.com/ClusterCockpit/cc-backend/pkg/log"
 	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	"github.com/ClusterCockpit/cc-backend/pkg/units"
 )

 // `AUTO_INCREMENT` is in a comment because of this hack:
@@ -103,11 +104,11 @@ func HandleImportFlag(flag string) error {
 			return err
 		}

-		if config.Keys.Validate {
+		// if config.Keys.Validate {
 			if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
 				return fmt.Errorf("validate job meta: %v", err)
 			}
-		}
+		// }
 		dec := json.NewDecoder(bytes.NewReader(raw))
 		dec.DisallowUnknownFields()
 		jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
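
Note what this hunk actually changes: only the `if config.Keys.Validate {` guard and its closing brace are commented out, while the validation call between them stays live. The effective behavior after the change is that schema validation runs unconditionally for every imported job:

    if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
    	return fmt.Errorf("validate job meta: %v", err)
    }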

@@ -132,6 +133,7 @@ func HandleImportFlag(flag string) error {
 			return err
 		}

+		checkJobData(&jobData)
 		SanityChecks(&jobMeta.BaseJob)
 		jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
 		if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
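
The call added here discards checkJobData's error return. A defensive variant (a sketch, not what the commit does) would propagate it like the surrounding statements:

    if err := checkJobData(&jobData); err != nil {
    	return err
    }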

@@ -368,14 +370,31 @@ func loadJobStat(job *schema.JobMeta, metric string) float64 {
 }

 func checkJobData(d *schema.JobData) error {
-	// for name, scopes := range *d {
-
-	// for scope, metric := range scopes {
-	// // 1. Unit normalisation
-
-	// }
-	// // 2. Add node scope if missing
-
-	// }
+	for _, scopes := range *d {
+		var newUnit string
+		// Add node scope if missing
+		for _, metric := range scopes {
+			if strings.Contains(metric.Unit.Base, "B/s") ||
+				strings.Contains(metric.Unit.Base, "F/s") ||
+				strings.Contains(metric.Unit.Base, "B") {
+
+				// First get overall avg
+				sum := 0.0
+				for _, s := range metric.Series {
+					sum += s.Statistics.Avg
+				}
+
+				avg := sum / float64(len(metric.Series))
+
+				for _, s := range metric.Series {
+					fp := schema.ConvertFloatToFloat64(s.Data)
+					// Normalize values with new unit prefix
+					units.NormalizeSeries(fp, avg, metric.Unit, &newUnit)
+					s.Data = schema.GetFloat64ToFloat(fp)
+				}
+				metric.Unit = newUnit
+			}
+		}
+	}
 	return nil
 }
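
The new checkJobData pass averages each byte- or flop-based metric over its series, then lets units.NormalizeSeries rescale the raw values and report a new unit prefix. A minimal, self-contained sketch of that idea follows; it is not the actual pkg/units implementation, and the prefix table and threshold rule are assumptions:

    package main

    import "fmt"

    // normalize rescales values so the series average lands in a readable
    // range and returns the matching SI prefix (sketch only).
    func normalize(values []float64, avg float64) string {
    	prefixes := []struct {
    		factor float64
    		symbol string
    	}{
    		{1e12, "T"}, {1e9, "G"}, {1e6, "M"}, {1e3, "K"},
    	}
    	for _, p := range prefixes {
    		if avg >= p.factor {
    			for i := range values {
    				values[i] /= p.factor
    			}
    			return p.symbol
    		}
    	}
    	return "" // already in base units
    }

    func main() {
    	series := []float64{2.1e9, 1.9e9, 2.3e9} // e.g. raw B/s samples
    	prefix := normalize(series, 2.1e9)
    	fmt.Println(series, prefix+"B/s") // [2.1 1.9 2.3] GB/s
    }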

@@ -211,7 +211,13 @@ func (r *JobRepository) Stop(
 }

 // TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC;
-func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggregate, filters []*model.JobFilter, weight *model.Weights, limit *int) (map[string]int, error) {
+func (r *JobRepository) CountGroupedJobs(
+	ctx context.Context,
+	aggreg model.Aggregate,
+	filters []*model.JobFilter,
+	weight *model.Weights,
+	limit *int) (map[string]int, error) {
+
 	if !aggreg.IsValid() {
 		return nil, errors.New("invalid aggregate")
 	}
@@ -301,10 +307,14 @@ func (r *JobRepository) Archive(

 var ErrNotFound = errors.New("no such job or user")

-// FindJobOrUser returns a job database ID or a username if a job or user machtes the search term.
-// As 0 is a valid job id, check if username is "" instead in order to check what machted.
-// If nothing matches the search, `ErrNotFound` is returned.
-func (r *JobRepository) FindJobOrUser(ctx context.Context, searchterm string) (job int64, username string, err error) {
+// FindJobOrUser returns a job database ID or a username if a job or user
+// matches the search term. As 0 is a valid job id, check if username is ""
+// instead in order to check what matched. If nothing matches the search,
+// `ErrNotFound` is returned.
+func (r *JobRepository) FindJobOrUser(
+	ctx context.Context,
+	searchterm string) (job int64, username string, err error) {
+
 	user := auth.GetUser(ctx)
 	if id, err := strconv.Atoi(searchterm); err == nil {
 		qb := sq.Select("job.id").From("job").Where("job.job_id = ?", id)
@@ -353,6 +363,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
 // AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
 // Hosts with zero jobs running on them will not show up!
 func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
+
 	subclusters := make(map[string]map[string]int)
 	rows, err := sq.Select("resources", "subcluster").From("job").
 		Where("job.job_state = 'running'").
@@ -390,6 +401,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
 }

 func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
+
 	res, err := sq.Update("job").
 		Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
 		Set("duration", 0).
@@ -45,7 +45,7 @@ type SubClusterConfig struct {

 type MetricConfig struct {
 	Name        string      `json:"name"`
-	Unit        string      `json:"unit"`
+	Unit        Unit        `json:"unit"`
 	Scope       MetricScope `json:"scope"`
 	Aggregation *string     `json:"aggregation"`
 	Timestep    int         `json:"timestep"`
@@ -107,3 +107,23 @@ func (s *Series) MarshalJSON() ([]byte, error) {
 	buf = append(buf, ']', '}')
 	return buf, nil
 }
+
+func ConvertFloatToFloat64(s []Float) []float64 {
+	fp := make([]float64, len(s))
+
+	for i, val := range s {
+		fp[i] = float64(val)
+	}
+
+	return fp
+}
+
+func GetFloat64ToFloat(s []float64) []Float {
+	fp := make([]Float, len(s))
+
+	for i, val := range s {
+		fp[i] = Float(val)
+	}
+
+	return fp
+}
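
These helpers shuttle a series between the JSON-aware Float type and plain float64 so numeric code (such as the normalization pass above) can work on standard floats. A usage sketch; the local Float definition only stands in for schema.Float to keep the example self-contained:

    package main

    import "fmt"

    type Float float64 // stand-in for schema.Float

    func ConvertFloatToFloat64(s []Float) []float64 {
    	fp := make([]float64, len(s))
    	for i, v := range s {
    		fp[i] = float64(v)
    	}
    	return fp
    }

    func GetFloat64ToFloat(s []float64) []Float {
    	fp := make([]Float, len(s))
    	for i, v := range s {
    		fp[i] = Float(v)
    	}
    	return fp
    }

    func main() {
    	data := []Float{2100, 1900, 2300}
    	fp := ConvertFloatToFloat64(data) // work in float64 space...
    	for i := range fp {
    		fp[i] /= 1000
    	}
    	data = GetFloat64ToFloat(fp) // ...then convert back
    	fmt.Println(data)            // [2.1 1.9 2.3]
    }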

@@ -89,11 +89,15 @@ var JobDefaults BaseJob = BaseJob{
 	MonitoringStatus: MonitoringStatusRunningOrArchiving,
 }

+type Unit struct {
+	Base   string `json:"base"`
+	Prefix string `json:"prefix"`
+}
+
 // JobStatistics model
 // @Description Specification for job metric statistics.
 type JobStatistics struct {
-	// Metric unit (see schema/unit.schema.json)
-	Unit string `json:"unit" example:"GHz"`
+	Unit Unit    `json:"unit" example:"GHz"`
 	Avg  float64 `json:"avg" example:"2500" minimum:"0"` // Job metric average
 	Min  float64 `json:"min" example:"2000" minimum:"0"` // Job metric minimum
 	Max  float64 `json:"max" example:"3000" minimum:"0"` // Job metric maximum
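
With this change a unit is no longer a bare string like "GB/s" but a structured base plus prefix. A sketch of the resulting JSON wire format (the field values are illustrative):

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    type Unit struct {
    	Base   string `json:"base"`
    	Prefix string `json:"prefix"`
    }

    func main() {
    	b, _ := json.Marshal(Unit{Base: "B/s", Prefix: "G"})
    	fmt.Println(string(b)) // {"base":"B/s","prefix":"G"}
    }

This matches the renamed "base" property in unit.schema.json further down.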

@@ -15,7 +15,7 @@ import (
 type JobData map[string]map[MetricScope]*JobMetric

 type JobMetric struct {
-	Unit     string      `json:"unit"`
+	Unit     Unit        `json:"unit"`
 	Scope    MetricScope `json:"scope"`
 	Timestep int         `json:"timestep"`
 	Series   []Series    `json:"series"`
@@ -21,7 +21,7 @@
     },
     "unit": {
       "description": "Metric unit",
-      "type": "string"
+      "$ref": "embedfs://unit.schema.json"
     },
     "scope": {
       "description": "Native measurement resolution",
@@ -38,7 +38,6 @@
         "sum",
         "avg"
       ]
-
     },
     "subClusters": {
       "description": "Array of cluster hardware partition metric thresholds",
@@ -349,7 +349,6 @@
       "jobState",
       "duration",
       "resources",
-      "tags",
       "statistics"
     ]
   }
@@ -5,7 +5,7 @@
   "description": "Format specification for job metric units",
   "type": "object",
   "properties": {
-    "base_unit": {
+    "base": {
       "description": "Metric base unit",
       "type": "string",
       "enum": [
@@ -36,6 +36,6 @@
     }
   },
   "required": [
-    "base_unit"
+    "base"
   ]
 }
@@ -602,4 +602,17 @@ func testImportFlag(t *testing.T) {
 	if len(data) != 8 {
 		t.Errorf("Job data length: Got %d, want 8", len(data))
 	}
+
+	r := map[string]string{"mem_used": "GB", "net_bw": "KB/s",
+		"cpu_power": "W", "cpu_used": "cpu_used",
+		"file_bw": "KB/s", "flops_any": "Flops/s",
+		"mem_bw": "GB/s", "ipc": "IPC"}
+
+	for name, scopes := range data {
+		for _, metric := range scopes {
+			if metric.Unit != r[name] {
+				t.Errorf("Metric %s unit: Got %s, want %s", name, metric.Unit, r[name])
+			}
+		}
+	}
 }
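
A caveat on this intermediate state: the expectation map holds plain strings, while JobMetric.Unit is now the Unit struct, so `metric.Unit != r[name]` compares a struct against a string and would not compile as-is. One way to bridge the two, assuming the expected strings are prefix plus base (unitString is a hypothetical helper, not part of the commit):

    // unitString renders a structured Unit the way the expectation map
    // spells it, e.g. Unit{Prefix: "G", Base: "B"} -> "GB".
    func unitString(u Unit) string { return u.Prefix + u.Base }

    // ...inside the loop:
    // if unitString(metric.Unit) != r[name] { ... }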