mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-22 20:41:40 +02:00
Remove jobMeta and use job struct everywhere
This commit is contained in:
@@ -278,7 +278,7 @@ func TestRestApi(t *testing.T) {
|
||||
job.MonitoringStatus != 1 ||
|
||||
job.SMT != 1 ||
|
||||
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
|
||||
job.StartTime.Unix() != 123456789 {
|
||||
job.StartTime != 123456789 {
|
||||
t.Fatalf("unexpected job properties: %#v", job)
|
||||
}
|
||||
|
||||
|
@@ -150,9 +150,9 @@ type DeleteJobApiRequest struct {
|
||||
|
||||
// GetJobsApiResponse model
|
||||
type GetJobsApiResponse struct {
|
||||
Jobs []*schema.JobMeta `json:"jobs"` // Array of jobs
|
||||
Items int `json:"items"` // Number of jobs returned
|
||||
Page int `json:"page"` // Page id returned
|
||||
Jobs []*schema.Job `json:"jobs"` // Array of jobs
|
||||
Items int `json:"items"` // Number of jobs returned
|
||||
Page int `json:"page"` // Page id returned
|
||||
}
|
||||
|
||||
// GetClustersApiResponse model
|
||||
@@ -361,7 +361,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
results := make([]*schema.JobMeta, 0, len(jobs))
|
||||
results := make([]*schema.Job, 0, len(jobs))
|
||||
for _, job := range jobs {
|
||||
if withMetadata {
|
||||
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||
@@ -370,27 +370,21 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
res := &schema.JobMeta{
|
||||
ID: &job.ID,
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
}
|
||||
|
||||
res.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||
res.Statistics, err = archive.GetStatistics(job)
|
||||
if job.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||
job.Statistics, err = archive.GetStatistics(job)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
results = append(results, res)
|
||||
results = append(results, job)
|
||||
}
|
||||
|
||||
log.Debugf("/api/jobs: %d jobs returned", len(results))
|
||||
@@ -449,7 +443,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -542,7 +536,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -683,7 +677,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -696,7 +690,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
for _, tag := range req {
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), job.ID, tag.Type, tag.Name, tag.Scope)
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), *job.ID, tag.Type, tag.Name, tag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -745,7 +739,7 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -764,7 +758,7 @@ func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
continue
|
||||
}
|
||||
|
||||
remainingTags, err := api.JobRepository.RemoveJobTagByRequest(repository.GetUserFromContext(r.Context()), job.ID, rtag.Type, rtag.Name, rtag.Scope)
|
||||
remainingTags, err := api.JobRepository.RemoveJobTagByRequest(repository.GetUserFromContext(r.Context()), *job.ID, rtag.Type, rtag.Name, rtag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -840,7 +834,10 @@ func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/jobs/start_job/ [post]
|
||||
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
req := schema.Job{
|
||||
Exclusive: 1,
|
||||
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||
}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
@@ -849,7 +846,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
log.Printf("REST: %s\n", req.GoString())
|
||||
req.State = schema.JobStateRunning
|
||||
|
||||
if err := importer.SanityChecks(&req.BaseJob); err != nil {
|
||||
if err := importer.SanityChecks(&req); err != nil {
|
||||
handleError(err, http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
@@ -866,7 +863,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
} else if err == nil {
|
||||
for _, job := range jobs {
|
||||
if (req.StartTime - job.StartTimeUnix) < 86400 {
|
||||
if (req.StartTime - job.StartTime) < 86400 {
|
||||
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d, jobid: %d", job.ID, job.JobID), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
@@ -1023,7 +1020,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
|
||||
err = api.JobRepository.DeleteJobById(job.ID)
|
||||
err = api.JobRepository.DeleteJobById(*job.ID)
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
@@ -1087,8 +1084,8 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
return
|
||||
}
|
||||
|
||||
if job == nil || job.StartTime.Unix() > req.StopTime {
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
|
||||
if job == nil || job.StartTime > req.StopTime {
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1100,11 +1097,11 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
}
|
||||
|
||||
// Mark job as stopped in the database (update state and duration)
|
||||
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
||||
job.Duration = int32(req.StopTime - job.StartTime)
|
||||
job.State = req.State
|
||||
api.JobRepository.Mutex.Lock()
|
||||
if err := api.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
if err := api.JobRepository.StopCached(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
api.JobRepository.Mutex.Unlock()
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -1112,7 +1109,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
}
|
||||
api.JobRepository.Mutex.Unlock()
|
||||
|
||||
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||
|
||||
// Send a response (with status OK). This means that errors that happen from here on forward
|
||||
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
||||
|
@@ -41,7 +41,7 @@ func archivingWorker() {
|
||||
// will fail if job meta not in repository
|
||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ func archivingWorker() {
|
||||
jobMeta, err := ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
|
@@ -16,7 +16,7 @@ import (
|
||||
)
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
@@ -40,11 +40,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
job.Statistics = make(map[string]schema.JobStatistics)
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
@@ -61,7 +57,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
}
|
||||
|
||||
// Round AVG Result to 2 Digits
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
job.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
@@ -76,8 +72,8 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if config.Keys.DisableArchive {
|
||||
return jobMeta, nil
|
||||
return job, nil
|
||||
}
|
||||
|
||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
||||
return job, archive.GetHandle().ImportJob(job, &jobData)
|
||||
}
|
||||
|
@@ -31,7 +31,7 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
|
||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
|
||||
}
|
||||
|
||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||
@@ -615,9 +615,9 @@ func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.Job
|
||||
numThreadsInt := int(job.NumHWThreads)
|
||||
numAccsInt := int(job.NumAcc)
|
||||
res = append(res, &model.JobStats{
|
||||
ID: int(job.ID),
|
||||
ID: int(*job.ID),
|
||||
JobID: strconv.Itoa(int(job.JobID)),
|
||||
StartTime: int(job.StartTime.Unix()),
|
||||
StartTime: int(job.StartTime),
|
||||
Duration: int(job.Duration),
|
||||
Cluster: job.Cluster,
|
||||
SubCluster: job.SubCluster,
|
||||
|
@@ -42,7 +42,10 @@ func HandleImportFlag(flag string) error {
|
||||
}
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
job := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
job := schema.Job{
|
||||
Exclusive: 1,
|
||||
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||
}
|
||||
if err = dec.Decode(&job); err != nil {
|
||||
log.Warn("Error while decoding raw json metadata for import")
|
||||
return err
|
||||
@@ -141,7 +144,7 @@ func HandleImportFlag(flag string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = SanityChecks(&job.BaseJob); err != nil {
|
||||
if err = SanityChecks(&job); err != nil {
|
||||
log.Warn("BaseJob SanityChecks failed")
|
||||
return err
|
||||
}
|
||||
|
@@ -60,11 +60,6 @@ func InitDB() error {
|
||||
}
|
||||
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
job := schema.Job{
|
||||
BaseJob: jobMeta.BaseJob,
|
||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
}
|
||||
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
@@ -72,7 +67,7 @@ func InitDB() error {
|
||||
return err
|
||||
}
|
||||
|
||||
job.Footprint = make(map[string]float64)
|
||||
jobMeta.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
@@ -83,16 +78,16 @@ func InitDB() error {
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||
jobMeta.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
jobMeta.RawFootprint, err = json.Marshal(jobMeta.Footprint)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.EnergyFootprint = make(map[string]float64)
|
||||
jobMeta.EnergyFootprint = make(map[string]float64)
|
||||
|
||||
// Total Job Energy Outside Loop
|
||||
totalEnergy := 0.0
|
||||
@@ -117,45 +112,45 @@ func InitDB() error {
|
||||
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||
}
|
||||
|
||||
job.EnergyFootprint[fp] = metricEnergy
|
||||
jobMeta.EnergyFootprint[fp] = metricEnergy
|
||||
totalEnergy += metricEnergy
|
||||
}
|
||||
|
||||
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||
jobMeta.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if jobMeta.RawEnergyFootprint, err = json.Marshal(jobMeta.EnergyFootprint); err != nil {
|
||||
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
jobMeta.RawResources, err = json.Marshal(jobMeta.Resources)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
jobMeta.RawMetaData, err = json.Marshal(jobMeta.MetaData)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
||||
if err := SanityChecks(jobMeta); err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
id, err := r.TransactionAddNamed(t,
|
||||
repository.NamedJobInsert, job)
|
||||
repository.NamedJobInsert, jobMeta)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
for _, tag := range jobMeta.Tags {
|
||||
tagstr := tag.Name + ":" + tag.Type
|
||||
tagId, ok := tags[tagstr]
|
||||
if !ok {
|
||||
@@ -190,7 +185,7 @@ func InitDB() error {
|
||||
}
|
||||
|
||||
// This function also sets the subcluster if necessary!
|
||||
func SanityChecks(job *schema.BaseJob) error {
|
||||
func SanityChecks(job *schema.Job) error {
|
||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
||||
}
|
||||
|
@@ -183,8 +183,8 @@ func (ccms *CCMetricStore) LoadData(
|
||||
|
||||
req := ApiQueryRequest{
|
||||
Cluster: job.Cluster,
|
||||
From: job.StartTime.Unix(),
|
||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
||||
From: job.StartTime,
|
||||
To: job.StartTime + int64(job.Duration),
|
||||
Queries: queries,
|
||||
WithStats: true,
|
||||
WithData: true,
|
||||
@@ -570,7 +570,6 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
metrics []string,
|
||||
ctx context.Context,
|
||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
|
||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
|
||||
if err != nil {
|
||||
log.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error())
|
||||
@@ -579,8 +578,8 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
|
||||
req := ApiQueryRequest{
|
||||
Cluster: job.Cluster,
|
||||
From: job.StartTime.Unix(),
|
||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
||||
From: job.StartTime,
|
||||
To: job.StartTime + int64(job.Duration),
|
||||
Queries: queries,
|
||||
WithStats: true,
|
||||
WithData: false,
|
||||
@@ -638,8 +637,8 @@ func (ccms *CCMetricStore) LoadScopedStats(
|
||||
|
||||
req := ApiQueryRequest{
|
||||
Cluster: job.Cluster,
|
||||
From: job.StartTime.Unix(),
|
||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
||||
From: job.StartTime,
|
||||
To: job.StartTime + int64(job.Duration),
|
||||
Queries: queries,
|
||||
WithStats: true,
|
||||
WithData: false,
|
||||
@@ -816,7 +815,6 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
// 0) Init additional vars
|
||||
var totalNodes int = 0
|
||||
var hasNextPage bool = false
|
||||
@@ -975,7 +973,6 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||
|
||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||
assignedScope := []schema.MetricScope{}
|
||||
|
||||
|
@@ -1,575 +0,0 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricdata
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||
)
|
||||
|
||||
// InfluxDBv2DataRepositoryConfig is the JSON-decoded connection configuration
// of an InfluxDB v2 metric data repository.
type InfluxDBv2DataRepositoryConfig struct {
	// Url is the server address handed to the InfluxDB client constructor.
	Url string `json:"url"`
	// Token is the authentication token handed to the client constructor.
	Token string `json:"token"`
	// Bucket is the bucket name inserted into every Flux query.
	Bucket string `json:"bucket"`
	// Org is the organization the query API is created for.
	Org string `json:"org"`
	// SkipTls is copied into tls.Config.InsecureSkipVerify by Init.
	SkipTls bool `json:"skiptls"`
}
|
||||
|
||||
type InfluxDBv2DataRepository struct {
|
||||
client influxdb2.Client
|
||||
queryClient influxdb2Api.QueryAPI
|
||||
bucket, measurement string
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
|
||||
var config InfluxDBv2DataRepositoryConfig
|
||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||
log.Warn("Error while unmarshaling raw json config")
|
||||
return err
|
||||
}
|
||||
|
||||
idb.client = influxdb2.NewClientWithOptions(config.Url, config.Token, influxdb2.DefaultOptions().SetTLSConfig(&tls.Config{InsecureSkipVerify: config.SkipTls}))
|
||||
idb.queryClient = idb.client.QueryAPI(config.Org)
|
||||
idb.bucket = config.Bucket
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) formatTime(t time.Time) string {
|
||||
return t.Format(time.RFC3339) // Like “2006-01-02T15:04:05Z07:00”
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) epochToTime(epoch int64) time.Time {
|
||||
return time.Unix(epoch, 0)
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
resolution int) (schema.JobData, error) {
|
||||
|
||||
log.Infof("InfluxDB 2 Backend: Resolution Scaling not Implemented, will return default timestep. Requested Resolution %d", resolution)
|
||||
|
||||
measurementsConds := make([]string, 0, len(metrics))
|
||||
for _, m := range metrics {
|
||||
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
|
||||
}
|
||||
measurementsCond := strings.Join(measurementsConds, " or ")
|
||||
|
||||
hostsConds := make([]string, 0, len(job.Resources))
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO
|
||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
||||
}
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
jobData := make(schema.JobData) // Empty Schema: map[<string>FIELD]map[<MetricScope>SCOPE]<*JobMetric>METRIC
|
||||
// Requested Scopes
|
||||
for _, scope := range scopes {
|
||||
query := ""
|
||||
switch scope {
|
||||
case "node":
|
||||
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows <-- Resolution could be added here?
|
||||
// log.Info("Scope 'node' requested. ")
|
||||
query = fmt.Sprintf(`
|
||||
from(bucket: "%s")
|
||||
|> range(start: %s, stop: %s)
|
||||
|> filter(fn: (r) => (%s) and (%s) )
|
||||
|> drop(columns: ["_start", "_stop"])
|
||||
|> group(columns: ["hostname", "_measurement"])
|
||||
|> aggregateWindow(every: 60s, fn: mean)
|
||||
|> drop(columns: ["_time"])`,
|
||||
idb.bucket,
|
||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
||||
measurementsCond, hostsCond)
|
||||
case "socket":
|
||||
log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
continue
|
||||
case "core":
|
||||
log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
continue
|
||||
// Get Finest Granularity only, Set NULL to 0.0
|
||||
// query = fmt.Sprintf(`
|
||||
// from(bucket: "%s")
|
||||
// |> range(start: %s, stop: %s)
|
||||
// |> filter(fn: (r) => %s )
|
||||
// |> filter(fn: (r) => %s )
|
||||
// |> drop(columns: ["_start", "_stop", "cluster"])
|
||||
// |> map(fn: (r) => (if exists r._value then {r with _value: r._value} else {r with _value: 0.0}))`,
|
||||
// idb.bucket,
|
||||
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
|
||||
// measurementsCond, hostsCond)
|
||||
case "hwthread":
|
||||
log.Info(" Scope 'hwthread' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
continue
|
||||
case "accelerator":
|
||||
log.Info(" Scope 'accelerator' requested, but not yet supported: Will return 'node' scope only. ")
|
||||
continue
|
||||
default:
|
||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
||||
continue
|
||||
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
|
||||
}
|
||||
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
log.Error("Error while performing query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Init Metrics: Only Node level now -> TODO: Matching /check on scope level ...
|
||||
for _, metric := range metrics {
|
||||
jobMetric, ok := jobData[metric]
|
||||
if !ok {
|
||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
||||
jobMetric = map[schema.MetricScope]*schema.JobMetric{
|
||||
scope: { // uses scope var from above!
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Timestep,
|
||||
Series: make([]schema.Series, 0, len(job.Resources)),
|
||||
StatisticsSeries: nil, // Should be: &schema.StatsSeries{},
|
||||
},
|
||||
}
|
||||
}
|
||||
jobData[metric] = jobMetric
|
||||
}
|
||||
|
||||
// Process Result: Time-Data
|
||||
field, host, hostSeries := "", "", schema.Series{}
|
||||
// typeId := 0
|
||||
switch scope {
|
||||
case "node":
|
||||
for rows.Next() {
|
||||
row := rows.Record()
|
||||
if host == "" || host != row.ValueByKey("hostname").(string) || rows.TableChanged() {
|
||||
if host != "" {
|
||||
// Append Series before reset
|
||||
jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
||||
}
|
||||
field, host = row.Measurement(), row.ValueByKey("hostname").(string)
|
||||
hostSeries = schema.Series{
|
||||
Hostname: host,
|
||||
Statistics: schema.MetricStatistics{}, //TODO Add Statistics
|
||||
Data: make([]schema.Float, 0),
|
||||
}
|
||||
}
|
||||
val, ok := row.Value().(float64)
|
||||
if ok {
|
||||
hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
||||
} else {
|
||||
hostSeries.Data = append(hostSeries.Data, schema.Float(0))
|
||||
}
|
||||
}
|
||||
case "socket":
|
||||
continue
|
||||
case "accelerator":
|
||||
continue
|
||||
case "hwthread":
|
||||
// See below @ core
|
||||
continue
|
||||
case "core":
|
||||
continue
|
||||
// Include Series.Id in hostSeries
|
||||
// for rows.Next() {
|
||||
// row := rows.Record()
|
||||
// if ( host == "" || host != row.ValueByKey("hostname").(string) || typeId != row.ValueByKey("type-id").(int) || rows.TableChanged() ) {
|
||||
// if ( host != "" ) {
|
||||
// // Append Series before reset
|
||||
// jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
||||
// }
|
||||
// field, host, typeId = row.Measurement(), row.ValueByKey("hostname").(string), row.ValueByKey("type-id").(int)
|
||||
// hostSeries = schema.Series{
|
||||
// Hostname: host,
|
||||
// Id: &typeId,
|
||||
// Statistics: nil,
|
||||
// Data: make([]schema.Float, 0),
|
||||
// }
|
||||
// }
|
||||
// val := row.Value().(float64)
|
||||
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
||||
// }
|
||||
default:
|
||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
||||
continue
|
||||
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
|
||||
}
|
||||
// Append last Series
|
||||
jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
||||
}
|
||||
|
||||
// Get Stats
|
||||
stats, err := idb.LoadStats(job, metrics, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading statistics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, scope := range scopes {
|
||||
if scope == "node" { // No 'socket/core' support yet
|
||||
for metric, nodes := range stats {
|
||||
for node, stats := range nodes {
|
||||
for index, _ := range jobData[metric][scope].Series {
|
||||
if jobData[metric][scope].Series[index].Hostname == node {
|
||||
jobData[metric][scope].Series[index].Statistics = schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) LoadStats(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
|
||||
hostsConds := make([]string, 0, len(job.Resources))
|
||||
for _, h := range job.Resources {
|
||||
if h.HWThreads != nil || h.Accelerators != nil {
|
||||
// TODO
|
||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
||||
}
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
// lenMet := len(metrics)
|
||||
|
||||
for _, metric := range metrics {
|
||||
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
data = from(bucket: "%s")
|
||||
|> range(start: %s, stop: %s)
|
||||
|> filter(fn: (r) => r._measurement == "%s" and r._field == "value" and (%s))
|
||||
union(tables: [data |> mean(column: "_value") |> set(key: "_field", value: "avg"),
|
||||
data |> min(column: "_value") |> set(key: "_field", value: "min"),
|
||||
data |> max(column: "_value") |> set(key: "_field", value: "max")])
|
||||
|> pivot(rowKey: ["hostname"], columnKey: ["_field"], valueColumn: "_value")
|
||||
|> group()`,
|
||||
idb.bucket,
|
||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
||||
metric, hostsCond)
|
||||
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
log.Error("Error while performing query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nodes := map[string]schema.MetricStatistics{}
|
||||
for rows.Next() {
|
||||
row := rows.Record()
|
||||
host := row.ValueByKey("hostname").(string)
|
||||
|
||||
avg, avgok := row.ValueByKey("avg").(float64)
|
||||
if !avgok {
|
||||
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
|
||||
avg = 0.0
|
||||
}
|
||||
min, minok := row.ValueByKey("min").(float64)
|
||||
if !minok {
|
||||
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
|
||||
min = 0.0
|
||||
}
|
||||
max, maxok := row.ValueByKey("max").(float64)
|
||||
if !maxok {
|
||||
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
|
||||
max = 0.0
|
||||
}
|
||||
|
||||
nodes[host] = schema.MetricStatistics{
|
||||
Avg: avg,
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
stats[metric] = nodes
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Used in Job-View StatsTable
|
||||
// UNTESTED
|
||||
func (idb *InfluxDBv2DataRepository) LoadScopedStats(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.ScopedJobStats, error) {
|
||||
|
||||
// Assumption: idb.loadData() only returns series node-scope - use node scope for statsTable
|
||||
scopedJobStats := make(schema.ScopedJobStats)
|
||||
data, err := idb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job for scopedJobStats")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for metric, metricData := range data {
|
||||
for _, scope := range scopes {
|
||||
if scope != schema.MetricScopeNode {
|
||||
logOnce.Do(func() {
|
||||
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := scopedJobStats[metric]; !ok {
|
||||
scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
|
||||
}
|
||||
|
||||
if _, ok := scopedJobStats[metric][scope]; !ok {
|
||||
scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
|
||||
}
|
||||
|
||||
for _, series := range metricData[scope].Series {
|
||||
scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
|
||||
Hostname: series.Hostname,
|
||||
Data: &series.Statistics,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return scopedJobStats, nil
|
||||
}
|
||||
|
||||
// Used in Systems-View @ Node-Overview
|
||||
// UNTESTED
|
||||
func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
|
||||
// Note: scopes[] Array will be ignored, only return node scope
|
||||
|
||||
// CONVERT ARGS TO INFLUX
|
||||
measurementsConds := make([]string, 0)
|
||||
for _, m := range metrics {
|
||||
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
|
||||
}
|
||||
measurementsCond := strings.Join(measurementsConds, " or ")
|
||||
|
||||
hostsConds := make([]string, 0)
|
||||
if nodes == nil {
|
||||
var allNodes []string
|
||||
subClusterNodeLists := archive.NodeLists[cluster]
|
||||
for _, nodeList := range subClusterNodeLists {
|
||||
allNodes = append(nodes, nodeList.PrintList()...)
|
||||
}
|
||||
for _, node := range allNodes {
|
||||
nodes = append(nodes, node)
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
|
||||
}
|
||||
} else {
|
||||
for _, node := range nodes {
|
||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
|
||||
}
|
||||
}
|
||||
hostsCond := strings.Join(hostsConds, " or ")
|
||||
|
||||
// BUILD AND PERFORM QUERY
|
||||
query := fmt.Sprintf(`
|
||||
from(bucket: "%s")
|
||||
|> range(start: %s, stop: %s)
|
||||
|> filter(fn: (r) => (%s) and (%s) )
|
||||
|> drop(columns: ["_start", "_stop"])
|
||||
|> group(columns: ["hostname", "_measurement"])
|
||||
|> aggregateWindow(every: 60s, fn: mean)
|
||||
|> drop(columns: ["_time"])`,
|
||||
idb.bucket,
|
||||
idb.formatTime(from), idb.formatTime(to),
|
||||
measurementsCond, hostsCond)
|
||||
|
||||
rows, err := idb.queryClient.Query(ctx, query)
|
||||
if err != nil {
|
||||
log.Error("Error while performing query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// HANDLE QUERY RETURN
|
||||
// Collect Float Arrays for Node@Metric -> No Scope Handling!
|
||||
influxData := make(map[string]map[string][]schema.Float)
|
||||
for rows.Next() {
|
||||
row := rows.Record()
|
||||
host, field := row.ValueByKey("hostname").(string), row.Measurement()
|
||||
|
||||
influxHostData, ok := influxData[host]
|
||||
if !ok {
|
||||
influxHostData = make(map[string][]schema.Float)
|
||||
influxData[host] = influxHostData
|
||||
}
|
||||
|
||||
influxFieldData, ok := influxData[host][field]
|
||||
if !ok {
|
||||
influxFieldData = make([]schema.Float, 0)
|
||||
influxData[host][field] = influxFieldData
|
||||
}
|
||||
|
||||
val, ok := row.Value().(float64)
|
||||
if ok {
|
||||
influxData[host][field] = append(influxData[host][field], schema.Float(val))
|
||||
} else {
|
||||
influxData[host][field] = append(influxData[host][field], schema.Float(0))
|
||||
}
|
||||
}
|
||||
|
||||
// BUILD FUNCTION RETURN
|
||||
data := make(map[string]map[string][]*schema.JobMetric)
|
||||
for node, metricData := range influxData {
|
||||
|
||||
nodeData, ok := data[node]
|
||||
if !ok {
|
||||
nodeData = make(map[string][]*schema.JobMetric)
|
||||
data[node] = nodeData
|
||||
}
|
||||
|
||||
for metric, floatArray := range metricData {
|
||||
avg, min, max := 0.0, 0.0, 0.0
|
||||
for _, val := range floatArray {
|
||||
avg += float64(val)
|
||||
min = math.Min(min, float64(val))
|
||||
max = math.Max(max, float64(val))
|
||||
}
|
||||
|
||||
stats := schema.MetricStatistics{
|
||||
Avg: (math.Round((avg/float64(len(floatArray)))*100) / 100),
|
||||
Min: (math.Round(min*100) / 100),
|
||||
Max: (math.Round(max*100) / 100),
|
||||
}
|
||||
|
||||
mc := archive.GetMetricConfig(cluster, metric)
|
||||
nodeData[metric] = append(nodeData[metric], &schema.JobMetric{
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Timestep,
|
||||
Series: []schema.Series{
|
||||
{
|
||||
Hostname: node,
|
||||
Statistics: stats,
|
||||
Data: floatArray,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Used in Systems-View @ Node-List
|
||||
// UNTESTED
|
||||
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
// Assumption: idb.loadData() only returns series node-scope - use node scope for NodeList
|
||||
|
||||
// 0) Init additional vars
|
||||
var totalNodes int = 0
|
||||
var hasNextPage bool = false
|
||||
|
||||
// 1) Get list of all nodes
|
||||
var nodes []string
|
||||
if subCluster != "" {
|
||||
scNodes := archive.NodeLists[cluster][subCluster]
|
||||
nodes = scNodes.PrintList()
|
||||
} else {
|
||||
subClusterNodeLists := archive.NodeLists[cluster]
|
||||
for _, nodeList := range subClusterNodeLists {
|
||||
nodes = append(nodes, nodeList.PrintList()...)
|
||||
}
|
||||
}
|
||||
|
||||
// 2) Filter nodes
|
||||
if nodeFilter != "" {
|
||||
filteredNodes := []string{}
|
||||
for _, node := range nodes {
|
||||
if strings.Contains(node, nodeFilter) {
|
||||
filteredNodes = append(filteredNodes, node)
|
||||
}
|
||||
}
|
||||
nodes = filteredNodes
|
||||
}
|
||||
|
||||
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
|
||||
totalNodes = len(nodes)
|
||||
sort.Strings(nodes)
|
||||
|
||||
// 3) Apply paging
|
||||
if len(nodes) > page.ItemsPerPage {
|
||||
start := (page.Page - 1) * page.ItemsPerPage
|
||||
end := start + page.ItemsPerPage
|
||||
if end > len(nodes) {
|
||||
end = len(nodes)
|
||||
hasNextPage = false
|
||||
} else {
|
||||
hasNextPage = true
|
||||
}
|
||||
nodes = nodes[start:end]
|
||||
}
|
||||
|
||||
// 4) Fetch And Convert Data, use idb.LoadNodeData() for query
|
||||
|
||||
rawNodeData, err := idb.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
log.Error(fmt.Sprintf("Error while loading influx nodeData for nodeListData %#v\n", err))
|
||||
return nil, totalNodes, hasNextPage, err
|
||||
}
|
||||
|
||||
data := make(map[string]schema.JobData)
|
||||
for node, nodeData := range rawNodeData {
|
||||
// Init Nested Map Data Structures If Not Found
|
||||
hostData, ok := data[node]
|
||||
if !ok {
|
||||
hostData = make(schema.JobData)
|
||||
data[node] = hostData
|
||||
}
|
||||
|
||||
for metric, nodeMetricData := range nodeData {
|
||||
metricData, ok := hostData[metric]
|
||||
if !ok {
|
||||
metricData = make(map[schema.MetricScope]*schema.JobMetric)
|
||||
data[node][metric] = metricData
|
||||
}
|
||||
|
||||
data[node][metric][schema.MetricScopeNode] = nodeMetricData[0] // Only Node Scope Returned from loadNodeData
|
||||
}
|
||||
}
|
||||
|
||||
return data, totalNodes, hasNextPage, nil
|
||||
}
|
@@ -54,8 +54,6 @@ func Init() error {
|
||||
switch kind.Kind {
|
||||
case "cc-metric-store":
|
||||
mdr = &CCMetricStore{}
|
||||
case "influxdb":
|
||||
mdr = &InfluxDBv2DataRepository{}
|
||||
case "prometheus":
|
||||
mdr = &PrometheusDataRepository{}
|
||||
case "test":
|
||||
|
@@ -279,8 +279,8 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
for i, resource := range job.Resources {
|
||||
nodes[i] = resource.Hostname
|
||||
}
|
||||
from := job.StartTime
|
||||
to := job.StartTime.Add(time.Duration(job.Duration) * time.Second)
|
||||
from := time.Unix(job.StartTime, 0)
|
||||
to := time.Unix(job.StartTime+int64(job.Duration), 0)
|
||||
|
||||
for _, scope := range scopes {
|
||||
if scope != schema.MetricScopeNode {
|
||||
@@ -453,8 +453,8 @@ func (pdb *PrometheusDataRepository) LoadScopedStats(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.ScopedJobStats, error) {
|
||||
|
||||
ctx context.Context,
|
||||
) (schema.ScopedJobStats, error) {
|
||||
// Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
|
||||
scopedJobStats := make(schema.ScopedJobStats)
|
||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
||||
@@ -502,7 +502,6 @@ func (pdb *PrometheusDataRepository) LoadNodeListData(
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
// Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList
|
||||
|
||||
// 0) Init additional vars
|
||||
|
@@ -73,7 +73,7 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
|
||||
|
||||
if err := row.Scan(
|
||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster,
|
||||
&job.StartTimeUnix, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads,
|
||||
&job.StartTime, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads,
|
||||
&job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
|
||||
log.Warnf("Error while scanning rows (Job): %v", err)
|
||||
@@ -92,10 +92,9 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
|
||||
}
|
||||
job.RawFootprint = nil
|
||||
|
||||
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
||||
// Always ensure accurate duration for running jobs
|
||||
if job.State == schema.JobStateRunning {
|
||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||
job.Duration = int32(time.Now().Unix() - job.StartTime)
|
||||
}
|
||||
|
||||
return job, nil
|
||||
@@ -582,7 +581,7 @@ func (r *JobRepository) MarkArchived(
|
||||
|
||||
func (r *JobRepository) UpdateEnergy(
|
||||
stmt sq.UpdateBuilder,
|
||||
jobMeta *schema.JobMeta,
|
||||
jobMeta *schema.Job,
|
||||
) (sq.UpdateBuilder, error) {
|
||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
@@ -632,7 +631,7 @@ func (r *JobRepository) UpdateEnergy(
|
||||
|
||||
func (r *JobRepository) UpdateFootprint(
|
||||
stmt sq.UpdateBuilder,
|
||||
jobMeta *schema.JobMeta,
|
||||
jobMeta *schema.Job,
|
||||
) (sq.UpdateBuilder, error) {
|
||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
|
@@ -29,7 +29,7 @@ const NamedJobInsert string = `INSERT INTO job (
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||
);`
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||
r.Mutex.Lock()
|
||||
res, err := r.DB.NamedExec(NamedJobCacheInsert, job)
|
||||
r.Mutex.Unlock()
|
||||
@@ -87,7 +87,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
|
||||
|
||||
// Start inserts a new job in the table, returning the unique job ID.
|
||||
// Statistics are not transfered!
|
||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
||||
|
@@ -227,7 +227,7 @@ func (r *JobRepository) FindConcurrentJobs(
|
||||
var startTime int64
|
||||
var stopTime int64
|
||||
|
||||
startTime = job.StartTimeUnix
|
||||
startTime = job.StartTime
|
||||
hostname := job.Resources[0].Hostname
|
||||
|
||||
if job.State == schema.JobStateRunning {
|
||||
|
@@ -24,7 +24,7 @@ func TestFind(t *testing.T) {
|
||||
|
||||
// fmt.Printf("%+v", job)
|
||||
|
||||
if job.ID != 5 {
|
||||
if *job.ID != 5 {
|
||||
t.Errorf("wrong summary for diagnostic 3\ngot: %d \nwant: 1366", job.JobID)
|
||||
}
|
||||
}
|
||||
|
@@ -291,7 +291,7 @@ func (r *JobRepository) JobsStats(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
|
||||
func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
switch statType {
|
||||
case "avg":
|
||||
@@ -759,7 +759,6 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
filters []*model.JobFilter,
|
||||
bins *int,
|
||||
) []*model.MetricHistoPoints {
|
||||
|
||||
// Get Jobs
|
||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
||||
if err != nil {
|
||||
|
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
@@ -301,7 +301,7 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
}
|
||||
if match.(bool) {
|
||||
log.Info("Rule matches!")
|
||||
id := job.ID
|
||||
id := *job.ID
|
||||
if !r.HasTag(id, t.tagType, tag) {
|
||||
r.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||
}
|
||||
|
@@ -105,7 +105,7 @@ func (t *AppTagger) Match(job *schema.Job) {
|
||||
|
||||
jobscript, ok := metadata["jobScript"]
|
||||
if ok {
|
||||
id := job.ID
|
||||
id := *job.ID
|
||||
|
||||
out:
|
||||
for _, a := range t.apps {
|
||||
|
@@ -73,11 +73,7 @@ func RegisterFootprintWorker() {
|
||||
continue
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
job.Statistics = make(map[string]schema.JobStatistics)
|
||||
|
||||
for _, metric := range allMetrics {
|
||||
avg, min, max := 0.0, 0.0, 0.0
|
||||
@@ -95,7 +91,7 @@ func RegisterFootprintWorker() {
|
||||
}
|
||||
|
||||
// Add values rounded to 2 digits: repo.LoadStats may return unrounded
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
job.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
@@ -108,7 +104,7 @@ func RegisterFootprintWorker() {
|
||||
|
||||
// Build Statement per Job, Add to Pending Array
|
||||
stmt := sq.Update("job")
|
||||
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
|
||||
stmt, err = jobRepo.UpdateFootprint(stmt, job)
|
||||
if err != nil {
|
||||
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
|
||||
ce++
|
||||
|
Reference in New Issue
Block a user