Trial and Test MetricStore components

Aditya Ujeniya
2025-09-08 22:54:13 +02:00
parent 62565b9ae2
commit af43901ca3
34 changed files with 394 additions and 219 deletions

View File

@@ -241,7 +241,7 @@ func TestRestApi(t *testing.T) {
"numNodes": 1,
"numHwthreads": 8,
"numAcc": 0,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [
@@ -396,7 +396,7 @@ func TestRestApi(t *testing.T) {
"partition": "default",
"walltime": 3600,
"numNodes": 1,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [

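For context, a minimal sketch of how a test client might submit the updated payload, assuming the usual ClusterCockpit start-job REST endpoint and token authentication; the endpoint path, auth header, and all fields except the new string-valued "shared" (replacing the integer "exclusive") are illustrative, not taken from the diff.

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Illustrative payload: the integer "exclusive" flag is gone; the string
	// field "shared" takes one of "none", "single_user" or "multi_user".
	body := []byte(`{
		"jobId": 123,
		"user": "testuser",
		"project": "testproj",
		"cluster": "testcluster",
		"numNodes": 1,
		"numHwthreads": 8,
		"shared": "none",
		"monitoringStatus": 1,
		"smt": 1,
		"startTime": 1757365200
	}`)

	// Endpoint path and auth header are assumptions, not part of the diff.
	req, err := http.NewRequest(http.MethodPost, "http://localhost:8080/api/jobs/start_job/", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}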
View File

@@ -1401,12 +1401,6 @@ const docTemplate = `{
"format": "float64"
}
},
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {
@@ -1423,12 +1417,18 @@ const docTemplate = `{
},
"jobState": {
"enum": [
"completed",
"failed",
"boot_fail",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
"completed",
"deadline",
"failed",
"node_fail",
"out-of-memory",
"pending",
"preempted",
"running",
"suspended",
"timeout"
],
"allOf": [
{
@@ -1484,6 +1484,14 @@ const docTemplate = `{
"$ref": "#/definitions/schema.Resource"
}
},
"shared": {
"type": "string",
"enum": [
"none",
"single_user",
"multi_user"
]
},
"smt": {
"type": "integer",
"example": 4

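The Swagger definition drops the bounded integer "exclusive" (0-2) and adds the string enum "shared"; the jobState enum is also brought up to date. A hedged sketch of how a matching Go enum type could look; the type and constant names are assumptions, only the three allowed values come from the definition above.

package main

import "fmt"

// JobShared mirrors the new string-valued "shared" field; names here are
// illustrative, the enum values come from the updated Swagger definition.
type JobShared string

const (
	SharedNone       JobShared = "none"
	SharedSingleUser JobShared = "single_user"
	SharedMultiUser  JobShared = "multi_user"
)

// Valid reports whether s is one of the documented enum values.
func (s JobShared) Valid() bool {
	switch s {
	case SharedNone, SharedSingleUser, SharedMultiUser:
		return true
	}
	return false
}

func main() {
	fmt.Println(JobShared("none").Valid(), JobShared("exclusive").Valid()) // true false
}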
View File

@@ -97,7 +97,6 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
}
func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
r.StrictSlash(true)
// REST API Uses TokenAuth
r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost)
r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost)

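With the TokenAuth comment dropped, the metric-store routes are still mounted as plain POST handlers. A hedged sketch of pushing one line-protocol sample to the /api/write handler; the server address, measurement and tag names, and the assumption that no query parameters or auth are required are all illustrative, only the route path comes from the diff.

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// One influx line-protocol sample: measurement, tags, field, timestamp.
	line := "cpu_load,cluster=testcluster,hostname=node001,type=node value=1.23 1757365200\n"

	resp, err := http.Post("http://localhost:8080/api/write", "text/plain", bytes.NewBufferString(line))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("write:", resp.Status)
}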
View File

@@ -29,7 +29,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
return
case val := <-LineProtocolMessages:
//Fetch the frequency of the metric from the global configuration
freq, err := config.MetricStoreKeys.GetMetricFrequency(val.MetricName)
freq, err := config.GetMetricFrequency(val.MetricName)
if err != nil {
fmt.Printf("Error fetching metric frequency: %s\n", err)
continue

View File

@@ -97,10 +97,10 @@ func InitMetricStore(msConfig json.RawMessage) {
}
}
func (c *MetricStoreConfig) GetMetricFrequency(metricName string) (int64, error) {
// if metric, ok := c.Metrics[metricName]; ok {
// return metric.Frequency, nil
// }
func GetMetricFrequency(metricName string) (int64, error) {
if metric, ok := Metrics[metricName]; ok {
return metric.Frequency, nil
}
return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}

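GetMetricFrequency moves from a stubbed-out method on MetricStoreConfig to a package-level function backed by the global Metrics map, which is what the avro staging loop above now calls. A self-contained sketch of that lookup-and-skip pattern; the local map and function here stand in for the real config package.

package main

import (
	"fmt"
	"log"
)

// metrics and getMetricFrequency stand in for the package-level Metrics map
// and config.GetMetricFrequency from the hunk above.
var metrics = map[string]struct{ Frequency int64 }{
	"cpu_load": {Frequency: 60},
}

func getMetricFrequency(metricName string) (int64, error) {
	if m, ok := metrics[metricName]; ok {
		return m.Frequency, nil
	}
	return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}

func main() {
	// Caller pattern from the staging loop: unknown metrics are logged and
	// skipped rather than aborting the loop.
	for _, name := range []string{"cpu_load", "does_not_exist"} {
		freq, err := getMetricFrequency(name)
		if err != nil {
			log.Printf("error fetching metric frequency: %s", err)
			continue
		}
		fmt.Printf("%s: every %d s\n", name, freq)
	}
}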
View File

@@ -118,7 +118,6 @@ type ComplexityRoot struct {
Duration func(childComplexity int) int
Energy func(childComplexity int) int
EnergyFootprint func(childComplexity int) int
Exclusive func(childComplexity int) int
Footprint func(childComplexity int) int
ID func(childComplexity int) int
JobID func(childComplexity int) int
@@ -131,6 +130,7 @@ type ComplexityRoot struct {
Project func(childComplexity int) int
Resources func(childComplexity int) int
SMT func(childComplexity int) int
Shared func(childComplexity int) int
StartTime func(childComplexity int) int
State func(childComplexity int) int
SubCluster func(childComplexity int) int
@@ -425,8 +425,6 @@ type ClusterResolver interface {
type JobResolver interface {
StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error)
Exclusive(ctx context.Context, obj *schema.Job) (int, error)
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error)
@@ -726,13 +724,6 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Job.EnergyFootprint(childComplexity), true
case "Job.exclusive":
if e.complexity.Job.Exclusive == nil {
break
}
return e.complexity.Job.Exclusive(childComplexity), true
case "Job.footprint":
if e.complexity.Job.Footprint == nil {
break
@@ -817,6 +808,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
return e.complexity.Job.SMT(childComplexity), true
case "Job.shared":
if e.complexity.Job.Shared == nil {
break
}
return e.complexity.Job.Shared(childComplexity), true
case "Job.startTime":
if e.complexity.Job.StartTime == nil {
break
@@ -2361,7 +2359,7 @@ type Job {
numAcc: Int!
energy: Float!
SMT: Int!
exclusive: Int!
shared: String!
partition: String!
arrayJobId: Int!
monitoringStatus: Int!
@@ -2743,7 +2741,7 @@ input JobFilter {
startTime: TimeRange
state: [JobState!]
metricStats: [MetricStatItem!]
exclusive: Int
shared: StringInput
node: StringInput
}
@@ -5217,8 +5215,8 @@ func (ec *executionContext) fieldContext_Job_SMT(_ context.Context, field graphq
return fc, nil
}
func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_exclusive(ctx, field)
func (ec *executionContext) _Job_shared(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_shared(ctx, field)
if err != nil {
return graphql.Null
}
@@ -5231,7 +5229,7 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Job().Exclusive(rctx, obj)
return obj.Shared, nil
})
if err != nil {
ec.Error(ctx, err)
@@ -5243,19 +5241,19 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
}
return graphql.Null
}
res := resTmp.(int)
res := resTmp.(string)
fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res)
return ec.marshalNString2string(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_exclusive(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
func (ec *executionContext) fieldContext_Job_shared(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: true,
IsResolver: true,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Int does not have child fields")
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
@@ -6404,8 +6402,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(_ context.Context,
return ec.fieldContext_Job_energy(ctx, field)
case "SMT":
return ec.fieldContext_Job_SMT(ctx, field)
case "exclusive":
return ec.fieldContext_Job_exclusive(ctx, field)
case "shared":
return ec.fieldContext_Job_shared(ctx, field)
case "partition":
return ec.fieldContext_Job_partition(ctx, field)
case "arrayJobId":
@@ -11042,8 +11040,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_energy(ctx, field)
case "SMT":
return ec.fieldContext_Job_SMT(ctx, field)
case "exclusive":
return ec.fieldContext_Job_exclusive(ctx, field)
case "shared":
return ec.fieldContext_Job_shared(ctx, field)
case "partition":
return ec.fieldContext_Job_partition(ctx, field)
case "arrayJobId":
@@ -16357,7 +16355,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
asMap[k] = v
}
fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "exclusive", "node"}
fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "node"}
for _, k := range fieldsInOrder {
v, ok := asMap[k]
if !ok {
@@ -16490,13 +16488,13 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
return it, err
}
it.MetricStats = data
case "exclusive":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("exclusive"))
data, err := ec.unmarshalOInt2ᚖint(ctx, v)
case "shared":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared"))
data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
if err != nil {
return it, err
}
it.Exclusive = data
it.Shared = data
case "node":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node"))
data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
@@ -17397,42 +17395,11 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null {
atomic.AddUint32(&out.Invalids, 1)
}
case "exclusive":
field := field
innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Job_exclusive(ctx, field, obj)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
case "shared":
out.Values[i] = ec._Job_shared(ctx, field, obj)
if out.Values[i] == graphql.Null {
atomic.AddUint32(&out.Invalids, 1)
}
if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})
// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "partition":
out.Values[i] = ec._Job_partition(ctx, field, obj)
if out.Values[i] == graphql.Null {

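In the regenerated GraphQL layer, Job.shared is a plain string field resolved straight from the model (no resolver indirection), and JobFilter.exclusive becomes shared: StringInput. A hedged sketch of a query exercising both, embedded as a Go string; the query shape follows the schema excerpt above, while the "eq" comparator and the selected sibling fields are assumptions about the model.

package main

import "fmt"

// jobsQuery filters on the new string-valued "shared" field and selects it
// back on each job.
const jobsQuery = `
query ($shared: StringInput) {
  jobs(filter: [{ shared: $shared }]) {
    items {
      jobId
      shared
      numNodes
      state
    }
  }
}`

func main() {
	variables := map[string]any{
		"shared": map[string]any{"eq": "none"},
	}
	fmt.Println(jobsQuery, variables)
}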
View File

@@ -69,7 +69,7 @@ type JobFilter struct {
StartTime *config.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Shared *StringInput `json:"shared,omitempty"`
Node *StringInput `json:"node,omitempty"`
}

View File

@@ -35,11 +35,6 @@ func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Tim
return &timestamp, nil
}
// Exclusive is the resolver for the exclusive field.
func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
}
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)

View File

@@ -380,7 +380,7 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
if err != nil {
log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
}
fmt.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
log.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
}
// Config read (replace with your actual config read)

View File

@@ -2,10 +2,8 @@ package memorystore
import (
"context"
"errors"
"fmt"
"log"
"net"
"sync"
"time"
@@ -17,67 +15,67 @@ import (
)
// Each connection is handled in it's own goroutine. This is a blocking function.
func ReceiveRaw(ctx context.Context,
listener net.Listener,
handleLine func(*lineprotocol.Decoder, string) error,
) error {
var wg sync.WaitGroup
// func ReceiveRaw(ctx context.Context,
// listener net.Listener,
// handleLine func(*lineprotocol.Decoder, string) error,
// ) error {
// var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
<-ctx.Done()
if err := listener.Close(); err != nil {
log.Printf("listener.Close(): %s", err.Error())
}
}()
// wg.Add(1)
// go func() {
// defer wg.Done()
// <-ctx.Done()
// if err := listener.Close(); err != nil {
// log.Printf("listener.Close(): %s", err.Error())
// }
// }()
for {
conn, err := listener.Accept()
if err != nil {
if errors.Is(err, net.ErrClosed) {
break
}
// for {
// conn, err := listener.Accept()
// if err != nil {
// if errors.Is(err, net.ErrClosed) {
// break
// }
log.Printf("listener.Accept(): %s", err.Error())
}
// log.Printf("listener.Accept(): %s", err.Error())
// }
wg.Add(2)
go func() {
defer wg.Done()
defer conn.Close()
// wg.Add(2)
// go func() {
// defer wg.Done()
// defer conn.Close()
dec := lineprotocol.NewDecoder(conn)
connctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
defer wg.Done()
select {
case <-connctx.Done():
conn.Close()
case <-ctx.Done():
conn.Close()
}
}()
// dec := lineprotocol.NewDecoder(conn)
// connctx, cancel := context.WithCancel(context.Background())
// defer cancel()
// go func() {
// defer wg.Done()
// select {
// case <-connctx.Done():
// conn.Close()
// case <-ctx.Done():
// conn.Close()
// }
// }()
if err := handleLine(dec, "default"); err != nil {
if errors.Is(err, net.ErrClosed) {
return
}
// if err := handleLine(dec, "default"); err != nil {
// if errors.Is(err, net.ErrClosed) {
// return
// }
log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
errmsg := make([]byte, 128)
errmsg = append(errmsg, `error: `...)
errmsg = append(errmsg, err.Error()...)
errmsg = append(errmsg, '\n')
conn.Write(errmsg)
}
}()
}
// log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
// errmsg := make([]byte, 128)
// errmsg = append(errmsg, `error: `...)
// errmsg = append(errmsg, err.Error()...)
// errmsg = append(errmsg, '\n')
// conn.Write(errmsg)
// }
// }()
// }
wg.Wait()
return nil
}
// wg.Wait()
// return nil
// }
// Connect to a nats server and subscribe to "updates". This is a blocking
// function. handleLine will be called for each line recieved via nats.
@@ -113,7 +111,7 @@ func ReceiveNats(conf *(config.NatsConfig),
if workers > 1 {
wg.Add(workers)
for i := 0; i < workers; i++ {
for range workers {
go func() {
for m := range msgs {
dec := lineprotocol.NewDecoderWithBytes(m.Data)

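Besides commenting out the raw TCP receiver, the NATS path switches from the counted loop to Go 1.22's range-over-int form. A minimal, self-contained sketch of that worker-pool pattern; the channel payload and the work done per message are illustrative.

package main

import (
	"fmt"
	"sync"
)

func main() {
	const workers = 4
	msgs := make(chan []byte)

	var wg sync.WaitGroup
	wg.Add(workers)
	// Go 1.22+: "for range workers" iterates exactly workers times, replacing
	// "for i := 0; i < workers; i++".
	for range workers {
		go func() {
			defer wg.Done()
			for m := range msgs {
				fmt.Printf("decoded %d bytes\n", len(m)) // stand-in for the line-protocol decoder
			}
		}()
	}

	msgs <- []byte("cpu_load,hostname=node001 value=0.5 1757365200")
	close(msgs)
	wg.Wait()
}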
View File

@@ -47,7 +47,7 @@ type MemoryStore struct {
root Level
}
func Init(wg sync.WaitGroup) {
func Init(wg *sync.WaitGroup) {
startupTime := time.Now()
//Pass the config.MetricStoreKeys
@@ -82,10 +82,10 @@ func Init(wg sync.WaitGroup) {
wg.Add(4)
Retention(&wg, ctx)
Checkpointing(&wg, ctx)
Archiving(&wg, ctx)
avro.DataStaging(&wg, ctx)
Retention(wg, ctx)
Checkpointing(wg, ctx)
Archiving(wg, ctx)
avro.DataStaging(wg, ctx)
wg.Add(1)
sigs := make(chan os.Signal, 1)
@@ -337,12 +337,12 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
if from > to {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range")
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range\n")
}
minfo, ok := m.Metrics[metric]
if !ok {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: " + metric)
return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: \n" + metric)
}
n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)
@@ -381,7 +381,7 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso
if err != nil {
return nil, 0, 0, 0, err
} else if n == 0 {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found")
return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found\n")
} else if n > 1 {
if minfo.Aggregation == config.AvgAggregation {
normalize := 1. / schema.Float(n)

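Changing Init to take *sync.WaitGroup matters because a sync.WaitGroup must not be copied after first use (go vet's copylocks check flags it); passing the pointer lets Retention, Checkpointing, Archiving and the avro staging goroutine signal the caller's WaitGroup rather than a copy. A small sketch of the calling convention; the function body is illustrative.

package main

import (
	"fmt"
	"sync"
	"time"
)

// initStore mimics the corrected signature: it receives the caller's
// WaitGroup by pointer and hands the same pointer to its workers.
func initStore(wg *sync.WaitGroup) {
	wg.Add(2)
	for _, name := range []string{"retention", "checkpointing"} {
		go func(task string) {
			defer wg.Done()
			time.Sleep(10 * time.Millisecond) // placeholder work
			fmt.Println(task, "done")
		}(name)
	}
}

func main() {
	var wg sync.WaitGroup
	initStore(&wg) // pass &wg; a by-value parameter would count down a copy
	wg.Wait()
}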
View File

@@ -52,18 +52,18 @@ func GetJobRepository() *JobRepository {
}
var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status",
"job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
"job.footprint", "job.energy",
}
var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
"job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
"job_cache.footprint", "job_cache.energy",
}
@@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
}
@@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
@@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
// FIXME: Reconsider filtering short jobs with harcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)).
Where("job.job_state = 'running'").
Where("job.duration > 600")

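The repository layer follows a schema rename: job.cluster becomes job.hpc_cluster and job.exclusive becomes job.shared (likewise for the job_cache columns). A hedged squirrel sketch of the kind of query these hunks now build; everything beyond the renamed columns is assumed.

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// Same shape as FindRunningJobs/AllocatedNodes after the rename:
	// filter on hpc_cluster, select the new shared column.
	query := sq.Select("job.job_id", "job.hpc_cluster", "job.shared").
		From("job").
		Where("job.job_state = 'running'").
		Where(sq.Eq{"job.hpc_cluster": "testcluster"})

	sql, args, err := query.ToSql()
	if err != nil {
		panic(err)
	}
	fmt.Println(sql, args)
}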
View File

@@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
}
_, err = r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
"INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil {
cclog.Warnf("Error while Job sync: %v", err)
return nil, err

View File

@@ -31,7 +31,7 @@ func (r *JobRepository) Find(
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
q = q.Where("job.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
@@ -52,7 +52,7 @@ func (r *JobRepository) FindCached(
Where("job_cache.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job_cache.cluster = ?", *cluster)
q = q.Where("job_cache.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job_cache.start_time = ?", *startTime)
@@ -78,7 +78,7 @@ func (r *JobRepository) FindAll(
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
q = q.Where("job.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
@@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime)
q, qerr := SecurityCheck(ctx, q)
@@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
From("job").
Where("job.job_id = ?", jobId).
Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())

View File

@@ -168,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildMetaJsonCondition("jobName", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
query = buildStringCondition("job.hpc_cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
@@ -183,8 +183,8 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
}
if filter.Exclusive != nil {
query = query.Where("job.exclusive = ?", *filter.Exclusive)
if filter.Shared != nil {
query = query.Where("job.shared = ?", *filter.Shared)
}
if filter.State != nil {
states := make([]string, len(filter.State))

View File

@@ -23,7 +23,7 @@ import (
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
model.AggregateCluster: "job.hpc_cluster",
}
var sortBy2column = map[model.SortByAggregate]string{

View File

@@ -8,7 +8,7 @@
],
"metrics": ["cpu_load"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

View File

@@ -4,7 +4,7 @@
"parameters": ["job_min_duration_seconds"],
"metrics": ["flops_any", "mem_bw"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

View File

@@ -8,7 +8,7 @@
],
"metrics": ["cpu_load"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

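All three application-rule files swap the old integer test job.exclusive == 1 for a string comparison against the new shared field. A hedged sketch of what such a rule file might look like end to end, written as a Go raw string; only the two requirements entries come from the diffs, the remaining keys and values are illustrative.

package main

import "fmt"

// exampleRule abbreviates one of the updated rule files.
const exampleRule = `{
  "parameters": ["job_min_duration_seconds"],
  "metrics": ["cpu_load"],
  "requirements": [
    "job.shared == \"none\"",
    "job.duration > job_min_duration_seconds"
  ],
  "variables": []
}`

func main() {
	fmt.Println(exampleRule)
}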
View File

@@ -26,9 +26,9 @@ func RegisterCommitJobService() {
gocron.NewTask(
func() {
start := time.Now()
cclog.Printf("Jobcache sync started at %s", start.Format(time.RFC3339))
cclog.Printf("Jobcache sync started at %s\n", start.Format(time.RFC3339))
jobs, _ := jobRepo.SyncJobs()
repository.CallJobStartHooks(jobs)
cclog.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start))
cclog.Printf("Jobcache sync and job callbacks are done and took %s\n", time.Since(start))
}))
}

View File

@@ -7,7 +7,6 @@ package taskManager
import (
"bytes"
"encoding/json"
"fmt"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
@@ -66,10 +65,6 @@ func Start(cronCfg, archiveConfig json.RawMessage) {
RegisterStopJobsExceedTime()
}
fmt.Printf("Keys : %#v\n", Keys)
fmt.Printf("cronCfg : %#v\n", cronCfg)
fmt.Printf("archiveConfig : %#v\n", archiveConfig)
dec := json.NewDecoder(bytes.NewReader(cronCfg))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {

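With the debug prints removed, the task-manager configuration is still decoded strictly: DisallowUnknownFields turns a typo in the cron configuration into an error instead of silently dropping the key. A minimal sketch of that pattern; the cronKeys struct and its field are stand-ins, not the real Keys type.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

// cronKeys is a stand-in for the task manager's Keys struct.
type cronKeys struct {
	CommitJobWorker string `json:"commit-job-worker"`
}

func main() {
	raw := json.RawMessage(`{"commit-job-worker": "2m", "typo-field": true}`)

	var keys cronKeys
	dec := json.NewDecoder(bytes.NewReader(raw))
	dec.DisallowUnknownFields() // unknown keys become a decode error instead of being ignored
	if err := dec.Decode(&keys); err != nil {
		fmt.Println("config error:", err) // e.g. json: unknown field "typo-field"
		return
	}
	fmt.Println("interval:", keys.CommitJobWorker)
}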
View File

@@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() {
gocron.NewTask(
func() {
start := time.Now()
cclog.Printf("Update duration started at %s", start.Format(time.RFC3339))
cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339))
jobRepo.UpdateDuration()
cclog.Printf("Update duration is done and took %s", time.Since(start))
cclog.Printf("Update duration is done and took %s\n", time.Since(start))
}))
}

View File

@@ -134,8 +134,8 @@ func RegisterFootprintWorker() {
}
jobRepo.TransactionEnd(t)
}
cclog.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster))
}
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s))
}))
}