Repository: https://github.com/ClusterCockpit/cc-backend (mirror, synced 2025-10-27 22:55:07 +01:00)
Commit: Trial and Test MetricStore components
@@ -241,7 +241,7 @@ func TestRestApi(t *testing.T) {
"numNodes": 1,
"numHwthreads": 8,
"numAcc": 0,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [

@@ -396,7 +396,7 @@ func TestRestApi(t *testing.T) {
"partition": "default",
"walltime": 3600,
"numNodes": 1,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [
@@ -1401,12 +1401,6 @@ const docTemplate = `{
"format": "float64"
}
},
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {

@@ -1423,12 +1417,18 @@ const docTemplate = `{
},
"jobState": {
"enum": [
"completed",
"failed",
"boot_fail",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
"completed",
"deadline",
"failed",
"node_fail",
"out-of-memory",
"pending",
"preempted",
"running",
"suspended",
"timeout"
],
"allOf": [
{

@@ -1484,6 +1484,14 @@ const docTemplate = `{
"$ref": "#/definitions/schema.Resource"
}
},
"shared": {
"type": "string",
"enum": [
"none",
"single_user",
"multi_user"
]
},
"smt": {
"type": "integer",
"example": 4
@@ -97,7 +97,6 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
}

func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
r.StrictSlash(true)
// REST API Uses TokenAuth
r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost)
r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost)
@@ -29,7 +29,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
return
case val := <-LineProtocolMessages:
//Fetch the frequency of the metric from the global configuration
freq, err := config.MetricStoreKeys.GetMetricFrequency(val.MetricName)
freq, err := config.GetMetricFrequency(val.MetricName)
if err != nil {
fmt.Printf("Error fetching metric frequency: %s\n", err)
continue

@@ -97,10 +97,10 @@ func InitMetricStore(msConfig json.RawMessage) {
}
}

func (c *MetricStoreConfig) GetMetricFrequency(metricName string) (int64, error) {
// if metric, ok := c.Metrics[metricName]; ok {
// return metric.Frequency, nil
// }
func GetMetricFrequency(metricName string) (int64, error) {
if metric, ok := Metrics[metricName]; ok {
return metric.Frequency, nil
}
return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}
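The change above moves the frequency lookup from a method on MetricStoreConfig to a package-level GetMetricFrequency that reads a package-level Metrics map. A minimal sketch of that shape, with an assumed MetricConfig type and a hand-filled map for illustration:

package config

import "fmt"

// MetricConfig is a stand-in for the metric-store metric configuration;
// only Frequency matters for this lookup.
type MetricConfig struct {
	Frequency int64
}

// Metrics would be populated once when the metric-store config is parsed.
var Metrics = map[string]MetricConfig{
	"cpu_load": {Frequency: 60},
}

// GetMetricFrequency returns the sampling frequency of a configured metric,
// matching the package-level form introduced above.
func GetMetricFrequency(metricName string) (int64, error) {
	if metric, ok := Metrics[metricName]; ok {
		return metric.Frequency, nil
	}
	return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}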
@@ -118,7 +118,6 @@ type ComplexityRoot struct {
Duration func(childComplexity int) int
Energy func(childComplexity int) int
EnergyFootprint func(childComplexity int) int
Exclusive func(childComplexity int) int
Footprint func(childComplexity int) int
ID func(childComplexity int) int
JobID func(childComplexity int) int

@@ -131,6 +130,7 @@ type ComplexityRoot struct {
Project func(childComplexity int) int
Resources func(childComplexity int) int
SMT func(childComplexity int) int
Shared func(childComplexity int) int
StartTime func(childComplexity int) int
State func(childComplexity int) int
SubCluster func(childComplexity int) int

@@ -425,8 +425,6 @@ type ClusterResolver interface {
type JobResolver interface {
StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error)

Exclusive(ctx context.Context, obj *schema.Job) (int, error)

Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)

ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error)

@@ -726,13 +724,6 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin

return e.complexity.Job.EnergyFootprint(childComplexity), true

case "Job.exclusive":
if e.complexity.Job.Exclusive == nil {
break
}

return e.complexity.Job.Exclusive(childComplexity), true

case "Job.footprint":
if e.complexity.Job.Footprint == nil {
break

@@ -817,6 +808,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin

return e.complexity.Job.SMT(childComplexity), true

case "Job.shared":
if e.complexity.Job.Shared == nil {
break
}

return e.complexity.Job.Shared(childComplexity), true

case "Job.startTime":
if e.complexity.Job.StartTime == nil {
break

@@ -2361,7 +2359,7 @@ type Job {
numAcc: Int!
energy: Float!
SMT: Int!
exclusive: Int!
shared: String!
partition: String!
arrayJobId: Int!
monitoringStatus: Int!

@@ -2743,7 +2741,7 @@ input JobFilter {
startTime: TimeRange
state: [JobState!]
metricStats: [MetricStatItem!]
exclusive: Int
shared: StringInput
node: StringInput
}
@@ -5217,8 +5215,8 @@ func (ec *executionContext) fieldContext_Job_SMT(_ context.Context, field graphq
return fc, nil
}

func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_exclusive(ctx, field)
func (ec *executionContext) _Job_shared(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_shared(ctx, field)
if err != nil {
return graphql.Null
}

@@ -5231,7 +5229,7 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Job().Exclusive(rctx, obj)
return obj.Shared, nil
})
if err != nil {
ec.Error(ctx, err)

@@ -5243,19 +5241,19 @@ func (ec *executionContext) _Job_exclusive(ctx context.Context, field graphql.Co
}
return graphql.Null
}
res := resTmp.(int)
res := resTmp.(string)
fc.Result = res
return ec.marshalNInt2int(ctx, field.Selections, res)
return ec.marshalNString2string(ctx, field.Selections, res)
}

func (ec *executionContext) fieldContext_Job_exclusive(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
func (ec *executionContext) fieldContext_Job_shared(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: true,
IsResolver: true,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Int does not have child fields")
return nil, errors.New("field of type String does not have child fields")
},
}
return fc, nil
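In short, the generated code above changes how the field is served: `exclusive` went through a resolver method (IsMethod/IsResolver true, calling ec.resolvers.Job().Exclusive), while `shared` is marshalled straight from the struct field (obj.Shared, IsResolver false). A simplified sketch of the two shapes, not the actual gqlgen output:

package main

import "context"

// Job is a stand-in for schema.Job; Shared is an ordinary struct field.
type Job struct {
	Shared string
}

// jobResolver stands in for the resolver type that fields like the old
// "exclusive" needed; such fields cost a method call per resolution.
type jobResolver struct{}

func (r *jobResolver) Exclusive(ctx context.Context, obj *Job) (int, error) {
	return 0, nil // previously backed by a real resolver implementation
}

// For "shared" no resolver is involved: the generated marshaller can simply
// return the field value, which is what `return obj.Shared, nil` does above.
func sharedValue(obj *Job) string {
	return obj.Shared
}

func main() {
	j := &Job{Shared: "none"}
	_ = sharedValue(j)
	_, _ = (&jobResolver{}).Exclusive(context.Background(), j)
}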
@@ -6404,8 +6402,8 @@ func (ec *executionContext) fieldContext_JobResultList_items(_ context.Context,
return ec.fieldContext_Job_energy(ctx, field)
case "SMT":
return ec.fieldContext_Job_SMT(ctx, field)
case "exclusive":
return ec.fieldContext_Job_exclusive(ctx, field)
case "shared":
return ec.fieldContext_Job_shared(ctx, field)
case "partition":
return ec.fieldContext_Job_partition(ctx, field)
case "arrayJobId":

@@ -11042,8 +11040,8 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_energy(ctx, field)
case "SMT":
return ec.fieldContext_Job_SMT(ctx, field)
case "exclusive":
return ec.fieldContext_Job_exclusive(ctx, field)
case "shared":
return ec.fieldContext_Job_shared(ctx, field)
case "partition":
return ec.fieldContext_Job_partition(ctx, field)
case "arrayJobId":

@@ -16357,7 +16355,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
asMap[k] = v
}

fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "exclusive", "node"}
fieldsInOrder := [...]string{"tags", "dbId", "jobId", "arrayJobId", "user", "project", "jobName", "cluster", "partition", "duration", "energy", "minRunningFor", "numNodes", "numAccelerators", "numHWThreads", "startTime", "state", "metricStats", "shared", "node"}
for _, k := range fieldsInOrder {
v, ok := asMap[k]
if !ok {
@@ -16490,13 +16488,13 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
return it, err
}
it.MetricStats = data
case "exclusive":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("exclusive"))
data, err := ec.unmarshalOInt2ᚖint(ctx, v)
case "shared":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared"))
data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
if err != nil {
return it, err
}
it.Exclusive = data
it.Shared = data
case "node":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("node"))
data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)

@@ -17397,42 +17395,11 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
if out.Values[i] == graphql.Null {
atomic.AddUint32(&out.Invalids, 1)
}
case "exclusive":
field := field

innerFunc := func(ctx context.Context, fs *graphql.FieldSet) (res graphql.Marshaler) {
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
}
}()
res = ec._Job_exclusive(ctx, field, obj)
if res == graphql.Null {
atomic.AddUint32(&fs.Invalids, 1)
}
return res
case "shared":
out.Values[i] = ec._Job_shared(ctx, field, obj)
if out.Values[i] == graphql.Null {
atomic.AddUint32(&out.Invalids, 1)
}

if field.Deferrable != nil {
dfs, ok := deferred[field.Deferrable.Label]
di := 0
if ok {
dfs.AddField(field)
di = len(dfs.Values) - 1
} else {
dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
deferred[field.Deferrable.Label] = dfs
}
dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
return innerFunc(ctx, dfs)
})

// don't run the out.Concurrently() call below
out.Values[i] = graphql.Null
continue
}

out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "partition":
out.Values[i] = ec._Job_partition(ctx, field, obj)
if out.Values[i] == graphql.Null {
@@ -69,7 +69,7 @@ type JobFilter struct {
StartTime *config.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Shared *StringInput `json:"shared,omitempty"`
Node *StringInput `json:"node,omitempty"`
}
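For illustration, this is how the reworked filter might be populated by a caller; the types below are local stand-ins mirroring the struct above, and the Eq field name is an assumption based on the other string filters:

package main

import "fmt"

// Local stand-ins mirroring the relevant parts of model.JobFilter and
// model.StringInput from the hunk above.
type StringInput struct {
	Eq *string
}

type JobFilter struct {
	Shared *StringInput
	Node   *StringInput
}

func main() {
	// Select jobs that ran with shared mode "none" on one particular node.
	mode, node := "none", "node042"
	filter := JobFilter{
		Shared: &StringInput{Eq: &mode},
		Node:   &StringInput{Eq: &node},
	}
	fmt.Println(*filter.Shared.Eq, *filter.Node.Eq)
}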
@@ -35,11 +35,6 @@ func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Tim
return &timestamp, nil
}

// Exclusive is the resolver for the exclusive field.
func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
}

// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
@@ -380,7 +380,7 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
if err != nil {
log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
}
fmt.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
log.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
}

// Config read (replace with your actual config read)
@@ -2,10 +2,8 @@ package memorystore

import (
"context"
"errors"
"fmt"
"log"
"net"
"sync"
"time"

@@ -17,67 +15,67 @@ import (
)

// Each connection is handled in it's own goroutine. This is a blocking function.
func ReceiveRaw(ctx context.Context,
listener net.Listener,
handleLine func(*lineprotocol.Decoder, string) error,
) error {
var wg sync.WaitGroup
// func ReceiveRaw(ctx context.Context,
// listener net.Listener,
// handleLine func(*lineprotocol.Decoder, string) error,
// ) error {
// var wg sync.WaitGroup

wg.Add(1)
go func() {
defer wg.Done()
<-ctx.Done()
if err := listener.Close(); err != nil {
log.Printf("listener.Close(): %s", err.Error())
}
}()
// wg.Add(1)
// go func() {
// defer wg.Done()
// <-ctx.Done()
// if err := listener.Close(); err != nil {
// log.Printf("listener.Close(): %s", err.Error())
// }
// }()

for {
conn, err := listener.Accept()
if err != nil {
if errors.Is(err, net.ErrClosed) {
break
}
// for {
// conn, err := listener.Accept()
// if err != nil {
// if errors.Is(err, net.ErrClosed) {
// break
// }

log.Printf("listener.Accept(): %s", err.Error())
}
// log.Printf("listener.Accept(): %s", err.Error())
// }

wg.Add(2)
go func() {
defer wg.Done()
defer conn.Close()
// wg.Add(2)
// go func() {
// defer wg.Done()
// defer conn.Close()

dec := lineprotocol.NewDecoder(conn)
connctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
defer wg.Done()
select {
case <-connctx.Done():
conn.Close()
case <-ctx.Done():
conn.Close()
}
}()
// dec := lineprotocol.NewDecoder(conn)
// connctx, cancel := context.WithCancel(context.Background())
// defer cancel()
// go func() {
// defer wg.Done()
// select {
// case <-connctx.Done():
// conn.Close()
// case <-ctx.Done():
// conn.Close()
// }
// }()

if err := handleLine(dec, "default"); err != nil {
if errors.Is(err, net.ErrClosed) {
return
}
// if err := handleLine(dec, "default"); err != nil {
// if errors.Is(err, net.ErrClosed) {
// return
// }

log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
errmsg := make([]byte, 128)
errmsg = append(errmsg, `error: `...)
errmsg = append(errmsg, err.Error()...)
errmsg = append(errmsg, '\n')
conn.Write(errmsg)
}
}()
}
// log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
// errmsg := make([]byte, 128)
// errmsg = append(errmsg, `error: `...)
// errmsg = append(errmsg, err.Error()...)
// errmsg = append(errmsg, '\n')
// conn.Write(errmsg)
// }
// }()
// }

wg.Wait()
return nil
}
// wg.Wait()
// return nil
// }

// Connect to a nats server and subscribe to "updates". This is a blocking
// function. handleLine will be called for each line recieved via nats.
@@ -113,7 +111,7 @@ func ReceiveNats(conf *(config.NatsConfig),
if workers > 1 {
wg.Add(workers)

for i := 0; i < workers; i++ {
for range workers {
go func() {
for m := range msgs {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
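The worker loop above switches to Go 1.22's integer range form; `for range workers` runs the body workers times and is equivalent to the old counter loop when the index is unused. A small self-contained sketch:

package main

import (
	"fmt"
	"sync"
)

func main() {
	const workers = 4
	var wg sync.WaitGroup
	wg.Add(workers)
	// Since Go 1.22, ranging over an int iterates that many times; the loop
	// variable is dropped entirely when it is not needed.
	for range workers {
		go func() {
			defer wg.Done()
			fmt.Println("worker done")
		}()
	}
	wg.Wait()
}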
@@ -47,7 +47,7 @@ type MemoryStore struct {
root Level
}

func Init(wg sync.WaitGroup) {
func Init(wg *sync.WaitGroup) {
startupTime := time.Now()

//Pass the config.MetricStoreKeys

@@ -82,10 +82,10 @@ func Init(wg sync.WaitGroup) {

wg.Add(4)

Retention(&wg, ctx)
Checkpointing(&wg, ctx)
Archiving(&wg, ctx)
avro.DataStaging(&wg, ctx)
Retention(wg, ctx)
Checkpointing(wg, ctx)
Archiving(wg, ctx)
avro.DataStaging(wg, ctx)

wg.Add(1)
sigs := make(chan os.Signal, 1)
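The signature change from Init(wg sync.WaitGroup) to Init(wg *sync.WaitGroup) matters because a WaitGroup must not be copied: with a value parameter, the Done calls issued by the background tasks would decrement a private copy and the caller's Wait would block forever. A minimal sketch of the corrected pattern (the worker names are placeholders, not the real memorystore API):

package main

import (
	"fmt"
	"sync"
	"time"
)

// startWorker stands in for Retention/Checkpointing/Archiving/DataStaging:
// it signals completion on the caller's WaitGroup, received by pointer.
func startWorker(wg *sync.WaitGroup, name string) {
	go func() {
		defer wg.Done()
		time.Sleep(10 * time.Millisecond)
		fmt.Println(name, "finished")
	}()
}

// initStore mirrors the fixed Init signature: Add and Done now act on the
// same WaitGroup that the caller eventually Waits on.
func initStore(wg *sync.WaitGroup) {
	wg.Add(2)
	startWorker(wg, "checkpointing")
	startWorker(wg, "retention")
}

func main() {
	var wg sync.WaitGroup
	initStore(&wg)
	wg.Wait()
}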
@@ -337,12 +337,12 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
if from > to {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range")
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range\n")
}

minfo, ok := m.Metrics[metric]
if !ok {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: " + metric)
return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: \n" + metric)
}

n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)

@@ -381,7 +381,7 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso
if err != nil {
return nil, 0, 0, 0, err
} else if n == 0 {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found")
return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found\n")
} else if n > 1 {
if minfo.Aggregation == config.AvgAggregation {
normalize := 1. / schema.Float(n)
@@ -52,18 +52,18 @@ func GetJobRepository() *JobRepository {
}

var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status",
"job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
"job.footprint", "job.energy",
}

var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
"job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
"job_cache.footprint", "job_cache.energy",
}
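Because the schema columns were renamed (cluster to hpc_cluster, exclusive to shared), every hand-written squirrel query has to follow, which is what the hunks below do. A hedged sketch of the query-building pattern against the renamed columns (column list shortened, cluster name invented):

package main

import (
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

func main() {
	// Same builder style as the repository code, but with only a few columns.
	query := sq.Select("job.id", "job.job_id", "job.hpc_cluster", "job.shared").
		From("job").
		Where("job.hpc_cluster = ?", "fritz").
		Where("job.job_state = 'running'")

	sqlStr, args, err := query.ToSql()
	if err != nil {
		panic(err)
	}
	fmt.Println(sqlStr, args)
}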
@@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
}

@@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")

@@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
// FIXME: Reconsider filtering short jobs with harcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)).
Where("job.job_state = 'running'").
Where("job.duration > 600")
@@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
}

_, err = r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
"INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil {
cclog.Warnf("Error while Job sync: %v", err)
return nil, err
@@ -31,7 +31,7 @@ func (r *JobRepository) Find(
Where("job.job_id = ?", *jobId)

if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
q = q.Where("job.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)

@@ -52,7 +52,7 @@ func (r *JobRepository) FindCached(
Where("job_cache.job_id = ?", *jobId)

if cluster != nil {
q = q.Where("job_cache.cluster = ?", *cluster)
q = q.Where("job_cache.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job_cache.start_time = ?", *startTime)

@@ -78,7 +78,7 @@ func (r *JobRepository) FindAll(
Where("job.job_id = ?", *jobId)

if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
q = q.Where("job.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)

@@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime)

q, qerr := SecurityCheck(ctx, q)

@@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
From("job").
Where("job.job_id = ?", jobId).
Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime)

_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
@@ -168,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildMetaJsonCondition("jobName", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
query = buildStringCondition("job.hpc_cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query)

@@ -183,8 +183,8 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
}
if filter.Exclusive != nil {
query = query.Where("job.exclusive = ?", *filter.Exclusive)
if filter.Shared != nil {
query = query.Where("job.shared = ?", *filter.Shared)
}
if filter.State != nil {
states := make([]string, len(filter.State))
@@ -23,7 +23,7 @@ import (
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
model.AggregateCluster: "job.hpc_cluster",
}

var sortBy2column = map[model.SortByAggregate]string{
@@ -8,7 +8,7 @@
],
"metrics": ["cpu_load"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

@@ -4,7 +4,7 @@
"parameters": ["job_min_duration_seconds"],
"metrics": ["flops_any", "mem_bw"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

@@ -8,7 +8,7 @@
],
"metrics": ["cpu_load"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [
@@ -26,9 +26,9 @@ func RegisterCommitJobService() {
gocron.NewTask(
func() {
start := time.Now()
cclog.Printf("Jobcache sync started at %s", start.Format(time.RFC3339))
cclog.Printf("Jobcache sync started at %s\n", start.Format(time.RFC3339))
jobs, _ := jobRepo.SyncJobs()
repository.CallJobStartHooks(jobs)
cclog.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start))
cclog.Printf("Jobcache sync and job callbacks are done and took %s\n", time.Since(start))
}))
}
@@ -7,7 +7,6 @@ package taskManager
import (
"bytes"
"encoding/json"
"fmt"
"time"

"github.com/ClusterCockpit/cc-backend/internal/auth"

@@ -66,10 +65,6 @@ func Start(cronCfg, archiveConfig json.RawMessage) {
RegisterStopJobsExceedTime()
}

fmt.Printf("Keys : %#v\n", Keys)
fmt.Printf("cronCfg : %#v\n", cronCfg)
fmt.Printf("archiveConfig : %#v\n", archiveConfig)

dec := json.NewDecoder(bytes.NewReader(cronCfg))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
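The debug prints go away while the strict decode of cronCfg stays. For reference, this is the general json.Decoder pattern with DisallowUnknownFields; the key and struct below are placeholders, not the real taskManager Keys type:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

// cronConfig is a stand-in for the taskManager Keys configuration struct.
type cronConfig struct {
	CommitJobWorker string `json:"commit-job-worker"`
}

func main() {
	raw := []byte(`{"commit-job-worker": "2m"}`)

	dec := json.NewDecoder(bytes.NewReader(raw))
	// Fail on config keys the struct does not declare instead of silently
	// dropping them.
	dec.DisallowUnknownFields()

	var cfg cronConfig
	if err := dec.Decode(&cfg); err != nil {
		fmt.Println("invalid cron config:", err)
		return
	}
	fmt.Printf("parsed: %+v\n", cfg)
}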
@@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() {
gocron.NewTask(
func() {
start := time.Now()
cclog.Printf("Update duration started at %s", start.Format(time.RFC3339))
cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339))
jobRepo.UpdateDuration()
cclog.Printf("Update duration is done and took %s", time.Since(start))
cclog.Printf("Update duration is done and took %s\n", time.Since(start))
}))
}

@@ -134,8 +134,8 @@ func RegisterFootprintWorker() {
}
jobRepo.TransactionEnd(t)
}
cclog.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster))
}
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s))
}))
}