Mirror of https://github.com/ClusterCockpit/cc-backend (synced 2025-09-12 19:53:00 +02:00)
Migration SQL fix
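The changes below span several files: the cluster config bootstrap gains an InitMetricStore helper, the metric-store code absorbed from cc-metric-store is folded into cc-backend's internal packages (package-local Keys, MetricConfig and AggregationStrategy replace the old internal/config dependency), the job insert statements follow the cluster to hpc_cluster and exclusive to shared column renames, and the SQLite migration that rebuilds the job table now maps the old columns onto the new schema explicitly.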
@@ -9,6 +9,7 @@ import (
 	"encoding/json"
 	"time"
 
+	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
 )
 
@@ -166,3 +167,12 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
 		cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
 	}
 }
+
+func InitMetricStore(msConfig json.RawMessage) {
+	// Validate(msConfigSchema, msConfig)
+	dec := json.NewDecoder(bytes.NewReader(msConfig))
+	dec.DisallowUnknownFields()
+	if err := dec.Decode(&memorystore.Keys); err != nil {
+		cclog.Abortf("Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error())
+	}
+}
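The new InitMetricStore helper fails hard on unrecognized keys because of DisallowUnknownFields. A minimal standalone sketch of that decoding behavior; the metricStoreKeys struct and its JSON tags are illustrative stand-ins, not the real memorystore.Keys:

    package main

    import (
        "bytes"
        "encoding/json"
        "fmt"
    )

    // Illustrative stand-in for the real memorystore.Keys structure.
    type metricStoreKeys struct {
        Checkpoints struct {
            Restore string `json:"restore"`
            RootDir string `json:"root-dir"`
        } `json:"checkpoints"`
    }

    func main() {
        raw := json.RawMessage(`{"checkpoints": {"restore": "48h", "root-dir": "./var/checkpoints"}}`)

        dec := json.NewDecoder(bytes.NewReader(raw))
        dec.DisallowUnknownFields() // any unexpected key fails the decode

        var keys metricStoreKeys
        if err := dec.Decode(&keys); err != nil {
            fmt.Printf("could not decode config: %s\n", err)
            return
        }
        fmt.Println(keys.Checkpoints.Restore) // 48h
    }

With DisallowUnknownFields set, a misspelled key in the config surfaces as a decode error (and here, an abort) instead of being silently dropped.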
@@ -142,6 +142,10 @@ func InitDB() error {
 				continue
 			}
 
+			if jobMeta.Shared == "" {
+				jobMeta.Shared = "none"
+			}
+
 			id, err := r.TransactionAddNamed(t,
 				repository.NamedJobInsert, jobMeta)
 			if err != nil {
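The importer now defaults an empty Shared value to "none" before handing jobMeta to TransactionAddNamed, presumably because legacy job archives predate the exclusive-to-shared rename and would otherwise write an empty string into the new shared column. A trimmed illustration of the guard, with jobMeta reduced to the one field that matters here:

    package main

    import "fmt"

    // Minimal stand-in for the job metadata; only the field of interest.
    type jobMeta struct {
        Shared string
    }

    func main() {
        j := jobMeta{} // legacy archive entry: Shared was never set
        if j.Shared == "" {
            j.Shared = "none" // same default the importer applies before the insert
        }
        fmt.Println(j.Shared) // none
    }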
@@ -19,7 +19,7 @@ import (
 	"sync/atomic"
 	"time"
 
-	"github.com/ClusterCockpit/cc-backend/pkg/avro"
+	"github.com/ClusterCockpit/cc-backend/internal/avro"
 	"github.com/ClusterCockpit/cc-lib/util"
 	"github.com/linkedin/goavro/v2"
 )
@@ -2,16 +2,18 @@ package memorystore
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
+	"fmt"
 	"log"
 	"runtime"
 	"sync"
 	"time"
 
-	"github.com/ClusterCockpit/cc-backend/pkg/avro"
+	"github.com/ClusterCockpit/cc-backend/internal/avro"
 	"github.com/ClusterCockpit/cc-lib/resampler"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"github.com/ClusterCockpit/cc-lib/util"
-	"github.com/ClusterCockpit/cc-metric-store/internal/config"
 )
 
 var (
@@ -29,20 +31,101 @@ func init() {
 	}
 }
 
+// For aggregation over multiple values at different cpus/sockets/..., not time!
+type AggregationStrategy int
+
+const (
+	NoAggregation AggregationStrategy = iota
+	SumAggregation
+	AvgAggregation
+)
+
+func (as *AggregationStrategy) UnmarshalJSON(data []byte) error {
+	var str string
+	if err := json.Unmarshal(data, &str); err != nil {
+		return err
+	}
+
+	switch str {
+	case "":
+		*as = NoAggregation
+	case "sum":
+		*as = SumAggregation
+	case "avg":
+		*as = AvgAggregation
+	default:
+		return fmt.Errorf("invalid aggregation strategy: %#v", str)
+	}
+	return nil
+}
+
+type MetricConfig struct {
+	// Interval in seconds at which measurements will arrive.
+	Frequency int64 `json:"frequency"`
+
+	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
+	Aggregation AggregationStrategy `json:"aggregation"`
+
+	// Private, used internally...
+	Offset int
+}
+
 type Metric struct {
 	Name         string
 	Value        util.Float
-	MetricConfig config.MetricConfig
+	MetricConfig MetricConfig
 }
 
 type MemoryStore struct {
-	Metrics map[string]config.MetricConfig
+	Metrics map[string]MetricConfig
 	root    Level
 }
 
+func Init() {
+	startupTime := time.Now()
+
+	// Pass the keys from cluster config
+	InitMetrics()
+
+	ms := GetMemoryStore()
+
+	d, err := time.ParseDuration(Keys.Checkpoints.Restore)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	restoreFrom := startupTime.Add(-d)
+	log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
+	files, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())
+	loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
+	if err != nil {
+		log.Fatalf("Loading checkpoints failed: %s\n", err.Error())
+	} else {
+		log.Printf("Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
+	}
+
+	// Try to use less memory by forcing a GC run here and then
+	// lowering the target percentage. The default of 100 means
+	// that only once the ratio of new allocations exceeds the
+	// previously active heap, a GC is triggered.
+	// Forcing a GC here will set the "previously active heap"
+	// to a minimum.
+	runtime.GC()
+
+	ctx, _ := context.WithCancel(context.Background())
+
+	var wg sync.WaitGroup
+	wg.Add(4)
+
+	Retention(&wg, ctx)
+	Checkpointing(&wg, ctx)
+	Archiving(&wg, ctx)
+	avro.DataStaging(&wg, ctx)
+}
+
 // Create a new, initialized instance of a MemoryStore.
 // Will panic if values in the metric configurations are invalid.
-func Init(metrics map[string]config.MetricConfig) {
+func InitMetrics(metrics map[string]MetricConfig) {
 	singleton.Do(func() {
 		offset := 0
 		for key, cfg := range metrics {
@@ -50,7 +133,7 @@ func Init(metrics map[string]config.MetricConfig) {
 			panic("invalid frequency")
 		}
 
-		metrics[key] = config.MetricConfig{
+		metrics[key] = MetricConfig{
 			Frequency:   cfg.Frequency,
 			Aggregation: cfg.Aggregation,
 			Offset:      offset,
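AggregationStrategy and MetricConfig move from cc-metric-store's internal/config into the memorystore package itself. The custom UnmarshalJSON maps the JSON strings "sum" and "avg" (or an empty value) onto the enum. A self-contained sketch using copies of the types from the hunk above, trimmed to what the demo needs:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Copies of the types from the hunk above, trimmed for a standalone demo.
    type AggregationStrategy int

    const (
        NoAggregation AggregationStrategy = iota
        SumAggregation
        AvgAggregation
    )

    func (as *AggregationStrategy) UnmarshalJSON(data []byte) error {
        var str string
        if err := json.Unmarshal(data, &str); err != nil {
            return err
        }
        switch str {
        case "":
            *as = NoAggregation
        case "sum":
            *as = SumAggregation
        case "avg":
            *as = AvgAggregation
        default:
            return fmt.Errorf("invalid aggregation strategy: %#v", str)
        }
        return nil
    }

    type MetricConfig struct {
        Frequency   int64               `json:"frequency"`
        Aggregation AggregationStrategy `json:"aggregation"`
    }

    func main() {
        var cfg map[string]MetricConfig
        raw := `{"flops_any": {"frequency": 60, "aggregation": "avg"}}`
        if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
            panic(err)
        }
        fmt.Println(cfg["flops_any"].Aggregation == AvgAggregation) // true
    }

Note that a JSON null also yields NoAggregation: unmarshalling null into a string is a no-op in encoding/json, so str stays empty and the first switch case applies.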
@@ -77,16 +160,16 @@ func GetMemoryStore() *MemoryStore {
 }
 
 func Shutdown() {
-	log.Printf("Writing to '%s'...\n", config.Keys.Checkpoints.RootDir)
+	log.Printf("Writing to '%s'...\n", Keys.Checkpoints.RootDir)
 	var files int
 	var err error
 
 	ms := GetMemoryStore()
 
-	if config.Keys.Checkpoints.FileFormat == "json" {
-		files, err = ms.ToCheckpoint(config.Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
+	if Keys.Checkpoints.FileFormat == "json" {
+		files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
 	} else {
-		files, err = avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, true)
+		files, err = avro.GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true)
 		close(avro.LineProtocolMessages)
 	}
 
@@ -172,7 +255,7 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
 
 	go func() {
 		defer wg.Done()
-		d, err := time.ParseDuration(config.Keys.RetentionInMemory)
+		d, err := time.ParseDuration(Keys.RetentionInMemory)
 		if err != nil {
 			log.Fatal(err)
 		}
@@ -261,7 +344,7 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {
 // If the level does not hold the metric itself, the data will be aggregated recursively from the children.
 // The second and third return value are the actual from/to for the data. Those can be different from
 // the range asked for if no data was available.
-func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]util.Float, int64, int64, int64, error) {
+func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
 	if from > to {
 		return nil, 0, 0, 0, errors.New("invalid time range")
 	}
@@ -271,7 +354,7 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]util.Float, int64, int64, int64, error) {
 		return nil, 0, 0, 0, errors.New("unknown metric: " + metric)
 	}
 
-	n, data := 0, make([]util.Float, (to-from)/minfo.Frequency+1)
+	n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)
 
 	err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
 		cdata, cfrom, cto, err := b.read(from, to, data)
@@ -309,12 +392,12 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]util.Float, int64, int64, int64, error) {
 	} else if n == 0 {
 		return nil, 0, 0, 0, errors.New("metric or host not found")
 	} else if n > 1 {
-		if minfo.Aggregation == config.AvgAggregation {
-			normalize := 1. / util.Float(n)
+		if minfo.Aggregation == AvgAggregation {
+			normalize := 1. / schema.Float(n)
 			for i := 0; i < len(data); i++ {
 				data[i] *= normalize
 			}
-		} else if minfo.Aggregation != config.SumAggregation {
+		} else if minfo.Aggregation != SumAggregation {
 			return nil, 0, 0, 0, errors.New("invalid aggregation")
 		}
 	}
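For context: when more than one buffer contributes to a read, the callback accumulates element-wise sums, and AvgAggregation then rescales the whole series once. A tiny numeric sketch, with plain float64 values standing in for schema.Float (a float64-based type):

    package main

    import "fmt"

    func main() {
        // Element-wise sum over three hosts' series, as the read callback produces it.
        data := []float64{30, 60, 90} // stand-in for []schema.Float
        n := 3                        // number of buffers that contributed

        // AvgAggregation: multiply by 1/n once instead of dividing every element.
        normalize := 1. / float64(n)
        for i := range data {
            data[i] *= normalize
        }
        fmt.Println(data) // [10 20 30]
    }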
@@ -5,7 +5,6 @@ import (
 	"math"
 
 	"github.com/ClusterCockpit/cc-lib/util"
-	"github.com/ClusterCockpit/cc-metric-store/internal/config"
 )
 
 type Stats struct {
@@ -105,9 +104,9 @@ func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
 		return nil, 0, 0, ErrNoData
 	}
 
-	if minfo.Aggregation == config.AvgAggregation {
+	if minfo.Aggregation == AvgAggregation {
 		avg /= util.Float(n)
-	} else if n > 1 && minfo.Aggregation != config.SumAggregation {
+	} else if n > 1 && minfo.Aggregation != SumAggregation {
 		return nil, 0, 0, errors.New("invalid aggregation")
 	}
 
@@ -14,19 +14,19 @@ import (
 )
 
 const NamedJobCacheInsert string = `INSERT INTO job_cache (
-	job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
-	exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
+	job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
+	shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
 ) VALUES (
-	:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
-	:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
+	:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
+	:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
 );`
 
 const NamedJobInsert string = `INSERT INTO job (
-	job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
-	exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
+	job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
+	shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
 ) VALUES (
-	:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
-	:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
+	:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
+	:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
 );`
 
 func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
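Both insert constants switch to the hpc_cluster and shared columns, matching the renamed schema. Since TransactionAddNamed suggests they are consumed as named queries, the binding works roughly like this sqlx sketch; the column subset and the values are made up for illustration:

    package main

    import (
        "fmt"

        "github.com/jmoiron/sqlx"
    )

    func main() {
        // Trimmed-down version of NamedJobInsert with only a few columns.
        const q = `INSERT INTO job (job_id, hpc_cluster, shared) VALUES (:job_id, :hpc_cluster, :shared)`

        // sqlx resolves :name placeholders from struct tags or a map.
        query, args, err := sqlx.Named(q, map[string]interface{}{
            "job_id":      1337,
            "hpc_cluster": "fritz",
            "shared":      "none",
        })
        if err != nil {
            panic(err)
        }
        fmt.Println(query) // INSERT INTO job (job_id, hpc_cluster, shared) VALUES (?, ?, ?)
        fmt.Println(args)  // [1337 fritz none]
    }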
@@ -3,7 +3,7 @@ CREATE TABLE "job_cache" (
 	job_id BIGINT NOT NULL,
 	hpc_cluster VARCHAR(255) NOT NULL,
 	subcluster VARCHAR(255) NOT NULL,
-	submit_time BIGINT NOT NULL, -- Unix timestamp
+	submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
 	start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
 	hpc_user VARCHAR(255) NOT NULL,
 	project VARCHAR(255) NOT NULL,
@@ -30,7 +30,7 @@ CREATE TABLE "job_cache" (
 	energy REAL NOT NULL DEFAULT 0.0,
 	energy_footprint TEXT DEFAULT NULL,
 	footprint TEXT DEFAULT NULL,
-	UNIQUE (job_id, cluster, start_time)
+	UNIQUE (job_id, hpc_cluster, start_time)
 );
 
 CREATE TABLE "job_new" (
@@ -65,10 +65,21 @@ CREATE TABLE "job_new" (
 	energy REAL NOT NULL DEFAULT 0.0,
 	energy_footprint TEXT DEFAULT NULL,
 	footprint TEXT DEFAULT NULL,
-	UNIQUE (job_id, cluster, start_time)
+	UNIQUE (job_id, hpc_cluster, start_time)
 );
 
 ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster;
-INSERT INTO job_new SELECT * FROM job;
+INSERT INTO job_new (
+	id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project,
+	cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
+	num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy,
+	energy_footprint, footprint
+)
+SELECT
+	id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project,
+	cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
+	num_nodes, num_hwthreads, num_acc, smt, exclusive, monitoring_status, energy,
+	energy_footprint, footprint
+FROM job;
 DROP TABLE job;
 ALTER TABLE job_new RENAME TO job;
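This hunk is the actual "Migration SQL fix": the old migration ran INSERT INTO job_new SELECT * FROM job;, which breaks once job_new's column list no longer lines up positionally with job (job_new gains submit_time, and has shared where job still has exclusive). The rewritten statement names the target columns, maps exclusive onto shared, and fills submit_time with 0. A rough sketch of applying such statements atomically, assuming an SQLite driver such as mattn/go-sqlite3 and an illustrative two-column mapping rather than the full list above:

    package main

    import (
        "database/sql"
        "log"

        _ "github.com/mattn/go-sqlite3" // assumed driver for this sketch
    )

    func main() {
        db, err := sql.Open("sqlite3", "./var/job.db")
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()

        stmts := []string{
            // RENAME COLUMN requires SQLite >= 3.25.
            `ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster;`,
            // Explicit column list: SELECT * no longer lines up after
            // job_new gained submit_time and renamed exclusive to shared.
            // Trimmed here; the real migration lists every column.
            `INSERT INTO job_new (id, job_id, hpc_cluster, shared)
                 SELECT id, job_id, hpc_cluster, exclusive FROM job;`,
        }

        // Run inside one transaction so a failing statement cannot leave
        // the schema half-migrated.
        tx, err := db.Begin()
        if err != nil {
            log.Fatal(err)
        }
        for _, s := range stmts {
            if _, err := tx.Exec(s); err != nil {
                tx.Rollback()
                log.Fatal(err)
            }
        }
        if err := tx.Commit(); err != nil {
            log.Fatal(err)
        }
    }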
@@ -7,6 +7,7 @@ package taskManager
 import (
 	"bytes"
 	"encoding/json"
+	"fmt"
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/auth"
@@ -65,10 +66,14 @@ func Start(cronCfg, archiveConfig json.RawMessage) {
 		RegisterStopJobsExceedTime()
 	}
 
+	fmt.Printf("Keys : %#v\n", Keys)
+	fmt.Printf("cronCfg : %#v\n", cronCfg)
+	fmt.Printf("archiveConfig : %#v\n", archiveConfig)
+
 	dec := json.NewDecoder(bytes.NewReader(cronCfg))
 	dec.DisallowUnknownFields()
 	if err := dec.Decode(&Keys); err != nil {
-		cclog.Errorf("error while decoding ldap config: %v", err)
+		cclog.Errorf("error while decoding cron config: %v", err)
 	}
 
 	var cfg struct {