From e707fd08936ac683936e45af0155091e05ee7ced Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 18 Dec 2025 11:26:05 +0100 Subject: [PATCH 01/17] Provide fallback in archive manager in case fd is not available --- tools/archive-manager/main.go | 70 +++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/tools/archive-manager/main.go b/tools/archive-manager/main.go index 940c92d..4972fe9 100644 --- a/tools/archive-manager/main.go +++ b/tools/archive-manager/main.go @@ -9,9 +9,11 @@ import ( "encoding/json" "flag" "fmt" + "io/fs" "os" "os/exec" "os/signal" + "path/filepath" "strconv" "strings" "sync" @@ -39,28 +41,47 @@ func parseDate(in string) int64 { return 0 } -// countJobs counts the total number of jobs in the source archive using external fd command. -// It requires the fd binary to be available in PATH. -// The srcConfig parameter should be the JSON configuration string containing the archive path. -func countJobs(srcConfig string) (int, error) { - fdPath, err := exec.LookPath("fd") - if err != nil { - return 0, fmt.Errorf("fd binary not found in PATH: %w", err) - } - +// parseArchivePath extracts the path from the source config JSON. +func parseArchivePath(srcConfig string) (string, error) { var config struct { Kind string `json:"kind"` Path string `json:"path"` } if err := json.Unmarshal([]byte(srcConfig), &config); err != nil { - return 0, fmt.Errorf("failed to parse source config: %w", err) + return "", fmt.Errorf("failed to parse source config: %w", err) } if config.Path == "" { - return 0, fmt.Errorf("no path found in source config") + return "", fmt.Errorf("no path found in source config") } - fdCmd := exec.Command(fdPath, "meta.json", config.Path) + return config.Path, nil +} + +// countJobsNative counts jobs using native Go filepath.WalkDir. +// This is used as a fallback when fd/fdfind is not available. +func countJobsNative(archivePath string) (int, error) { + count := 0 + err := filepath.WalkDir(archivePath, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return nil // Skip directories we can't access + } + if !d.IsDir() && d.Name() == "meta.json" { + count++ + } + return nil + }) + + if err != nil { + return 0, fmt.Errorf("failed to walk directory: %w", err) + } + + return count, nil +} + +// countJobsWithFd counts jobs using the external fd command. +func countJobsWithFd(fdPath, archivePath string) (int, error) { + fdCmd := exec.Command(fdPath, "meta.json", archivePath) wcCmd := exec.Command("wc", "-l") pipe, err := fdCmd.StdoutPipe() @@ -91,6 +112,31 @@ func countJobs(srcConfig string) (int, error) { return count, nil } +// countJobs counts the total number of jobs in the source archive. +// It tries to use external fd/fdfind command for speed, falling back to +// native Go filepath.WalkDir if neither is available. +// The srcConfig parameter should be the JSON configuration string containing the archive path. +func countJobs(srcConfig string) (int, error) { + archivePath, err := parseArchivePath(srcConfig) + if err != nil { + return 0, err + } + + // Try fd first (common name) + if fdPath, err := exec.LookPath("fd"); err == nil { + return countJobsWithFd(fdPath, archivePath) + } + + // Try fdfind (Debian/Ubuntu package name) + if fdPath, err := exec.LookPath("fdfind"); err == nil { + return countJobsWithFd(fdPath, archivePath) + } + + // Fall back to native Go implementation + cclog.Debug("fd/fdfind not found, using native Go file walker") + return countJobsNative(archivePath) +} + // formatDuration formats a duration as a human-readable string. func formatDuration(d time.Duration) string { if d < time.Minute { From 43bdb56072554ed32fdddfa783b9c184826574fa Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 18 Dec 2025 15:04:03 +0100 Subject: [PATCH 02/17] add fallback case if metric has no name in nodeListRow --- web/frontend/src/systems/nodelist/NodeListRow.svelte | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index d2c71ff..bc93a32 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -148,13 +148,19 @@ hoststate={nodeData?.state? nodeData.state: 'notindb'}/> {/if} - {#each refinedData as metricData (metricData.data.name)} + {#each refinedData as metricData, i (metricData?.data?.name || i)} {#key metricData} {#if metricData?.disabled} Metric disabled for subcluster {metricData.data.name}:{nodeData.subCluster}{metricData?.data?.name ? metricData.data.name : `Metric Index ${i}`}:{nodeData.subCluster} + {:else if !metricData?.data?.name} + Metric without name for subcluster {`Metric Index ${i}`}:{nodeData.subCluster} {:else if !!metricData.data?.metric.statisticsSeries} From 6e74fa294aa3e3d61aa07a891d5ccf0dabbaf989 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 18 Dec 2025 15:47:30 +0100 Subject: [PATCH 03/17] Add role-based visibility for metrics Fixes #387 --- go.mod | 2 +- go.sum | 4 +- internal/graph/schema.resolvers.go | 45 +++++++------ pkg/archive/clusterConfig.go | 100 +++++++++++++++-------------- pkg/nats/client.go | 2 +- 5 files changed, 81 insertions(+), 72 deletions(-) diff --git a/go.mod b/go.mod index 75e62f1..df8e1fb 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ tool ( require ( github.com/99designs/gqlgen v0.17.84 - github.com/ClusterCockpit/cc-lib v1.0.0 + github.com/ClusterCockpit/cc-lib v1.0.2 github.com/Masterminds/squirrel v1.5.4 github.com/aws/aws-sdk-go-v2 v1.41.0 github.com/aws/aws-sdk-go-v2/config v1.31.20 diff --git a/go.sum b/go.sum index e8630b7..711c555 100644 --- a/go.sum +++ b/go.sum @@ -6,8 +6,8 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= -github.com/ClusterCockpit/cc-lib v1.0.0 h1:/8DFRomt4BpVWKWrsEZ/ru4K8x76QTVnEgdwHc5eSps= -github.com/ClusterCockpit/cc-lib v1.0.0/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k= +github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg= +github.com/ClusterCockpit/cc-lib v1.0.2/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k= github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go index 9747479..cd4af05 100644 --- a/internal/graph/schema.resolvers.go +++ b/internal/graph/schema.resolvers.go @@ -88,14 +88,14 @@ func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]* res := []*model.EnergyFootprintValue{} for name, value := range rawEnergyFootprint { // Suboptimal: Nearly hardcoded metric name expectations - matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`) + matchCPU := regexp.MustCompile(`cpu|Cpu|CPU`) matchAcc := regexp.MustCompile(`acc|Acc|ACC`) matchMem := regexp.MustCompile(`mem|Mem|MEM`) matchCore := regexp.MustCompile(`core|Core|CORE`) hwType := "" switch test := name; { // NOtice ';' for var declaration - case matchCpu.MatchString(test): + case matchCPU.MatchString(test): hwType = "CPU" case matchAcc.MatchString(test): hwType = "Accelerator" @@ -175,9 +175,9 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds } tags := []*schema.Tag{} - for _, tagId := range tagIds { + for _, tagID := range tagIds { // Get ID - tid, err := strconv.ParseInt(tagId, 10, 64) + tid, err := strconv.ParseInt(tagID, 10, 64) if err != nil { cclog.Warn("Error while parsing tag id") return nil, err @@ -222,9 +222,9 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta } tags := []*schema.Tag{} - for _, tagId := range tagIds { + for _, tagID := range tagIds { // Get ID - tid, err := strconv.ParseInt(tagId, 10, 64) + tid, err := strconv.ParseInt(tagID, 10, 64) if err != nil { cclog.Warn("Error while parsing tag id") return nil, err @@ -265,9 +265,9 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin } tags := []int{} - for _, tagId := range tagIds { + for _, tagID := range tagIds { // Get ID - tid, err := strconv.ParseInt(tagId, 10, 64) + tid, err := strconv.ParseInt(tagID, 10, 64) if err != nil { cclog.Warn("Error while parsing tag id for removal") return nil, err @@ -317,7 +317,7 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc if obj.NodeState != "" { return obj.NodeState, nil } else { - return "", fmt.Errorf("No SchedulerState (NodeState) on Object") + return "", fmt.Errorf("no SchedulerState (NodeState) on Object") } } @@ -343,6 +343,14 @@ func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) { // GlobalMetrics is the resolver for the globalMetrics field. func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) { + user := repository.GetUserFromContext(ctx) + + if user != nil { + if user.HasRole(schema.RoleUser) || user.HasRole(schema.RoleManager) { + return archive.GlobalUserMetricList, nil + } + } + return archive.GlobalMetricList, nil } @@ -373,12 +381,12 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]* // Node is the resolver for the node field. func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) { repo := repository.GetNodeRepository() - numericId, err := strconv.ParseInt(id, 10, 64) + numericID, err := strconv.ParseInt(id, 10, 64) if err != nil { cclog.Warn("Error while parsing job id") return nil, err } - return repo.GetNodeByID(numericId, false) + return repo.GetNodeByID(numericID, false) } // Nodes is the resolver for the nodes field. @@ -405,8 +413,7 @@ func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilt return nil, herr } - allCounts := make([]*model.NodeStates, 0) - allCounts = append(stateCounts, healthCounts...) + allCounts := append(stateCounts, healthCounts...) return allCounts, nil } @@ -433,18 +440,18 @@ func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.Nod return healthCounts, nil } - return nil, errors.New("Unknown Node State Query Type") + return nil, errors.New("unknown Node State Query Type") } // Job is the resolver for the job field. func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) { - numericId, err := strconv.ParseInt(id, 10, 64) + numericID, err := strconv.ParseInt(id, 10, 64) if err != nil { cclog.Warn("Error while parsing job id") return nil, err } - job, err := r.Repo.FindByID(ctx, numericId) + job, err := r.Repo.FindByID(ctx, numericID) if err != nil { cclog.Warn("Error while finding job by id") return nil, err @@ -809,7 +816,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub nodeRepo := repository.GetNodeRepository() nodes, stateMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page) if nerr != nil { - return nil, errors.New("Could not retrieve node list required for resolving NodeMetricsList") + return nil, errors.New("could not retrieve node list required for resolving NodeMetricsList") } if metrics == nil { @@ -898,9 +905,7 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr collectorUnit[metric] = scopedMetric.Unit // Collect Initial Data for _, ser := range scopedMetric.Series { - for _, val := range ser.Data { - collectorData[metric] = append(collectorData[metric], val) - } + collectorData[metric] = append(collectorData[metric], ser.Data...) } } } else { diff --git a/pkg/archive/clusterConfig.go b/pkg/archive/clusterConfig.go index 13890c9..696601b 100644 --- a/pkg/archive/clusterConfig.go +++ b/pkg/archive/clusterConfig.go @@ -6,7 +6,6 @@ package archive import ( - "errors" "fmt" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" @@ -14,13 +13,16 @@ import ( ) var ( - Clusters []*schema.Cluster - GlobalMetricList []*schema.GlobalMetricListItem - NodeLists map[string]map[string]NodeList + Clusters []*schema.Cluster + GlobalMetricList []*schema.GlobalMetricListItem + GlobalUserMetricList []*schema.GlobalMetricListItem + NodeLists map[string]map[string]NodeList ) func initClusterConfig() error { Clusters = []*schema.Cluster{} + GlobalMetricList = []*schema.GlobalMetricListItem{} + GlobalUserMetricList = []*schema.GlobalMetricListItem{} NodeLists = map[string]map[string]NodeList{} metricLookup := make(map[string]schema.GlobalMetricListItem) @@ -29,38 +31,41 @@ func initClusterConfig() error { cluster, err := ar.LoadClusterCfg(c) if err != nil { cclog.Warnf("Error while loading cluster config for cluster '%v'", c) - return err + return fmt.Errorf("failed to load cluster config for '%s': %w", c, err) } - if len(cluster.Name) == 0 || - len(cluster.MetricConfig) == 0 || - len(cluster.SubClusters) == 0 { - return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty") + if len(cluster.Name) == 0 { + return fmt.Errorf("cluster name is empty in config for '%s'", c) + } + if len(cluster.MetricConfig) == 0 { + return fmt.Errorf("cluster '%s' has no metric configurations", cluster.Name) + } + if len(cluster.SubClusters) == 0 { + return fmt.Errorf("cluster '%s' has no subclusters defined", cluster.Name) } for _, mc := range cluster.MetricConfig { if len(mc.Name) == 0 { - return errors.New("cluster.metricConfig.name should not be empty") + return fmt.Errorf("cluster '%s' has a metric config with empty name", cluster.Name) } if mc.Timestep < 1 { - return errors.New("cluster.metricConfig.timestep should not be smaller than one") + return fmt.Errorf("metric '%s' in cluster '%s' has invalid timestep %d (must be >= 1)", mc.Name, cluster.Name, mc.Timestep) } - // For backwards compability... + // For backwards compatibility... if mc.Scope == "" { mc.Scope = schema.MetricScopeNode } if !mc.Scope.Valid() { - return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)") + return fmt.Errorf("metric '%s' in cluster '%s' has invalid scope '%s' (must be 'node', 'socket', 'core', etc.)", mc.Name, cluster.Name, mc.Scope) } - ml, ok := metricLookup[mc.Name] - if !ok { + if _, ok := metricLookup[mc.Name]; !ok { metricLookup[mc.Name] = schema.GlobalMetricListItem{ - Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint, + Name: mc.Name, Scope: mc.Scope, Restrict: mc.Restrict, Unit: mc.Unit, Footprint: mc.Footprint, } - ml = metricLookup[mc.Name] } + availability := schema.ClusterSupport{Cluster: cluster.Name} scLookup := make(map[string]*schema.SubClusterConfig) @@ -90,39 +95,35 @@ func initClusterConfig() error { } if cfg, ok := scLookup[sc.Name]; ok { - if !cfg.Remove { - availability.SubClusters = append(availability.SubClusters, sc.Name) - newMetric.Peak = cfg.Peak - newMetric.Normal = cfg.Normal - newMetric.Caution = cfg.Caution - newMetric.Alert = cfg.Alert - newMetric.Footprint = cfg.Footprint - newMetric.Energy = cfg.Energy - newMetric.LowerIsBetter = cfg.LowerIsBetter - sc.MetricConfig = append(sc.MetricConfig, *newMetric) + if cfg.Remove { + continue + } + newMetric.Peak = cfg.Peak + newMetric.Normal = cfg.Normal + newMetric.Caution = cfg.Caution + newMetric.Alert = cfg.Alert + newMetric.Footprint = cfg.Footprint + newMetric.Energy = cfg.Energy + newMetric.LowerIsBetter = cfg.LowerIsBetter + } - if newMetric.Footprint != "" { - sc.Footprint = append(sc.Footprint, newMetric.Name) - ml.Footprint = newMetric.Footprint - } - if newMetric.Energy != "" { - sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name) - } - } - } else { - availability.SubClusters = append(availability.SubClusters, sc.Name) - sc.MetricConfig = append(sc.MetricConfig, *newMetric) + availability.SubClusters = append(availability.SubClusters, sc.Name) + sc.MetricConfig = append(sc.MetricConfig, *newMetric) - if newMetric.Footprint != "" { - sc.Footprint = append(sc.Footprint, newMetric.Name) - } - if newMetric.Energy != "" { - sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name) - } + if newMetric.Footprint != "" { + sc.Footprint = append(sc.Footprint, newMetric.Name) + item := metricLookup[mc.Name] + item.Footprint = newMetric.Footprint + metricLookup[mc.Name] = item + } + if newMetric.Energy != "" { + sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name) } } - ml.Availability = append(metricLookup[mc.Name].Availability, availability) - metricLookup[mc.Name] = ml + + item := metricLookup[mc.Name] + item.Availability = append(item.Availability, availability) + metricLookup[mc.Name] = item } Clusters = append(Clusters, cluster) @@ -141,8 +142,11 @@ func initClusterConfig() error { } } - for _, ml := range metricLookup { - GlobalMetricList = append(GlobalMetricList, &ml) + for _, metric := range metricLookup { + GlobalMetricList = append(GlobalMetricList, &metric) + if !metric.Restrict { + GlobalUserMetricList = append(GlobalUserMetricList, &metric) + } } return nil diff --git a/pkg/nats/client.go b/pkg/nats/client.go index e61d060..822a7b2 100644 --- a/pkg/nats/client.go +++ b/pkg/nats/client.go @@ -83,7 +83,7 @@ func Connect() { client, err := NewClient(nil) if err != nil { - cclog.Errorf("NATS connection failed: %v", err) + cclog.Warnf("NATS connection failed: %v", err) return } From d446c135468056f11df75d272964b375dc6bfa7a Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Thu, 18 Dec 2025 15:47:51 +0100 Subject: [PATCH 04/17] Restore startDemo script --- startDemo.sh | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/startDemo.sh b/startDemo.sh index e709db2..108c95f 100755 --- a/startDemo.sh +++ b/startDemo.sh @@ -1,22 +1,18 @@ #!/bin/sh -# rm -rf var - if [ -d './var' ]; then echo 'Directory ./var already exists! Skipping initialization.' - ./cc-backend -server -dev + ./cc-backend -server -dev -loglevel info else make - wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar - tar xf job-archive-dev.tar - rm ./job-archive-dev.tar - - cp ./configs/env-template.txt .env + ./cc-backend --init cp ./configs/config-demo.json config.json - ./cc-backend -migrate-db + wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar + tar xf job-archive-demo.tar + rm ./job-archive-demo.tar + ./cc-backend -dev -init-db -add-user demo:admin,api:demo + ./cc-backend -server -dev -loglevel info +fi - ./cc-backend -server -dev - -fi \ No newline at end of file From 436afa4a61116035e069801650551e8bd84c57f9 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Thu, 18 Dec 2025 15:55:30 +0100 Subject: [PATCH 05/17] fix tag count by including type in grouping --- internal/repository/tags.go | 9 +++++---- internal/routerConfig/routes.go | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/internal/repository/tags.go b/internal/repository/tags.go index 5ca1338..8a076e8 100644 --- a/internal/repository/tags.go +++ b/internal/repository/tags.go @@ -224,10 +224,10 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts } // Query and Count Jobs with attached Tags - q := sq.Select("t.tag_name, t.id, count(jt.tag_id)"). + q := sq.Select("t.tag_type, t.tag_name, t.id, count(jt.tag_id)"). From("tag t"). LeftJoin("jobtag jt ON t.id = jt.tag_id"). - GroupBy("t.tag_name") + GroupBy("t.tag_type, t.tag_name") // Build scope list for filtering var scopeBuilder strings.Builder @@ -260,14 +260,15 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts counts = make(map[string]int) for rows.Next() { + var tagType string var tagName string var tagId int var count int - if err = rows.Scan(&tagName, &tagId, &count); err != nil { + if err = rows.Scan(&tagType, &tagName, &tagId, &count); err != nil { return nil, nil, err } // Use tagId as second Map-Key component to differentiate tags with identical names - counts[fmt.Sprint(tagName, tagId)] = count + counts[fmt.Sprint(tagType, tagName, tagId)] = count } err = rows.Err() diff --git a/internal/routerConfig/routes.go b/internal/routerConfig/routes.go index c2126cd..4466034 100644 --- a/internal/routerConfig/routes.go +++ b/internal/routerConfig/routes.go @@ -205,13 +205,13 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType { "id": tag.ID, "name": tag.Name, "scope": tag.Scope, - "count": counts[fmt.Sprint(tag.Name, tag.ID)], + "count": counts[fmt.Sprint(tag.Type, tag.Name, tag.ID)], } tagMap[tag.Type] = append(tagMap[tag.Type], tagItem) } } else if userAuthlevel < 4 && userAuthlevel >= 2 { // User+ : Show global and admin scope only if at least 1 tag used, private scope regardless of count for _, tag := range tags { - tagCount := counts[fmt.Sprint(tag.Name, tag.ID)] + tagCount := counts[fmt.Sprint(tag.Type, tag.Name, tag.ID)] if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") { tagItem := map[string]interface{}{ "id": tag.ID, From c58b01a602543d893c0ffdecb34461ce4964a725 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 14:42:02 +0100 Subject: [PATCH 06/17] fix wrong render condition order in nodeList --- web/frontend/src/systems/NodeList.svelte | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index c01ef23..e904076 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -246,16 +246,7 @@ {$nodesQuery.error.message} - {:else} - {#each nodes as nodeData (nodeData.host)} - - {:else} - - No nodes found - - {/each} - {/if} - {#if $nodesQuery.fetching || !$nodesQuery.data} + {:else if $nodesQuery.fetching || !$nodesQuery.data}
@@ -272,6 +263,14 @@
+ {:else} + {#each nodes as nodeData (nodeData.host)} + + {:else} + + No nodes found + + {/each} {/if} From 7a0975b94d8af215139c2202c55f2dbfa0a2153a Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 15:10:15 +0100 Subject: [PATCH 07/17] final fix render race condition if metrics change in nodeList --- web/frontend/src/Systems.root.svelte | 2 +- web/frontend/src/systems/NodeList.svelte | 24 ++++++++++--------- .../src/systems/nodelist/NodeListRow.svelte | 18 ++++++++++++++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/web/frontend/src/Systems.root.svelte b/web/frontend/src/Systems.root.svelte index b27cefa..d81a64c 100644 --- a/web/frontend/src/Systems.root.svelte +++ b/web/frontend/src/Systems.root.svelte @@ -269,7 +269,7 @@ {:else} - + {/if} {/if} diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index e904076..fa758c1 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -5,7 +5,7 @@ - `cluster String`: The nodes' cluster - `subCluster String`: The nodes' subCluster [Default: ""] - `ccconfig Object?`: The ClusterCockpit Config Context [Default: null] - - `selectedMetrics [String]`: The array of selected metrics [Default []] + - `pendingSelectedMetrics [String]`: The array of selected metrics [Default []] - `selectedResolution Number?`: The selected data resolution [Default: 0] - `hostnameFilter String?`: The active hostnamefilter [Default: ""] - `hoststateFilter String?`: The active hoststatefilter [Default: ""] @@ -27,7 +27,7 @@ cluster, subCluster = "", ccconfig = null, - selectedMetrics = [], + pendingSelectedMetrics = [], selectedResolution = 0, hostnameFilter = "", hoststateFilter = "", @@ -94,6 +94,7 @@ /* State Init */ let nodes = $state([]); + let selectedMetrics = $state(pendingSelectedMetrics); let page = $state(1); let itemsPerPage = $state(usePaging ? (ccconfig?.nodeList_nodesPerPage || 10) : 10); let headerPaddingTop = $state(0); @@ -110,7 +111,7 @@ stateFilter: hoststateFilter, nodeFilter: hostnameFilter, scopes: ["core", "socket", "accelerator"], - metrics: selectedMetrics, + metrics: pendingSelectedMetrics, from: from.toISOString(), to: to.toISOString(), paging: paging, @@ -140,15 +141,17 @@ $effect(() => { if ($nodesQuery?.data) { untrack(() => { - handleNodes($nodesQuery?.data?.nodeMetricsList); + nodes = handleNodes($nodesQuery?.data?.nodeMetricsList); + matchedNodes = $nodesQuery?.data?.totalNodes || 0; }); + selectedMetrics = [...pendingSelectedMetrics]; // Trigger Rerender in NodeListRow Only After Data is Fetched }; }); $effect(() => { // Triggers (Except Paging) from, to - selectedMetrics, selectedResolution + pendingSelectedMetrics, selectedResolution hostnameFilter, hoststateFilter // Continous Scroll: Paging if parameters change: Existing entries will not match new selections // Nodes Array Reset in HandleNodes func @@ -162,17 +165,16 @@ if (data) { if (usePaging) { // console.log('New Paging', $state.snapshot(paging)) - nodes = [...data.items].sort((a, b) => a.host.localeCompare(b.host)); + return [...data.items].sort((a, b) => a.host.localeCompare(b.host)); } else { if ($state.snapshot(page) == 1) { // console.log('Page 1 Reset', [...data.items]) - nodes = [...data.items].sort((a, b) => a.host.localeCompare(b.host)); + return [...data.items].sort((a, b) => a.host.localeCompare(b.host)); } else { // console.log('Add Nodes', $state.snapshot(nodes), [...data.items]) - nodes = nodes.concat([...data.items]) + return nodes.concat([...data.items]) } } - matchedNodes = data.totalNodes; }; }; @@ -228,7 +230,7 @@ {/if} - {#each selectedMetrics as metric (metric)} + {#each pendingSelectedMetrics as metric (metric)} {:else if $nodesQuery.fetching || !$nodesQuery.data} - +
{#if !usePaging}

diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index bc93a32..e7e095e 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -128,6 +128,24 @@ } return pendingExtendedLegendData; } + + /* Inspect */ + // $inspect(selectedMetrics).with((type, selectedMetrics) => { + // console.log(type, 'selectedMetrics', selectedMetrics) + // }); + + // $inspect(nodeData).with((type, nodeData) => { + // console.log(type, 'nodeData', nodeData) + // }); + + // $inspect(refinedData).with((type, refinedData) => { + // console.log(type, 'refinedData', refinedData) + // }); + + // $inspect(dataHealth).with((type, dataHealth) => { + // console.log(type, 'dataHealth', dataHealth) + // }); + From 91b90d033e8703acffe5740872310a2f094fdcd2 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 15:27:35 +0100 Subject: [PATCH 08/17] fix metric select drag and drop --- .../src/generic/select/MetricSelection.svelte | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index 8bcaefc..eeab56d 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -107,13 +107,18 @@ } } + function columnsDragOver(event) { + event.preventDefault(); + event.dataTransfer.dropEffect = 'move'; + } + function columnsDragStart(event, i) { event.dataTransfer.effectAllowed = "move"; event.dataTransfer.dropEffect = "move"; event.dataTransfer.setData("text/plain", i); } - function columnsDrag(event, target) { + function columnsDrop(event, target) { event.dataTransfer.dropEffect = "move"; const start = Number.parseInt(event.dataTransfer.getData("text/plain")); @@ -182,19 +187,18 @@ {/if} {#each listedMetrics as metric, index (metric)}

  • { - event.preventDefault() - return false + columnsDragOver(event) }} ondragstart={(event) => { columnsDragStart(event, index) }} ondrop={(event) => { event.preventDefault() - columnsDrag(event, index) + columnsDrop(event, index) }} ondragenter={() => (columnHovering = index)} > @@ -237,4 +241,10 @@ color: #fff; cursor: grabbing; } + + li.prevent-select { + -webkit-user-select: none; /* Safari */ + -ms-user-select: none; /* IE 10 and IE 11 */ + user-select: none; /* Standard syntax */ +} From af7d208c21c9dfbd539d2baa9c6bd6d4ea95d85c Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 16:16:57 +0100 Subject: [PATCH 09/17] remove unused class --- web/frontend/src/generic/select/MetricSelection.svelte | 6 ------ 1 file changed, 6 deletions(-) diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index eeab56d..67bbbd0 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -241,10 +241,4 @@ color: #fff; cursor: grabbing; } - - li.prevent-select { - -webkit-user-select: none; /* Safari */ - -ms-user-select: none; /* IE 10 and IE 11 */ - user-select: none; /* Standard syntax */ -} From 7acc89e42d6b23f0250f387f93dc72223cce1655 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 19 Dec 2025 17:52:21 +0100 Subject: [PATCH 10/17] move public dash close button --- web/frontend/src/DashPublic.root.svelte | 27 +++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index 36e6703..25e2683 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -338,7 +338,7 @@ - + - - - {#if $statusQuery.fetching || $statesTimed.fetching} @@ -368,6 +363,13 @@ {:else if $statusQuery.error || $statesTimed.error} + + + + + {#if $statusQuery.error} @@ -385,8 +387,17 @@ - -

    Cluster {presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}

    + + + +

    Cluster {presetCluster.charAt(0).toUpperCase() + presetCluster.slice(1)}

    + + + + +

    CPU(s)

    {[...clusterInfo?.processorTypes].join(', ')}

    From fdee4f89386aa8c712effe19467cf755d97807e8 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 09:21:58 +0100 Subject: [PATCH 11/17] Integrate NATS API. Only start either REST start/stop API or NATS start/stop API --- cmd/cc-backend/server.go | 26 +++++++++++++++++--------- internal/api/rest.go | 7 +++++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/cmd/cc-backend/server.go b/cmd/cc-backend/server.go index 2c5ce8b..4ed7962 100644 --- a/cmd/cc-backend/server.go +++ b/cmd/cc-backend/server.go @@ -49,9 +49,10 @@ const ( // Server encapsulates the HTTP server state and dependencies type Server struct { - router *mux.Router - server *http.Server - apiHandle *api.RestAPI + router *mux.Router + server *http.Server + restAPIHandle *api.RestAPI + natsAPIHandle *api.NatsAPI } func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) { @@ -104,7 +105,7 @@ func (s *Server) init() error { authHandle := auth.GetAuthInstance() - s.apiHandle = api.New() + s.restAPIHandle = api.New() info := map[string]any{} info["hasOpenIDConnect"] = false @@ -240,13 +241,20 @@ func (s *Server) init() error { // Mount all /monitoring/... and /api/... routes. routerConfig.SetupRoutes(secured, buildInfo) - s.apiHandle.MountAPIRoutes(securedapi) - s.apiHandle.MountUserAPIRoutes(userapi) - s.apiHandle.MountConfigAPIRoutes(configapi) - s.apiHandle.MountFrontendAPIRoutes(frontendapi) + s.restAPIHandle.MountAPIRoutes(securedapi) + s.restAPIHandle.MountUserAPIRoutes(userapi) + s.restAPIHandle.MountConfigAPIRoutes(configapi) + s.restAPIHandle.MountFrontendAPIRoutes(frontendapi) + + if config.Keys.APISubjects != nil { + s.natsAPIHandle = api.NewNatsAPI() + if err := s.natsAPIHandle.StartSubscriptions(); err != nil { + return fmt.Errorf("starting NATS subscriptions: %w", err) + } + } if memorystore.InternalCCMSFlag { - s.apiHandle.MountMetricStoreAPIRoutes(metricstoreapi) + s.restAPIHandle.MountMetricStoreAPIRoutes(metricstoreapi) } if config.Keys.EmbedStaticFiles { diff --git a/internal/api/rest.go b/internal/api/rest.go index 8232b64..ebcf31e 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -79,8 +79,11 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) { // Slurm node state r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut) // Job Handler - r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut) - r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut) + if config.Keys.APISubjects == nil { + cclog.Info("Enabling REST start/stop job API") + r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut) + r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut) + } r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet) r.HandleFunc("/jobs/{id}", api.getJobByID).Methods(http.MethodPost) r.HandleFunc("/jobs/{id}", api.getCompleteJobByID).Methods(http.MethodGet) From e56532e5c8b8b52f9e1089552cc949fb6844db43 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 09:35:54 +0100 Subject: [PATCH 12/17] Add example json API payloads --- configs/startJobPayload.json | 22 ++++++++++++++++++++++ configs/stopJobPayload.json | 7 +++++++ 2 files changed, 29 insertions(+) create mode 100644 configs/startJobPayload.json create mode 100644 configs/stopJobPayload.json diff --git a/configs/startJobPayload.json b/configs/startJobPayload.json new file mode 100644 index 0000000..9517876 --- /dev/null +++ b/configs/startJobPayload.json @@ -0,0 +1,22 @@ +{ + "cluster": "fritz", + "jobId": 123000, + "jobState": "running", + "numAcc": 0, + "numHwthreads": 72, + "numNodes": 1, + "partition": "main", + "requestedMemory": 128000, + "resources": [{ "hostname": "f0726" }], + "startTime": 1649723812, + "subCluster": "main", + "submitTime": 1649723812, + "user": "k106eb10", + "project": "k106eb", + "walltime": 86400, + "metaData": { + "slurmInfo": "JobId=398759\nJobName=myJob\nUserId=dummyUser\nGroupId=dummyGroup\nAccount=dummyAccount\nQOS=normal Requeue=False Restarts=0 BatchFlag=True\nTimeLimit=1439'\nSubmitTime=2023-02-09T14:10:18\nPartition=singlenode\nNodeList=xx\nNumNodes=xx NumCPUs=72 NumTasks=72 CPUs/Task=1\nNTasksPerNode:Socket:Core=0:None:None\nTRES_req=cpu=72,mem=250000M,node=1,billing=72\nTRES_alloc=cpu=72,node=1,billing=72\nCommand=myCmd\nWorkDir=myDir\nStdErr=\nStdOut=\n", + "jobScript": "#!/bin/bash -l\n#SBATCH --job-name=dummy_job\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/dummy/\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\n\n#This is a dummy job script\n./mybinary\n", + "jobName": "ams_pipeline" + } +} diff --git a/configs/stopJobPayload.json b/configs/stopJobPayload.json new file mode 100644 index 0000000..baf76f9 --- /dev/null +++ b/configs/stopJobPayload.json @@ -0,0 +1,7 @@ +{ + "cluster": "fritz", + "jobId": 123000, + "jobState": "completed", + "startTime": 1649723812, + "stopTime": 1649763839 +} From 3cfcd301281c14af88f60e10790c4d52e44c213b Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 10:17:54 +0100 Subject: [PATCH 13/17] Add CLAUDE.md documentation for Claude Code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provides architecture overview, build commands, and development workflows to help future Claude Code instances work productively in this codebase. Includes guidance on GraphQL/REST API patterns, database migrations, and the repository/metric data architecture. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- CLAUDE.md | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2bb08c9 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,198 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +ClusterCockpit is a job-specific performance monitoring framework for HPC clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a Svelte-based frontend, and manages job archives and metric data from various time-series databases. + +## Build and Development Commands + +### Building + +```bash +# Build everything (frontend + backend) +make + +# Build only the frontend +make frontend + +# Build only the backend (requires frontend to be built first) +go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.4.4 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend +``` + +### Testing + +```bash +# Run all tests +make test + +# Run tests with verbose output +go test -v ./... + +# Run tests for a specific package +go test ./internal/repository +``` + +### Code Generation + +```bash +# Regenerate GraphQL schema and resolvers (after modifying api/*.graphqls) +make graphql + +# Regenerate Swagger/OpenAPI docs (after modifying API comments) +make swagger +``` + +### Frontend Development + +```bash +cd web/frontend + +# Install dependencies +npm install + +# Build for production +npm run build + +# Development mode with watch +npm run dev +``` + +### Running + +```bash +# Initialize database and create admin user +./cc-backend -init-db -add-user demo:admin:demo + +# Start server in development mode (enables GraphQL Playground and Swagger UI) +./cc-backend -server -dev -loglevel info + +# Start demo with sample data +./startDemo.sh +``` + +## Architecture + +### Backend Structure + +The backend follows a layered architecture with clear separation of concerns: + +- **cmd/cc-backend**: Entry point, orchestrates initialization of all subsystems +- **internal/repository**: Data access layer using repository pattern + - Abstracts database operations (SQLite/MySQL) + - Implements LRU caching for performance + - Provides repositories for Job, User, Node, and Tag entities + - Transaction support for batch operations +- **internal/api**: REST API endpoints (Swagger/OpenAPI documented) +- **internal/graph**: GraphQL API (uses gqlgen) + - Schema in `api/*.graphqls` + - Generated code in `internal/graph/generated/` + - Resolvers in `internal/graph/schema.resolvers.go` +- **internal/auth**: Authentication layer + - Supports local accounts, LDAP, OIDC, and JWT tokens + - Implements rate limiting for login attempts +- **internal/metricdata**: Metric data repository abstraction + - Pluggable backends: cc-metric-store, Prometheus, InfluxDB + - Each cluster can have a different metric data backend +- **internal/archiver**: Job archiving to file-based archive +- **pkg/archive**: Job archive backend implementations + - File system backend (default) + - S3 backend + - SQLite backend (experimental) +- **pkg/nats**: NATS integration for metric ingestion + +### Frontend Structure + +- **web/frontend**: Svelte 5 application + - Uses Rollup for building + - Components organized by feature (analysis, job, user, etc.) + - GraphQL client using @urql/svelte + - Bootstrap 5 + SvelteStrap for UI + - uPlot for time-series visualization +- **web/templates**: Server-side Go templates + +### Key Concepts + +**Job Archive**: Completed jobs are stored in a file-based archive following the [ClusterCockpit job-archive specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). Each job has a `meta.json` file with metadata and metric data files. + +**Metric Data Repositories**: Time-series metric data is stored separately from job metadata. The system supports multiple backends (cc-metric-store is recommended). Configuration is per-cluster in `config.json`. + +**Authentication Flow**: +1. Multiple authenticators can be configured (local, LDAP, OIDC, JWT) +2. Each authenticator's `CanLogin` method is called to determine if it should handle the request +3. The first authenticator that returns true performs the actual `Login` +4. JWT tokens are used for API authentication + +**Database Migrations**: SQL migrations in `internal/repository/migrations/` are applied automatically on startup. Version tracking in `version` table. + +**Scopes**: Metrics can be collected at different scopes: +- Node scope (always available) +- Core scope (for jobs with ≤8 nodes) +- Accelerator scope (for GPU/accelerator metrics) + +## Configuration + +- **config.json**: Main configuration (clusters, metric repositories, archive settings) +- **.env**: Environment variables (secrets like JWT keys) + - Copy from `configs/env-template.txt` + - NEVER commit this file +- **cluster.json**: Cluster topology and metric definitions (loaded from archive or config) + +## Database + +- Default: SQLite 3 (`./var/job.db`) +- Optional: MySQL/MariaDB +- Connection managed by `internal/repository` +- Schema version in `internal/repository/migration.go` + +## Code Generation + +**GraphQL** (gqlgen): +- Schema: `api/*.graphqls` +- Config: `gqlgen.yml` +- Generated code: `internal/graph/generated/` +- Custom resolvers: `internal/graph/schema.resolvers.go` +- Run `make graphql` after schema changes + +**Swagger/OpenAPI**: +- Annotations in `internal/api/*.go` +- Generated docs: `api/docs.go`, `api/swagger.yaml` +- Run `make swagger` after API changes + +## Testing Conventions + +- Test files use `_test.go` suffix +- Test data in `testdata/` subdirectories +- Repository tests use in-memory SQLite +- API tests use httptest + +## Common Workflows + +### Adding a new GraphQL field +1. Edit schema in `api/*.graphqls` +2. Run `make graphql` +3. Implement resolver in `internal/graph/schema.resolvers.go` + +### Adding a new REST endpoint +1. Add handler in `internal/api/*.go` +2. Add route in `internal/api/rest.go` +3. Add Swagger annotations +4. Run `make swagger` + +### Adding a new metric data backend +1. Implement `MetricDataRepository` interface in `internal/metricdata/` +2. Register in `metricdata.Init()` switch statement +3. Update config.json schema documentation + +### Modifying database schema +1. Create new migration in `internal/repository/migrations/` +2. Increment `repository.Version` +3. Test with fresh database and existing database + +## Dependencies + +- Go 1.24.0+ (check go.mod for exact version) +- Node.js (for frontend builds) +- SQLite 3 or MySQL/MariaDB +- Optional: NATS server for metric ingestion From b35172e2f7bc56fad47a12ef36398ddba376d6db Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 11:13:02 +0100 Subject: [PATCH 14/17] Add context information for CLAUDE coding agent --- CLAUDE.md | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 2bb08c9..379b4db 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,10 +1,14 @@ # CLAUDE.md -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. +This file provides guidance to Claude Code (claude.ai/code) when working with +code in this repository. ## Project Overview -ClusterCockpit is a job-specific performance monitoring framework for HPC clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a Svelte-based frontend, and manages job archives and metric data from various time-series databases. +ClusterCockpit is a job-specific performance monitoring framework for HPC +clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a +Svelte-based frontend, and manages job archives and metric data from various +time-series databases. ## Build and Development Commands @@ -80,7 +84,7 @@ The backend follows a layered architecture with clear separation of concerns: - **cmd/cc-backend**: Entry point, orchestrates initialization of all subsystems - **internal/repository**: Data access layer using repository pattern - - Abstracts database operations (SQLite/MySQL) + - Abstracts database operations (SQLite3 only) - Implements LRU caching for performance - Provides repositories for Job, User, Node, and Tag entities - Transaction support for batch operations @@ -114,19 +118,27 @@ The backend follows a layered architecture with clear separation of concerns: ### Key Concepts -**Job Archive**: Completed jobs are stored in a file-based archive following the [ClusterCockpit job-archive specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). Each job has a `meta.json` file with metadata and metric data files. +**Job Archive**: Completed jobs are stored in a file-based archive following the +[ClusterCockpit job-archive +specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). +Each job has a `meta.json` file with metadata and metric data files. -**Metric Data Repositories**: Time-series metric data is stored separately from job metadata. The system supports multiple backends (cc-metric-store is recommended). Configuration is per-cluster in `config.json`. +**Metric Data Repositories**: Time-series metric data is stored separately from +job metadata. The system supports multiple backends (cc-metric-store is +recommended). Configuration is per-cluster in `config.json`. **Authentication Flow**: + 1. Multiple authenticators can be configured (local, LDAP, OIDC, JWT) 2. Each authenticator's `CanLogin` method is called to determine if it should handle the request 3. The first authenticator that returns true performs the actual `Login` 4. JWT tokens are used for API authentication -**Database Migrations**: SQL migrations in `internal/repository/migrations/` are applied automatically on startup. Version tracking in `version` table. +**Database Migrations**: SQL migrations in `internal/repository/migrations/` are +applied automatically on startup. Version tracking in `version` table. **Scopes**: Metrics can be collected at different scopes: + - Node scope (always available) - Core scope (for jobs with ≤8 nodes) - Accelerator scope (for GPU/accelerator metrics) @@ -142,13 +154,13 @@ The backend follows a layered architecture with clear separation of concerns: ## Database - Default: SQLite 3 (`./var/job.db`) -- Optional: MySQL/MariaDB - Connection managed by `internal/repository` - Schema version in `internal/repository/migration.go` ## Code Generation **GraphQL** (gqlgen): + - Schema: `api/*.graphqls` - Config: `gqlgen.yml` - Generated code: `internal/graph/generated/` @@ -156,6 +168,7 @@ The backend follows a layered architecture with clear separation of concerns: - Run `make graphql` after schema changes **Swagger/OpenAPI**: + - Annotations in `internal/api/*.go` - Generated docs: `api/docs.go`, `api/swagger.yaml` - Run `make swagger` after API changes @@ -170,22 +183,26 @@ The backend follows a layered architecture with clear separation of concerns: ## Common Workflows ### Adding a new GraphQL field + 1. Edit schema in `api/*.graphqls` 2. Run `make graphql` 3. Implement resolver in `internal/graph/schema.resolvers.go` ### Adding a new REST endpoint + 1. Add handler in `internal/api/*.go` 2. Add route in `internal/api/rest.go` 3. Add Swagger annotations 4. Run `make swagger` ### Adding a new metric data backend + 1. Implement `MetricDataRepository` interface in `internal/metricdata/` 2. Register in `metricdata.Init()` switch statement 3. Update config.json schema documentation ### Modifying database schema + 1. Create new migration in `internal/repository/migrations/` 2. Increment `repository.Version` 3. Test with fresh database and existing database @@ -194,5 +211,5 @@ The backend follows a layered architecture with clear separation of concerns: - Go 1.24.0+ (check go.mod for exact version) - Node.js (for frontend builds) -- SQLite 3 or MySQL/MariaDB +- SQLite 3 (only supported database) - Optional: NATS server for metric ingestion From 1cd4a57bd3206e1f3115c1cbc58fcad5cbfb87a5 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Sat, 20 Dec 2025 11:13:41 +0100 Subject: [PATCH 15/17] Remove support for mysql/mariadb --- README.md | 11 +- cmd/cc-backend/init.go | 4 +- cmd/cc-backend/main.go | 19 ++- configs/config-mariadb.json | 64 --------- go.mod | 2 - go.sum | 49 +------ init/clustercockpit.service | 2 +- internal/api/api_test.go | 2 +- internal/config/config.go | 4 +- internal/config/schema.go | 2 +- internal/importer/importer_test.go | 2 +- internal/repository/dbConnection.go | 56 ++++---- internal/repository/job.go | 52 ++------ internal/repository/migration.go | 89 ++++--------- .../migrations/mysql/01_init-schema.down.sql | 5 - .../migrations/mysql/01_init-schema.up.sql | 66 ---------- .../migrations/mysql/02_add-index.down.sql | 8 -- .../migrations/mysql/02_add-index.up.sql | 8 -- .../mysql/03_add-userprojects.down.sql | 1 - .../mysql/03_add-userprojects.up.sql | 1 - .../mysql/04_alter-table-job.down.sql | 5 - .../mysql/04_alter-table-job.up.sql | 5 - .../migrations/mysql/05_extend-tags.down.sql | 2 - .../migrations/mysql/05_extend-tags.up.sql | 2 - .../mysql/06_change-config.down.sql | 1 - .../migrations/mysql/06_change-config.up.sql | 1 - .../migrations/mysql/07_fix-tag-id.down.sql | 3 - .../migrations/mysql/07_fix-tag-id.up.sql | 3 - .../mysql/08_add-footprint.down.sql | 83 ------------ .../migrations/mysql/08_add-footprint.up.sql | 123 ------------------ internal/repository/node_test.go | 2 +- internal/repository/repository_test.go | 2 +- internal/repository/stats.go | 49 +++---- internal/repository/userConfig_test.go | 2 +- internal/tagger/detectApp_test.go | 2 +- 35 files changed, 104 insertions(+), 628 deletions(-) delete mode 100644 configs/config-mariadb.json delete mode 100644 internal/repository/migrations/mysql/01_init-schema.down.sql delete mode 100644 internal/repository/migrations/mysql/01_init-schema.up.sql delete mode 100644 internal/repository/migrations/mysql/02_add-index.down.sql delete mode 100644 internal/repository/migrations/mysql/02_add-index.up.sql delete mode 100644 internal/repository/migrations/mysql/03_add-userprojects.down.sql delete mode 100644 internal/repository/migrations/mysql/03_add-userprojects.up.sql delete mode 100644 internal/repository/migrations/mysql/04_alter-table-job.down.sql delete mode 100644 internal/repository/migrations/mysql/04_alter-table-job.up.sql delete mode 100644 internal/repository/migrations/mysql/05_extend-tags.down.sql delete mode 100644 internal/repository/migrations/mysql/05_extend-tags.up.sql delete mode 100644 internal/repository/migrations/mysql/06_change-config.down.sql delete mode 100644 internal/repository/migrations/mysql/06_change-config.up.sql delete mode 100644 internal/repository/migrations/mysql/07_fix-tag-id.down.sql delete mode 100644 internal/repository/migrations/mysql/07_fix-tag-id.up.sql delete mode 100644 internal/repository/migrations/mysql/08_add-footprint.down.sql delete mode 100644 internal/repository/migrations/mysql/08_add-footprint.up.sql diff --git a/README.md b/README.md index 0799bd9..a0352d1 100644 --- a/README.md +++ b/README.md @@ -29,12 +29,11 @@ is also served by the backend using [Svelte](https://svelte.dev/) components. Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/). -The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by -default. Optionally it can use a MySQL/MariaDB database server. While there are -metric data backends for the InfluxDB and Prometheus time series databases, the -only tested and supported setup is to use cc-metric-store as the metric data -backend. Documentation on how to integrate ClusterCockpit with other time series -databases will be added in the future. +The backend uses [SQLite 3](https://sqlite.org/) as the relational SQL database. +While there are metric data backends for the InfluxDB and Prometheus time series +databases, the only tested and supported setup is to use cc-metric-store as the +metric data backend. Documentation on how to integrate ClusterCockpit with other +time series databases will be added in the future. Completed batch jobs are stored in a file-based job archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). diff --git a/cmd/cc-backend/init.go b/cmd/cc-backend/init.go index ee60b12..151eee9 100644 --- a/cmd/cc-backend/init.go +++ b/cmd/cc-backend/init.go @@ -105,9 +105,9 @@ func initEnv() { cclog.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error()) } - err := repository.MigrateDB("sqlite3", "./var/job.db") + err := repository.MigrateDB("./var/job.db") if err != nil { - cclog.Abortf("Could not initialize default sqlite3 database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error()) + cclog.Abortf("Could not initialize default SQLite database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error()) } if err := os.Mkdir("var/job-archive", 0o777); err != nil { cclog.Abortf("Could not create default ./var/job-archive folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error()) diff --git a/cmd/cc-backend/main.go b/cmd/cc-backend/main.go index 6239d36..9464ccf 100644 --- a/cmd/cc-backend/main.go +++ b/cmd/cc-backend/main.go @@ -40,7 +40,6 @@ import ( "github.com/google/gops/agent" "github.com/joho/godotenv" - _ "github.com/go-sql-driver/mysql" _ "github.com/mattn/go-sqlite3" ) @@ -120,30 +119,30 @@ func initDatabase() error { func handleDatabaseCommands() error { if flagMigrateDB { - err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB) + err := repository.MigrateDB(config.Keys.DB) if err != nil { return fmt.Errorf("migrating database to version %d: %w", repository.Version, err) } - cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n", - config.Keys.DBDriver, config.Keys.DB, repository.Version) + cclog.Exitf("MigrateDB Success: Migrated SQLite database at '%s' to version %d.\n", + config.Keys.DB, repository.Version) } if flagRevertDB { - err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB) + err := repository.RevertDB(config.Keys.DB) if err != nil { return fmt.Errorf("reverting database to version %d: %w", repository.Version-1, err) } - cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n", - config.Keys.DBDriver, config.Keys.DB, repository.Version-1) + cclog.Exitf("RevertDB Success: Reverted SQLite database at '%s' to version %d.\n", + config.Keys.DB, repository.Version-1) } if flagForceDB { - err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB) + err := repository.ForceDB(config.Keys.DB) if err != nil { return fmt.Errorf("forcing database to version %d: %w", repository.Version, err) } - cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n", - config.Keys.DBDriver, config.Keys.DB, repository.Version) + cclog.Exitf("ForceDB Success: Forced SQLite database at '%s' to version %d.\n", + config.Keys.DB, repository.Version) } return nil diff --git a/configs/config-mariadb.json b/configs/config-mariadb.json deleted file mode 100644 index 38bb8a9..0000000 --- a/configs/config-mariadb.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "addr": "127.0.0.1:8080", - "short-running-jobs-duration": 300, - "archive": { - "kind": "file", - "path": "./var/job-archive" - }, - "jwts": { - "max-age": "2000h" - }, - "db-driver": "mysql", - "db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit", - "enable-resampling": { - "trigger": 30, - "resolutions": [600, 300, 120, 60] - }, - "emission-constant": 317, - "clusters": [ - { - "name": "fritz", - "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - }, - { - "name": "alex", - "metricDataRepository": { - "kind": "cc-metric-store", - "url": "http://localhost:8082", - "token": "" - }, - "filterRanges": { - "numNodes": { - "from": 1, - "to": 64 - }, - "duration": { - "from": 0, - "to": 86400 - }, - "startTime": { - "from": "2022-01-01T00:00:00Z", - "to": null - } - } - } - ] -} diff --git a/go.mod b/go.mod index df8e1fb..eb061de 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,6 @@ require ( github.com/expr-lang/expr v1.17.6 github.com/go-co-op/gocron/v2 v2.18.2 github.com/go-ldap/ldap/v3 v3.4.12 - github.com/go-sql-driver/mysql v1.9.3 github.com/golang-jwt/jwt/v5 v5.3.0 github.com/golang-migrate/migrate/v4 v4.19.1 github.com/google/gops v0.3.28 @@ -48,7 +47,6 @@ require ( ) require ( - filippo.io/edwards25519 v1.1.0 // indirect github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/agnivade/levenshtein v1.2.1 // indirect diff --git a/go.sum b/go.sum index 711c555..fd4980d 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,6 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/99designs/gqlgen v0.17.84 h1:iVMdiStgUVx/BFkMb0J5GAXlqfqtQ7bqMCYK6v52kQ0= github.com/99designs/gqlgen v0.17.84/go.mod h1:qjoUqzTeiejdo+bwUg8unqSpeYG42XrcrQboGIezmFA= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg= @@ -12,8 +10,6 @@ github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM= github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= @@ -70,10 +66,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= -github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= -github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= -github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= github.com/coreos/go-oidc/v3 v3.16.0 h1:qRQUCFstKpXwmEjDQTIbyY/5jF00+asXzSkmkoa/mow= github.com/coreos/go-oidc/v3 v3.16.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= @@ -85,16 +77,6 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= -github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4= -github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU= -github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= -github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI= -github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= -github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= -github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= -github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/expr-lang/expr v1.17.6 h1:1h6i8ONk9cexhDmowO/A64VPxHScu7qfSl2k8OlINec= github.com/expr-lang/expr v1.17.6/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -113,10 +95,6 @@ github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZR github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4= github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/jsonpointer v0.22.3 h1:dKMwfV4fmt6Ah90zloTbUKWMD+0he+12XYAsPotrkn8= github.com/go-openapi/jsonpointer v0.22.3/go.mod h1:0lBbqeRsQ5lIanv3LHZBrmRGHLHcQoOXQnf88fHlGWo= github.com/go-openapi/jsonreference v0.21.3 h1:96Dn+MRPa0nYAR8DR1E03SblB5FJvh7W6krPI0Z7qMc= @@ -145,15 +123,12 @@ github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8U github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls= github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54= github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= -github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= -github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-yaml v1.19.0 h1:EmkZ9RIsX+Uq4DYFowegAuJo8+xdX3T/2dwNPXbxEYE= github.com/goccy/go-yaml v1.19.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA= @@ -241,17 +216,11 @@ github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsO github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= -github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= -github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= @@ -265,13 +234,7 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= -github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= -github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -323,16 +286,6 @@ github.com/vektah/gqlparser/v2 v2.5.31/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6O github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg= github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= diff --git a/init/clustercockpit.service b/init/clustercockpit.service index 0a9448d..b4ed8bf 100644 --- a/init/clustercockpit.service +++ b/init/clustercockpit.service @@ -3,7 +3,7 @@ Description=ClusterCockpit Web Server Documentation=https://github.com/ClusterCockpit/cc-backend Wants=network-online.target After=network-online.target -After=mariadb.service mysql.service +# Database is file-based SQLite - no service dependency required [Service] WorkingDirectory=/opt/monitoring/cc-backend diff --git a/internal/api/api_test.go b/internal/api/api_test.go index 70b0f0a..d311767 100644 --- a/internal/api/api_test.go +++ b/internal/api/api_test.go @@ -141,7 +141,7 @@ func setup(t *testing.T) *api.RestAPI { } dbfilepath := filepath.Join(tmpdir, "test.db") - err := repository.MigrateDB("sqlite3", dbfilepath) + err := repository.MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/config/config.go b/internal/config/config.go index 25ca27e..b7b8ed0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,10 +37,10 @@ type ProgramConfig struct { EmbedStaticFiles bool `json:"embed-static-files"` StaticFiles string `json:"static-files"` - // 'sqlite3' or 'mysql' (mysql will work for mariadb as well) + // Database driver - only 'sqlite3' is supported DBDriver string `json:"db-driver"` - // For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). + // Path to SQLite database file DB string `json:"db"` // Keep all metric data in the metric data repositories, diff --git a/internal/config/schema.go b/internal/config/schema.go index ed1f42d..b171f96 100644 --- a/internal/config/schema.go +++ b/internal/config/schema.go @@ -41,7 +41,7 @@ var configSchema = ` "type": "string" }, "db": { - "description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).", + "description": "Path to SQLite database file (e.g., './var/job.db')", "type": "string" }, "disable-archive": { diff --git a/internal/importer/importer_test.go b/internal/importer/importer_test.go index 2aa007d..470f760 100644 --- a/internal/importer/importer_test.go +++ b/internal/importer/importer_test.go @@ -107,7 +107,7 @@ func setup(t *testing.T) *repository.JobRepository { } dbfilepath := filepath.Join(tmpdir, "test.db") - err := repository.MigrateDB("sqlite3", dbfilepath) + err := repository.MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/repository/dbConnection.go b/internal/repository/dbConnection.go index 1c14c95..0f7536b 100644 --- a/internal/repository/dbConnection.go +++ b/internal/repository/dbConnection.go @@ -55,6 +55,10 @@ func Connect(driver string, db string) { var err error var dbHandle *sqlx.DB + if driver != "sqlite3" { + cclog.Abortf("Unsupported database driver '%s'. Only 'sqlite3' is supported.\n", driver) + } + dbConnOnce.Do(func() { opts := DatabaseOptions{ URL: db, @@ -64,39 +68,31 @@ func Connect(driver string, db string) { ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime, } - switch driver { - case "sqlite3": - // TODO: Have separate DB handles for Writes and Reads - // Optimize SQLite connection: https://kerkour.com/sqlite-for-servers - connectionURLParams := make(url.Values) - connectionURLParams.Add("_txlock", "immediate") - connectionURLParams.Add("_journal_mode", "WAL") - connectionURLParams.Add("_busy_timeout", "5000") - connectionURLParams.Add("_synchronous", "NORMAL") - connectionURLParams.Add("_cache_size", "1000000000") - connectionURLParams.Add("_foreign_keys", "true") - opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode()) + // TODO: Have separate DB handles for Writes and Reads + // Optimize SQLite connection: https://kerkour.com/sqlite-for-servers + connectionURLParams := make(url.Values) + connectionURLParams.Add("_txlock", "immediate") + connectionURLParams.Add("_journal_mode", "WAL") + connectionURLParams.Add("_busy_timeout", "5000") + connectionURLParams.Add("_synchronous", "NORMAL") + connectionURLParams.Add("_cache_size", "1000000000") + connectionURLParams.Add("_foreign_keys", "true") + opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode()) - if cclog.Loglevel() == "debug" { - sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{})) - dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL) - } else { - dbHandle, err = sqlx.Open("sqlite3", opts.URL) - } - - err = setupSqlite(dbHandle.DB) - if err != nil { - cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error()) - } - case "mysql": - opts.URL += "?multiStatements=true" - dbHandle, err = sqlx.Open("mysql", opts.URL) - default: - cclog.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver) + if cclog.Loglevel() == "debug" { + sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{})) + dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL) + } else { + dbHandle, err = sqlx.Open("sqlite3", opts.URL) } if err != nil { - cclog.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error()) + cclog.Abortf("DB Connection: Could not connect to SQLite database with sqlx.Open().\nError: %s\n", err.Error()) + } + + err = setupSqlite(dbHandle.DB) + if err != nil { + cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error()) } dbHandle.SetMaxOpenConns(opts.MaxOpenConnections) @@ -105,7 +101,7 @@ func Connect(driver string, db string) { dbHandle.SetConnMaxIdleTime(opts.ConnectionMaxIdleTime) dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver} - err = checkDBVersion(driver, dbHandle.DB) + err = checkDBVersion(dbHandle.DB) if err != nil { cclog.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error()) } diff --git a/internal/repository/job.go b/internal/repository/job.go index f23a14c..4795937 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -14,8 +14,6 @@ // Initialize the database connection before using any repository: // // repository.Connect("sqlite3", "./var/job.db") -// // or for MySQL: -// repository.Connect("mysql", "user:password@tcp(localhost:3306)/dbname") // // # Configuration // @@ -158,52 +156,22 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) { } func (r *JobRepository) Optimize() error { - var err error - - switch r.driver { - case "sqlite3": - if _, err = r.DB.Exec(`VACUUM`); err != nil { - return err - } - case "mysql": - cclog.Info("Optimize currently not supported for mysql driver") + if _, err := r.DB.Exec(`VACUUM`); err != nil { + return err } - return nil } func (r *JobRepository) Flush() error { - var err error - - switch r.driver { - case "sqlite3": - if _, err = r.DB.Exec(`DELETE FROM jobtag`); err != nil { - return err - } - if _, err = r.DB.Exec(`DELETE FROM tag`); err != nil { - return err - } - if _, err = r.DB.Exec(`DELETE FROM job`); err != nil { - return err - } - case "mysql": - if _, err = r.DB.Exec(`SET FOREIGN_KEY_CHECKS = 0`); err != nil { - return err - } - if _, err = r.DB.Exec(`TRUNCATE TABLE jobtag`); err != nil { - return err - } - if _, err = r.DB.Exec(`TRUNCATE TABLE tag`); err != nil { - return err - } - if _, err = r.DB.Exec(`TRUNCATE TABLE job`); err != nil { - return err - } - if _, err = r.DB.Exec(`SET FOREIGN_KEY_CHECKS = 1`); err != nil { - return err - } + if _, err := r.DB.Exec(`DELETE FROM jobtag`); err != nil { + return err + } + if _, err := r.DB.Exec(`DELETE FROM tag`); err != nil { + return err + } + if _, err := r.DB.Exec(`DELETE FROM job`); err != nil { + return err } - return nil } diff --git a/internal/repository/migration.go b/internal/repository/migration.go index dec93a9..43e913c 100644 --- a/internal/repository/migration.go +++ b/internal/repository/migration.go @@ -12,7 +12,6 @@ import ( cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/golang-migrate/migrate/v4" - "github.com/golang-migrate/migrate/v4/database/mysql" "github.com/golang-migrate/migrate/v4/database/sqlite3" "github.com/golang-migrate/migrate/v4/source/iofs" ) @@ -22,40 +21,19 @@ const Version uint = 10 //go:embed migrations/* var migrationFiles embed.FS -func checkDBVersion(backend string, db *sql.DB) error { - var m *migrate.Migrate +func checkDBVersion(db *sql.DB) error { + driver, err := sqlite3.WithInstance(db, &sqlite3.Config{}) + if err != nil { + return err + } + d, err := iofs.New(migrationFiles, "migrations/sqlite3") + if err != nil { + return err + } - switch backend { - case "sqlite3": - driver, err := sqlite3.WithInstance(db, &sqlite3.Config{}) - if err != nil { - return err - } - d, err := iofs.New(migrationFiles, "migrations/sqlite3") - if err != nil { - return err - } - - m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver) - if err != nil { - return err - } - case "mysql": - driver, err := mysql.WithInstance(db, &mysql.Config{}) - if err != nil { - return err - } - d, err := iofs.New(migrationFiles, "migrations/mysql") - if err != nil { - return err - } - - m, err = migrate.NewWithInstance("iofs", d, "mysql", driver) - if err != nil { - return err - } - default: - cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend) + m, err := migrate.NewWithInstance("iofs", d, "sqlite3", driver) + if err != nil { + return err } v, dirty, err := m.Version() @@ -80,37 +58,22 @@ func checkDBVersion(backend string, db *sql.DB) error { return nil } -func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) { - switch backend { - case "sqlite3": - d, err := iofs.New(migrationFiles, "migrations/sqlite3") - if err != nil { - cclog.Fatal(err) - } +func getMigrateInstance(db string) (m *migrate.Migrate, err error) { + d, err := iofs.New(migrationFiles, "migrations/sqlite3") + if err != nil { + return nil, err + } - m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db)) - if err != nil { - return m, err - } - case "mysql": - d, err := iofs.New(migrationFiles, "migrations/mysql") - if err != nil { - return m, err - } - - m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db)) - if err != nil { - return m, err - } - default: - cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend) + m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db)) + if err != nil { + return nil, err } return m, nil } -func MigrateDB(backend string, db string) error { - m, err := getMigrateInstance(backend, db) +func MigrateDB(db string) error { + m, err := getMigrateInstance(db) if err != nil { return err } @@ -144,8 +107,8 @@ func MigrateDB(backend string, db string) error { return nil } -func RevertDB(backend string, db string) error { - m, err := getMigrateInstance(backend, db) +func RevertDB(db string) error { + m, err := getMigrateInstance(db) if err != nil { return err } @@ -162,8 +125,8 @@ func RevertDB(backend string, db string) error { return nil } -func ForceDB(backend string, db string) error { - m, err := getMigrateInstance(backend, db) +func ForceDB(db string) error { + m, err := getMigrateInstance(db) if err != nil { return err } diff --git a/internal/repository/migrations/mysql/01_init-schema.down.sql b/internal/repository/migrations/mysql/01_init-schema.down.sql deleted file mode 100644 index 68da646..0000000 --- a/internal/repository/migrations/mysql/01_init-schema.down.sql +++ /dev/null @@ -1,5 +0,0 @@ -DROP TABLE IF EXISTS job; -DROP TABLE IF EXISTS tags; -DROP TABLE IF EXISTS jobtag; -DROP TABLE IF EXISTS configuration; -DROP TABLE IF EXISTS user; diff --git a/internal/repository/migrations/mysql/01_init-schema.up.sql b/internal/repository/migrations/mysql/01_init-schema.up.sql deleted file mode 100644 index 3a6930c..0000000 --- a/internal/repository/migrations/mysql/01_init-schema.up.sql +++ /dev/null @@ -1,66 +0,0 @@ -CREATE TABLE IF NOT EXISTS job ( - id INTEGER AUTO_INCREMENT PRIMARY KEY , - job_id BIGINT NOT NULL, - cluster VARCHAR(255) NOT NULL, - subcluster VARCHAR(255) NOT NULL, - start_time BIGINT NOT NULL, -- Unix timestamp - - user VARCHAR(255) NOT NULL, - project VARCHAR(255) NOT NULL, - `partition` VARCHAR(255) NOT NULL, - array_job_id BIGINT NOT NULL, - duration INT NOT NULL DEFAULT 0, - walltime INT NOT NULL DEFAULT 0, - job_state VARCHAR(255) NOT NULL - CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', - 'stopped', 'timeout', 'preempted', 'out_of_memory')), - meta_data TEXT, -- JSON - resources TEXT NOT NULL, -- JSON - - num_nodes INT NOT NULL, - num_hwthreads INT NOT NULL, - num_acc INT NOT NULL, - smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )), - exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)), - monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)), - - mem_used_max REAL NOT NULL DEFAULT 0.0, - flops_any_avg REAL NOT NULL DEFAULT 0.0, - mem_bw_avg REAL NOT NULL DEFAULT 0.0, - load_avg REAL NOT NULL DEFAULT 0.0, - net_bw_avg REAL NOT NULL DEFAULT 0.0, - net_data_vol_total REAL NOT NULL DEFAULT 0.0, - file_bw_avg REAL NOT NULL DEFAULT 0.0, - file_data_vol_total REAL NOT NULL DEFAULT 0.0, - UNIQUE (job_id, cluster, start_time) - ); - -CREATE TABLE IF NOT EXISTS tag ( - id INTEGER PRIMARY KEY, - tag_type VARCHAR(255) NOT NULL, - tag_name VARCHAR(255) NOT NULL, - UNIQUE (tag_type, tag_name)); - -CREATE TABLE IF NOT EXISTS jobtag ( - job_id INTEGER, - tag_id INTEGER, - PRIMARY KEY (job_id, tag_id), - FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE, - FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE); - -CREATE TABLE IF NOT EXISTS user ( - username varchar(255) PRIMARY KEY NOT NULL, - password varchar(255) DEFAULT NULL, - ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */ - name varchar(255) DEFAULT NULL, - roles varchar(255) NOT NULL DEFAULT "[]", - email varchar(255) DEFAULT NULL); - -CREATE TABLE IF NOT EXISTS configuration ( - username varchar(255), - confkey varchar(255), - value varchar(255), - PRIMARY KEY (username, confkey), - FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION); - - diff --git a/internal/repository/migrations/mysql/02_add-index.down.sql b/internal/repository/migrations/mysql/02_add-index.down.sql deleted file mode 100644 index 1392c45..0000000 --- a/internal/repository/migrations/mysql/02_add-index.down.sql +++ /dev/null @@ -1,8 +0,0 @@ -DROP INDEX IF EXISTS job_stats; -DROP INDEX IF EXISTS job_by_user; -DROP INDEX IF EXISTS job_by_starttime; -DROP INDEX IF EXISTS job_by_job_id; -DROP INDEX IF EXISTS job_list; -DROP INDEX IF EXISTS job_list_user; -DROP INDEX IF EXISTS job_list_users; -DROP INDEX IF EXISTS job_list_users_start; diff --git a/internal/repository/migrations/mysql/02_add-index.up.sql b/internal/repository/migrations/mysql/02_add-index.up.sql deleted file mode 100644 index 2524bd9..0000000 --- a/internal/repository/migrations/mysql/02_add-index.up.sql +++ /dev/null @@ -1,8 +0,0 @@ -CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user); -CREATE INDEX IF NOT EXISTS job_by_user ON job (user); -CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time); -CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id); -CREATE INDEX IF NOT EXISTS job_list ON job (cluster, job_state); -CREATE INDEX IF NOT EXISTS job_list_user ON job (user, cluster, job_state); -CREATE INDEX IF NOT EXISTS job_list_users ON job (user, job_state); -CREATE INDEX IF NOT EXISTS job_list_users_start ON job (start_time, user, job_state); diff --git a/internal/repository/migrations/mysql/03_add-userprojects.down.sql b/internal/repository/migrations/mysql/03_add-userprojects.down.sql deleted file mode 100644 index bbf1e64..0000000 --- a/internal/repository/migrations/mysql/03_add-userprojects.down.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE user DROP COLUMN projects; diff --git a/internal/repository/migrations/mysql/03_add-userprojects.up.sql b/internal/repository/migrations/mysql/03_add-userprojects.up.sql deleted file mode 100644 index d0f19c2..0000000 --- a/internal/repository/migrations/mysql/03_add-userprojects.up.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]"; diff --git a/internal/repository/migrations/mysql/04_alter-table-job.down.sql b/internal/repository/migrations/mysql/04_alter-table-job.down.sql deleted file mode 100644 index ebc7454..0000000 --- a/internal/repository/migrations/mysql/04_alter-table-job.down.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE job - MODIFY `partition` VARCHAR(255) NOT NULL, - MODIFY array_job_id BIGINT NOT NULL, - MODIFY num_hwthreads INT NOT NULL, - MODIFY num_acc INT NOT NULL; diff --git a/internal/repository/migrations/mysql/04_alter-table-job.up.sql b/internal/repository/migrations/mysql/04_alter-table-job.up.sql deleted file mode 100644 index 9fe7620..0000000 --- a/internal/repository/migrations/mysql/04_alter-table-job.up.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE job - MODIFY `partition` VARCHAR(255), - MODIFY array_job_id BIGINT, - MODIFY num_hwthreads INT, - MODIFY num_acc INT; diff --git a/internal/repository/migrations/mysql/05_extend-tags.down.sql b/internal/repository/migrations/mysql/05_extend-tags.down.sql deleted file mode 100644 index 925c9f8..0000000 --- a/internal/repository/migrations/mysql/05_extend-tags.down.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE tag DROP COLUMN insert_time; -ALTER TABLE jobtag DROP COLUMN insert_time; diff --git a/internal/repository/migrations/mysql/05_extend-tags.up.sql b/internal/repository/migrations/mysql/05_extend-tags.up.sql deleted file mode 100644 index 4577564..0000000 --- a/internal/repository/migrations/mysql/05_extend-tags.up.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE tag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP; -ALTER TABLE jobtag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP; diff --git a/internal/repository/migrations/mysql/06_change-config.down.sql b/internal/repository/migrations/mysql/06_change-config.down.sql deleted file mode 100644 index 0651790..0000000 --- a/internal/repository/migrations/mysql/06_change-config.down.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE configuration MODIFY value VARCHAR(255); diff --git a/internal/repository/migrations/mysql/06_change-config.up.sql b/internal/repository/migrations/mysql/06_change-config.up.sql deleted file mode 100644 index e35ff19..0000000 --- a/internal/repository/migrations/mysql/06_change-config.up.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE configuration MODIFY value TEXT; diff --git a/internal/repository/migrations/mysql/07_fix-tag-id.down.sql b/internal/repository/migrations/mysql/07_fix-tag-id.down.sql deleted file mode 100644 index 9f9959a..0000000 --- a/internal/repository/migrations/mysql/07_fix-tag-id.down.sql +++ /dev/null @@ -1,3 +0,0 @@ -SET FOREIGN_KEY_CHECKS = 0; -ALTER TABLE tag MODIFY id INTEGER; -SET FOREIGN_KEY_CHECKS = 1; diff --git a/internal/repository/migrations/mysql/07_fix-tag-id.up.sql b/internal/repository/migrations/mysql/07_fix-tag-id.up.sql deleted file mode 100644 index 1abc4b3..0000000 --- a/internal/repository/migrations/mysql/07_fix-tag-id.up.sql +++ /dev/null @@ -1,3 +0,0 @@ -SET FOREIGN_KEY_CHECKS = 0; -ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT; -SET FOREIGN_KEY_CHECKS = 1; diff --git a/internal/repository/migrations/mysql/08_add-footprint.down.sql b/internal/repository/migrations/mysql/08_add-footprint.down.sql deleted file mode 100644 index 57f2145..0000000 --- a/internal/repository/migrations/mysql/08_add-footprint.down.sql +++ /dev/null @@ -1,83 +0,0 @@ -ALTER TABLE job DROP energy; -ALTER TABLE job DROP energy_footprint; -ALTER TABLE job ADD COLUMN flops_any_avg; -ALTER TABLE job ADD COLUMN mem_bw_avg; -ALTER TABLE job ADD COLUMN mem_used_max; -ALTER TABLE job ADD COLUMN load_avg; -ALTER TABLE job ADD COLUMN net_bw_avg; -ALTER TABLE job ADD COLUMN net_data_vol_total; -ALTER TABLE job ADD COLUMN file_bw_avg; -ALTER TABLE job ADD COLUMN file_data_vol_total; - -UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg'); -UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg'); -UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max'); -UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg'); -UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg'); -UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total'); -UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg'); -UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total'); - -ALTER TABLE job DROP footprint; --- Do not use reserved keywords anymore -RENAME TABLE hpc_user TO `user`; -ALTER TABLE job RENAME COLUMN hpc_user TO `user`; -ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`; - -DROP INDEX IF EXISTS jobs_cluster; -DROP INDEX IF EXISTS jobs_cluster_user; -DROP INDEX IF EXISTS jobs_cluster_project; -DROP INDEX IF EXISTS jobs_cluster_subcluster; -DROP INDEX IF EXISTS jobs_cluster_starttime; -DROP INDEX IF EXISTS jobs_cluster_duration; -DROP INDEX IF EXISTS jobs_cluster_numnodes; - -DROP INDEX IF EXISTS jobs_cluster_partition; -DROP INDEX IF EXISTS jobs_cluster_partition_starttime; -DROP INDEX IF EXISTS jobs_cluster_partition_duration; -DROP INDEX IF EXISTS jobs_cluster_partition_numnodes; - -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration; -DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes; - -DROP INDEX IF EXISTS jobs_cluster_jobstate; -DROP INDEX IF EXISTS jobs_cluster_jobstate_user; -DROP INDEX IF EXISTS jobs_cluster_jobstate_project; - -DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime; -DROP INDEX IF EXISTS jobs_cluster_jobstate_duration; -DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes; - -DROP INDEX IF EXISTS jobs_user; -DROP INDEX IF EXISTS jobs_user_starttime; -DROP INDEX IF EXISTS jobs_user_duration; -DROP INDEX IF EXISTS jobs_user_numnodes; - -DROP INDEX IF EXISTS jobs_project; -DROP INDEX IF EXISTS jobs_project_user; -DROP INDEX IF EXISTS jobs_project_starttime; -DROP INDEX IF EXISTS jobs_project_duration; -DROP INDEX IF EXISTS jobs_project_numnodes; - -DROP INDEX IF EXISTS jobs_jobstate; -DROP INDEX IF EXISTS jobs_jobstate_user; -DROP INDEX IF EXISTS jobs_jobstate_project; -DROP INDEX IF EXISTS jobs_jobstate_starttime; -DROP INDEX IF EXISTS jobs_jobstate_duration; -DROP INDEX IF EXISTS jobs_jobstate_numnodes; - -DROP INDEX IF EXISTS jobs_arrayjobid_starttime; -DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime; - -DROP INDEX IF EXISTS jobs_starttime; -DROP INDEX IF EXISTS jobs_duration; -DROP INDEX IF EXISTS jobs_numnodes; - -DROP INDEX IF EXISTS jobs_duration_starttime; -DROP INDEX IF EXISTS jobs_numnodes_starttime; -DROP INDEX IF EXISTS jobs_numacc_starttime; -DROP INDEX IF EXISTS jobs_energy_starttime; diff --git a/internal/repository/migrations/mysql/08_add-footprint.up.sql b/internal/repository/migrations/mysql/08_add-footprint.up.sql deleted file mode 100644 index 207ccf9..0000000 --- a/internal/repository/migrations/mysql/08_add-footprint.up.sql +++ /dev/null @@ -1,123 +0,0 @@ -DROP INDEX IF EXISTS job_stats ON job; -DROP INDEX IF EXISTS job_by_user ON job; -DROP INDEX IF EXISTS job_by_starttime ON job; -DROP INDEX IF EXISTS job_by_job_id ON job; -DROP INDEX IF EXISTS job_list ON job; -DROP INDEX IF EXISTS job_list_user ON job; -DROP INDEX IF EXISTS job_list_users ON job; -DROP INDEX IF EXISTS job_list_users_start ON job; - -ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0; -ALTER TABLE job ADD COLUMN energy_footprint JSON; - -ALTER TABLE job ADD COLUMN footprint JSON; -ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global'; - --- Do not use reserved keywords anymore -RENAME TABLE `user` TO hpc_user; -ALTER TABLE job RENAME COLUMN `user` TO hpc_user; -ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition; - -ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50); -ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50); -ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50); -ALTER TABLE job MODIFY COLUMN project VARCHAR(50); -ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50); -ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25); - -UPDATE job SET footprint = '{"flops_any_avg": 0.0}'; -UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg); -UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg); -UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max); -UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg); -UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0; -UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0; -UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0; -UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0; - -ALTER TABLE job DROP flops_any_avg; -ALTER TABLE job DROP mem_bw_avg; -ALTER TABLE job DROP mem_used_max; -ALTER TABLE job DROP load_avg; -ALTER TABLE job DROP net_bw_avg; -ALTER TABLE job DROP net_data_vol_total; -ALTER TABLE job DROP file_bw_avg; -ALTER TABLE job DROP file_data_vol_total; - --- Indices for: Single filters, combined filters, sorting, sorting with filters --- Cluster Filter -CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster); -CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project); -CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster); --- Cluster Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes); - --- Cluster+Partition Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition); --- Cluster+Partition Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes); - --- Cluster+Partition+Jobstate Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project); --- Cluster+Partition+Jobstate Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes); - --- Cluster+JobState Filter -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project); --- Cluster+JobState Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes); - --- User Filter -CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user); --- User Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time); -CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration); -CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes); - --- Project Filter -CREATE INDEX IF NOT EXISTS jobs_project ON job (project); -CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user); --- Project Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time); -CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration); -CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes); - --- JobState Filter -CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state); -CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user); -CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project); -CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster); --- JobState Filter Sorting -CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time); -CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration); -CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes); - --- ArrayJob Filter -CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time); -CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time); - --- Sorting without active filters -CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time); -CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration); -CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes); - --- Single filters with default starttime sorting -CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time); -CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time); -CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time); -CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time); - --- Optimize DB index usage diff --git a/internal/repository/node_test.go b/internal/repository/node_test.go index b42e09b..466f51e 100644 --- a/internal/repository/node_test.go +++ b/internal/repository/node_test.go @@ -130,7 +130,7 @@ func nodeTestSetup(t *testing.T) { } dbfilepath := filepath.Join(tmpdir, "test.db") - err := MigrateDB("sqlite3", dbfilepath) + err := MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 1346e4d..e3dec7f 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -149,7 +149,7 @@ func setup(tb testing.TB) *JobRepository { tb.Helper() cclog.Init("warn", true) dbfile := "testdata/job.db" - err := MigrateDB("sqlite3", dbfile) + err := MigrateDB(dbfile) noErr(tb, err) Connect("sqlite3", dbfile) return GetJobRepository() diff --git a/internal/repository/stats.go b/internal/repository/stats.go index ba0d09f..c92f519 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -73,9 +73,6 @@ func (r *JobRepository) buildStatsQuery( col string, ) sq.SelectBuilder { var query sq.SelectBuilder - castType := r.getCastType() - - // fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType) if col != "" { // Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours @@ -84,26 +81,26 @@ func (r *JobRepository) buildStatsQuery( "name", "COUNT(job.id) as totalJobs", "COUNT(DISTINCT job.hpc_user) AS totalUsers", - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s) as totalCores`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int) as totalWalltime`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_nodes) as int) as totalNodes`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int) as totalNodeHours`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as int) as totalCores`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int) as totalCoreHours`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_acc) as int) as totalAccs`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int) as totalAccHours`, time.Now().Unix()), ).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col) } else { // Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours query = sq.Select( "COUNT(job.id) as totalJobs", "COUNT(DISTINCT job.hpc_user) AS totalUsers", - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s)`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType), - fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType), - fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int)`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_nodes) as int)`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int)`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as int)`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int)`, time.Now().Unix()), + fmt.Sprintf(`CAST(SUM(job.num_acc) as int)`), + fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int)`, time.Now().Unix()), ).From("job") } @@ -114,21 +111,6 @@ func (r *JobRepository) buildStatsQuery( return query } -func (r *JobRepository) getCastType() string { - var castType string - - switch r.driver { - case "sqlite3": - castType = "int" - case "mysql": - castType = "unsigned" - default: - castType = "" - } - - return castType -} - func (r *JobRepository) JobsStatsGrouped( ctx context.Context, filter []*model.JobFilter, @@ -477,10 +459,9 @@ func (r *JobRepository) AddHistograms( targetBinSize = 3600 } - castType := r.getCastType() var err error // Return X-Values always as seconds, will be formatted into minutes and hours in frontend - value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType) + value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as int) as value`, time.Now().Unix(), targetBinSize) stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount) if err != nil { cclog.Warn("Error while loading job statistics histogram: job duration") diff --git a/internal/repository/userConfig_test.go b/internal/repository/userConfig_test.go index 0d6dc37..b6f6843 100644 --- a/internal/repository/userConfig_test.go +++ b/internal/repository/userConfig_test.go @@ -42,7 +42,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo { cclog.Init("info", true) dbfilepath := "testdata/job.db" - err := MigrateDB("sqlite3", dbfilepath) + err := MigrateDB(dbfilepath) if err != nil { t.Fatal(err) } diff --git a/internal/tagger/detectApp_test.go b/internal/tagger/detectApp_test.go index f9fc91d..7145d04 100644 --- a/internal/tagger/detectApp_test.go +++ b/internal/tagger/detectApp_test.go @@ -15,7 +15,7 @@ func setup(tb testing.TB) *repository.JobRepository { tb.Helper() cclog.Init("warn", true) dbfile := "../repository/testdata/job.db" - err := repository.MigrateDB("sqlite3", dbfile) + err := repository.MigrateDB(dbfile) noErr(tb, err) repository.Connect("sqlite3", dbfile) return repository.GetJobRepository() From 89875db4a9a8d7dc53dacb480a6c144ae847772a Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 22 Dec 2025 10:39:40 +0100 Subject: [PATCH 16/17] dashboard layout fixes --- web/frontend/src/DashPublic.root.svelte | 8 ++++---- web/frontend/src/generic/plots/Stacked.svelte | 2 +- web/frontend/src/status/DashInternal.svelte | 10 ++++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index 25e2683..c69b28f 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -338,7 +338,7 @@ - + - + @@ -540,7 +540,7 @@ Date: Mon, 22 Dec 2025 17:26:56 +0100 Subject: [PATCH 17/17] Rework info panel in public dashboard - change to bootstrap grid from table - add infos, use badges - remove non required query --- internal/metricdata/cc-metric-store.go | 30 ++-- web/frontend/src/DashPublic.root.svelte | 219 ++++++++++++++---------- web/frontend/src/generic/units.js | 2 +- 3 files changed, 146 insertions(+), 105 deletions(-) diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index 6d446d1..be2e956 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -770,21 +770,25 @@ func (ccms *CCMetricStore) LoadNodeData( } mc := archive.GetMetricConfig(cluster, metric) - hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ - Unit: mc.Unit, - Timestep: mc.Timestep, - Series: []schema.Series{ - { - Hostname: query.Hostname, - Data: qdata.Data, - Statistics: schema.MetricStatistics{ - Avg: float64(qdata.Avg), - Min: float64(qdata.Min), - Max: float64(qdata.Max), + if mc != nil { + hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ + Unit: mc.Unit, + Timestep: mc.Timestep, + Series: []schema.Series{ + { + Hostname: query.Hostname, + Data: qdata.Data, + Statistics: schema.MetricStatistics{ + Avg: float64(qdata.Avg), + Min: float64(qdata.Min), + Max: float64(qdata.Max), + }, }, }, - }, - }) + }) + } else { + cclog.Warnf("Metric '%s' not configured for cluster '%s': Skipped in LoadNodeData() Return!", metric, cluster) + } } if len(errors) != 0 { diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index c69b28f..fbbf486 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -30,7 +30,8 @@ Table, Progress, Icon, - Button + Button, + Badge } from "@sveltestrap/sveltestrap"; import Roofline from "./generic/plots/Roofline.svelte"; import Pie, { colors } from "./generic/plots/Pie.svelte"; @@ -85,7 +86,8 @@ query: gql` query ( $cluster: String! - $metrics: [String!] + $nmetrics: [String!] + $cmetrics: [String!] $from: Time! $to: Time! $clusterFrom: Time! @@ -97,7 +99,7 @@ # Node 5 Minute Averages for Roofline nodeMetrics( cluster: $cluster - metrics: $metrics + metrics: $nmetrics from: $from to: $to ) { @@ -106,6 +108,10 @@ metrics { name metric { + unit { + base + prefix + } series { statistics { avg @@ -114,21 +120,6 @@ } } } - # Running Job Metric Average for Rooflines - jobsMetricStats(filter: $jobFilter, metrics: $metrics) { - id - jobId - duration - numNodes - numAccelerators - subCluster - stats { - name - data { - avg - } - } - } # Get Jobs for Per-Node Counts jobs(filter: $jobFilter, order: $sorting, page: $paging) { items { @@ -175,7 +166,7 @@ # ClusterMetrics for doubleMetricPlot clusterMetrics( cluster: $cluster - metrics: $metrics + metrics: $cmetrics from: $clusterFrom to: $to ) { @@ -194,7 +185,8 @@ `, variables: { cluster: presetCluster, - metrics: ["flops_any", "mem_bw"], // Metrics For Cluster Plot and Roofline + nmetrics: ["flops_any", "mem_bw", "cpu_power", "acc_power"], // Metrics For Roofline and Stats + cmetrics: ["flops_any", "mem_bw"], // Metrics For Cluster Plot from: from.toISOString(), clusterFrom: clusterFrom.toISOString(), to: to.toISOString(), @@ -258,6 +250,11 @@ } } + // Get Idle Infos after Sums + if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; + if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; + if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; + // Keymetrics (Data on Cluster-Scope) let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => sum + (node.metrics.find((m) => m.name == 'flops_any')?.metric?.series[0]?.statistics?.avg || 0), @@ -271,6 +268,26 @@ ) || 0; rawInfos['memBwRate'] = Math.floor((rawMemBw * 100) / 100) + let rawCpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => + sum + (node.metrics.find((m) => m.name == 'cpu_power')?.metric?.series[0]?.statistics?.avg || 0), + 0, // Initial Value + ) || 0; + rawInfos['cpuPwr'] = Math.floor((rawCpuPwr * 100) / 100) + if (!rawInfos['cpuPwrUnit']) { + let rawCpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'cpu_power')?.metric?.unit || null + rawInfos['cpuPwrUnit'] = rawCpuUnit ? rawCpuUnit.prefix + rawCpuUnit.base : '' + } + + let rawGpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => + sum + (node.metrics.find((m) => m.name == 'acc_power')?.metric?.series[0]?.statistics?.avg || 0), + 0, // Initial Value + ) || 0; + rawInfos['gpuPwr'] = Math.floor((rawGpuPwr * 100) / 100) + if (!rawInfos['gpuPwrUnit']) { + let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null + rawInfos['gpuPwrUnit'] = rawGpuUnit ? rawGpuUnit.prefix + rawGpuUnit.base : '' + } + return rawInfos } else { return {}; @@ -408,79 +425,99 @@ - - - - - -
    - - - - - - - - -
    - - - - - - - - - - + + + + {clusterInfo?.runningJobs} + +
    + Running Jobs +
    + + + + {clusterInfo?.activeUsers} + +
    + Active Users +
    + + + + {clusterInfo?.allocatedNodes} + +
    + Active Nodes +
    + + + + + + {clusterInfo?.flopRate} {clusterInfo?.flopRateUnit} + +
    + Total Flop Rate +
    + + + + {clusterInfo?.memBwRate} {clusterInfo?.memBwRateUnit} + +
    + Total Memory Bandwidth +
    + {#if clusterInfo?.totalAccs !== 0} - - - - - + + + {clusterInfo?.gpuPwr} {clusterInfo?.gpuPwrUnit} + +
    + Total GPU Power +
    + + {:else} + + + {clusterInfo?.cpuPwr} {clusterInfo?.cpuPwrUnit} + +
    + Total CPU Power +
    + {/if} -
    {clusterInfo?.runningJobs} Running Jobs{clusterInfo?.activeUsers} Active Users
    - Flop Rate (Any) - - Memory BW Rate -
    - {clusterInfo?.flopRate} - {clusterInfo?.flopRateUnit} - - {clusterInfo?.memBwRate} - {clusterInfo?.memBwRateUnit} -
    Allocated Nodes
    - -
    {clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes} - Nodes
    Allocated Cores
    - -
    {formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)} - Cores
    Allocated Accelerators
    - -
    {clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs} - Accelerators
    +
    + + + Active Cores + + + + {formatNumber(clusterInfo?.allocatedCores)} + {formatNumber(clusterInfo?.idleCores)} + + + + Idle Cores + + + {#if clusterInfo?.totalAccs !== 0} + + + Active GPU + + + + {formatNumber(clusterInfo?.allocatedAccs)} + {formatNumber(clusterInfo?.idleAccs)} + + + + Idle GPU + + + {/if}
    diff --git a/web/frontend/src/generic/units.js b/web/frontend/src/generic/units.js index 1737b97..3e251fb 100644 --- a/web/frontend/src/generic/units.js +++ b/web/frontend/src/generic/units.js @@ -3,7 +3,7 @@ */ const power = [1, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21] -const prefix = ['', 'K', 'M', 'G', 'T', 'P', 'E'] +const prefix = ['', 'k', 'M', 'G', 'T', 'P', 'E'] export function formatNumber(x) { if ( isNaN(x) || x == null) {