mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-23 12:51:40 +02:00
subclusters instead of slurm partitions
This commit is contained in:
@@ -20,10 +20,14 @@ import (
|
||||
|
||||
var db *sqlx.DB
|
||||
var lookupConfigStmt *sqlx.Stmt
|
||||
|
||||
var lock sync.RWMutex
|
||||
var uiDefaults map[string]interface{}
|
||||
|
||||
var cache *lrucache.Cache = lrucache.New(1024)
|
||||
|
||||
var Clusters []*model.Cluster
|
||||
var nodeLists map[string]map[string]NodeList
|
||||
|
||||
func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, jobArchive string) error {
|
||||
db = usersdb
|
||||
@@ -34,6 +38,7 @@ func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, j
|
||||
}
|
||||
|
||||
Clusters = []*model.Cluster{}
|
||||
nodeLists = map[string]map[string]NodeList{}
|
||||
for _, de := range entries {
|
||||
raw, err := os.ReadFile(filepath.Join(jobArchive, de.Name(), "cluster.json"))
|
||||
if err != nil {
|
||||
@@ -53,8 +58,8 @@ func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, j
|
||||
return err
|
||||
}
|
||||
|
||||
if len(cluster.Name) == 0 || len(cluster.MetricConfig) == 0 || len(cluster.Partitions) == 0 {
|
||||
return errors.New("cluster.name, cluster.metricConfig and cluster.Partitions should not be empty")
|
||||
if len(cluster.Name) == 0 || len(cluster.MetricConfig) == 0 || len(cluster.SubClusters) == 0 {
|
||||
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
|
||||
}
|
||||
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
@@ -83,6 +88,19 @@ func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, j
|
||||
}
|
||||
|
||||
Clusters = append(Clusters, &cluster)
|
||||
|
||||
nodeLists[cluster.Name] = make(map[string]NodeList)
|
||||
for _, sc := range cluster.SubClusters {
|
||||
if sc.Nodes == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
nl, err := ParseNodeList(sc.Nodes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
|
||||
}
|
||||
nodeLists[cluster.Name][sc.Name] = nl
|
||||
}
|
||||
}
|
||||
|
||||
if authEnabled {
|
||||
@@ -188,7 +206,7 @@ func UpdateConfig(key, value string, ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetClusterConfig(cluster string) *model.Cluster {
|
||||
func GetCluster(cluster string) *model.Cluster {
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
return c
|
||||
@@ -197,11 +215,11 @@ func GetClusterConfig(cluster string) *model.Cluster {
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetPartition(cluster, partition string) *model.Partition {
|
||||
func GetSubCluster(cluster, subcluster string) *model.SubCluster {
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, p := range c.Partitions {
|
||||
if p.Name == partition {
|
||||
for _, p := range c.SubClusters {
|
||||
if p.Name == subcluster {
|
||||
return p
|
||||
}
|
||||
}
|
||||
@@ -222,3 +240,40 @@ func GetMetricConfig(cluster, metric string) *model.MetricConfig {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AssignSubCluster sets the `job.subcluster` property of the job based
|
||||
// on its cluster and resources.
|
||||
func AssignSubCluster(job *schema.BaseJob) error {
|
||||
cluster := GetCluster(job.Cluster)
|
||||
if cluster == nil {
|
||||
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
|
||||
}
|
||||
|
||||
if job.SubCluster != "" {
|
||||
for _, sc := range cluster.SubClusters {
|
||||
if sc.Name == job.SubCluster {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
|
||||
}
|
||||
|
||||
if len(job.Resources) == 0 {
|
||||
return fmt.Errorf("job without any resources/hosts")
|
||||
}
|
||||
|
||||
host0 := job.Resources[0].Hostname
|
||||
for sc, nl := range nodeLists[job.Cluster] {
|
||||
if nl != nil && nl.Contains(host0) {
|
||||
job.SubCluster = sc
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if cluster.SubClusters[0].Nodes == "" {
|
||||
job.SubCluster = cluster.SubClusters[0].Name
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
|
||||
}
|
||||
|
136
config/nodelist.go
Normal file
136
config/nodelist.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/log"
|
||||
)
|
||||
|
||||
// NLExprString is a node list expression that matches one fixed, literal
// piece of a host name.
type NLExprString string

// consume reports whether input begins with the literal. On a match it
// returns the remainder of input after the literal together with true;
// otherwise it returns "" and false.
func (nle NLExprString) consume(input string) (next string, ok bool) {
	literal := string(nle)
	if !strings.HasPrefix(input, literal) {
		return "", false
	}
	return input[len(literal):], true
}
|
||||
|
||||
type NLExprIntRange struct {
|
||||
start, end int64
|
||||
zeroPadded bool
|
||||
digits int
|
||||
}
|
||||
|
||||
func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
|
||||
if !nle.zeroPadded || nle.digits < 1 {
|
||||
log.Error("node list: only zero-padded ranges are allowed")
|
||||
return "", false
|
||||
}
|
||||
|
||||
if len(input) < nle.digits {
|
||||
return "", false
|
||||
}
|
||||
|
||||
numerals, rest := input[:nle.digits], input[nle.digits:]
|
||||
for len(numerals) > 1 && numerals[0] == '0' {
|
||||
numerals = numerals[1:]
|
||||
}
|
||||
|
||||
x, err := strconv.ParseInt(numerals, 10, 32)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
if nle.start <= x && x <= nle.end {
|
||||
return rest, true
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
||||
// NodeList is a parsed node list: a list of alternative terms, each of
// which is a sequence of expressions that are matched one after another
// against a host name.
type NodeList [][]interface {
	consume(input string) (next string, ok bool)
}

// Contains reports whether name is matched by at least one term of the
// list. A term matches when each of its expressions consumes a piece of
// the name in turn and nothing of the name is left over afterwards.
func (nl *NodeList) Contains(name string) bool {
	// The flag is intentionally declared once for all terms.
	matched := false
	for i := range *nl {
		remaining := name
		for _, expr := range (*nl)[i] {
			if remaining, matched = expr.consume(remaining); !matched {
				break
			}
		}

		if matched && len(remaining) == 0 {
			return true
		}
	}

	return false
}
|
||||
|
||||
func ParseNodeList(raw string) (NodeList, error) {
|
||||
nl := NodeList{}
|
||||
|
||||
isLetter := func(r byte) bool { return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') }
|
||||
isDigit := func(r byte) bool { return '0' <= r && r <= '9' }
|
||||
|
||||
for _, rawterm := range strings.Split(raw, ",") {
|
||||
exprs := []interface {
|
||||
consume(input string) (next string, ok bool)
|
||||
}{}
|
||||
for i := 0; i < len(rawterm); i++ {
|
||||
c := rawterm[i]
|
||||
if isLetter(c) || isDigit(c) {
|
||||
j := i
|
||||
for j < len(rawterm) && (isLetter(rawterm[j]) || isDigit(rawterm[j])) {
|
||||
j++
|
||||
}
|
||||
exprs = append(exprs, NLExprString(rawterm[i:j]))
|
||||
i = j - 1
|
||||
} else if c == '[' {
|
||||
end := strings.Index(rawterm[i:], "]")
|
||||
if end == -1 {
|
||||
return nil, fmt.Errorf("node list: unclosed '['")
|
||||
}
|
||||
|
||||
minus := strings.Index(rawterm[i:i+end], "-")
|
||||
if minus == -1 {
|
||||
return nil, fmt.Errorf("node list: no '-' found inside '[...]'")
|
||||
}
|
||||
|
||||
s1, s2 := rawterm[i+1:i+minus], rawterm[i+minus+1:i+end]
|
||||
if len(s1) != len(s2) || len(s1) == 0 {
|
||||
return nil, fmt.Errorf("node list: %#v and %#v are not of equal length or of length zero", s1, s2)
|
||||
}
|
||||
|
||||
x1, err := strconv.ParseInt(s1, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("node list: %w", err)
|
||||
}
|
||||
x2, err := strconv.ParseInt(s2, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("node list: %w", err)
|
||||
}
|
||||
|
||||
exprs = append(exprs, NLExprIntRange{
|
||||
start: x1,
|
||||
end: x2,
|
||||
digits: len(s1),
|
||||
zeroPadded: true,
|
||||
})
|
||||
i += end
|
||||
} else {
|
||||
return nil, fmt.Errorf("node list: invalid character: %#v", rune(c))
|
||||
}
|
||||
}
|
||||
nl = append(nl, exprs)
|
||||
}
|
||||
|
||||
return nl, nil
|
||||
}
|
37
config/nodelist_test.go
Normal file
37
config/nodelist_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNodeList(t *testing.T) {
|
||||
nl, err := ParseNodeList("hallo,wel123t,emmy[01-99],fritz[005-500],woody[100-200]")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// fmt.Printf("terms\n")
|
||||
// for i, term := range nl.terms {
|
||||
// fmt.Printf("term %d: %#v\n", i, term)
|
||||
// }
|
||||
|
||||
if nl.Contains("hello") || nl.Contains("woody") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if nl.Contains("fritz1") || nl.Contains("fritz9") || nl.Contains("fritz004") || nl.Contains("woody201") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if !nl.Contains("hallo") || !nl.Contains("wel123t") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if !nl.Contains("emmy01") || !nl.Contains("emmy42") || !nl.Contains("emmy99") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if !nl.Contains("woody100") || !nl.Contains("woody199") {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user