feat: add automatic application detection and tagging

This commit is contained in:
Jan Eitzinger 2023-09-28 10:20:20 +02:00
parent dc0d9fe038
commit ba7cc9168e
7 changed files with 161 additions and 11 deletions

View File

@ -16,9 +16,7 @@ func TestFind(t *testing.T) {
jobId, cluster, startTime := int64(398998), "fritz", int64(1675957496) jobId, cluster, startTime := int64(398998), "fritz", int64(1675957496)
job, err := r.Find(&jobId, &cluster, &startTime) job, err := r.Find(&jobId, &cluster, &startTime)
if err != nil { noErr(t, err)
t.Fatal(err)
}
// fmt.Printf("%+v", job) // fmt.Printf("%+v", job)
@ -31,9 +29,7 @@ func TestFindById(t *testing.T) {
r := setup(t) r := setup(t)
job, err := r.FindById(5) job, err := r.FindById(5)
if err != nil { noErr(t, err)
t.Fatal(err)
}
// fmt.Printf("%+v", job) // fmt.Printf("%+v", job)
@ -46,14 +42,26 @@ func TestGetTags(t *testing.T) {
r := setup(t) r := setup(t)
tags, counts, err := r.CountTags(nil) tags, counts, err := r.CountTags(nil)
if err != nil { noErr(t, err)
t.Fatal(err)
}
fmt.Printf("TAGS %+v \n", tags) fmt.Printf("TAGS %+v \n", tags)
// fmt.Printf("COUNTS %+v \n", counts) // fmt.Printf("COUNTS %+v \n", counts)
if counts["bandwidth"] != 0 { if counts["bandwidth"] != 2 {
t.Errorf("wrong tag count \ngot: %d \nwant: 0", counts["bandwidth"]) t.Errorf("wrong tag count \ngot: %d \nwant: 2", counts["bandwidth"])
}
}
func TestHasTag(t *testing.T) {
r := setup(t)
if !r.HasTag(5, "util", "bandwidth") {
t.Errorf("Expected has tag")
}
if r.HasTag(4, "patho", "idle") {
t.Errorf("Expected has not tag")
}
if !r.HasTag(5, "patho", "idle") {
t.Errorf("Expected has tag")
} }
} }

View File

@ -134,6 +134,19 @@ func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName stri
return tagId, nil return tagId, nil
} }
func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
var id int64
q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
Where("tag.tag_name = ?", tagName)
err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
if err != nil {
return false
} else {
return true
}
}
// TagId returns the database id of the tag with the specified type and name. // TagId returns the database id of the tag with the specified type and name.
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) { func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
exists = true exists = true

View File

@ -0,0 +1,3 @@
GROMACS
gromacs
GMX

View File

@ -0,0 +1 @@
openfoam

View File

@ -0,0 +1,2 @@
VASP
vasp

View File

@ -4,17 +4,81 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package tagger package tagger
import (
"bufio"
"embed"
"fmt"
"path/filepath"
"strings"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
const tagType = "app" const tagType = "app"
//go:embed apps/*
var appFiles embed.FS
type appInfo struct { type appInfo struct {
tag string tag string
strings []string strings []string
} }
type AppTagger struct { type AppTagger struct {
apps []appInfo apps []appInfo
} }
func (t *AppTagger) Register() error { func (t *AppTagger) Register() error {
files, err := appFiles.ReadDir("apps")
if err != nil {
return fmt.Errorf("error reading app folder: %#v", err)
}
t.apps = make([]appInfo, 0)
for _, fn := range files {
fns := fn.Name()
log.Debugf("Process: %s", fns)
f, err := appFiles.Open(fmt.Sprintf("apps/%s", fns))
if err != nil {
return fmt.Errorf("error opening app file %s: %#v", fns, err)
}
scanner := bufio.NewScanner(f)
ai := appInfo{tag: strings.TrimSuffix(fns, filepath.Ext(fns)), strings: make([]string, 0)}
for scanner.Scan() {
ai.strings = append(ai.strings, scanner.Text())
}
t.apps = append(t.apps, ai)
}
return nil return nil
} }
func (t *AppTagger) Match(job *schema.Job) {
r := repository.GetJobRepository()
meta, err := r.FetchMetadata(job)
if err != nil {
log.Error("cannot fetch meta data")
}
jobscript, ok := meta["jobScript"]
if ok {
id := job.ID
out:
for _, a := range t.apps {
tag := a.tag
for _, s := range a.strings {
if strings.Contains(jobscript, s) {
if !r.HasTag(id, tagType, tag) {
r.AddTagOrCreate(id, tagType, tag)
break out
}
}
}
}
} else {
log.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster)
}
}

View File

@ -0,0 +1,59 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package tagger
import (
"testing"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
func setup(tb testing.TB) *repository.JobRepository {
tb.Helper()
log.Init("warn", true)
dbfile := "../repository/testdata/job.db"
err := repository.MigrateDB("sqlite3", dbfile)
noErr(tb, err)
repository.Connect("sqlite3", dbfile)
return repository.GetJobRepository()
}
func noErr(tb testing.TB, err error) {
tb.Helper()
if err != nil {
tb.Fatal("Error is not nil:", err)
}
}
func TestRegister(t *testing.T) {
var tagger AppTagger
err := tagger.Register()
noErr(t, err)
if len(tagger.apps) != 3 {
t.Errorf("wrong summary for diagnostic \ngot: %d \nwant: 3", len(tagger.apps))
}
}
func TestMatch(t *testing.T) {
r := setup(t)
job, err := r.FindById(5)
noErr(t, err)
var tagger AppTagger
err = tagger.Register()
noErr(t, err)
tagger.Match(job)
if !r.HasTag(5, "app", "vasp") {
t.Errorf("missing tag vasp")
}
}