From 9d49fea48a7adae7a40a4c7cebe615f20a2957f3 Mon Sep 17 00:00:00 2001
From: Lou Knauer
Date: Mon, 9 May 2022 11:53:41 +0200
Subject: [PATCH] Only autostop jobs with a positive walltime

---
 repository/job.go |  1 +
 server.go         | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/repository/job.go b/repository/job.go
index 8c2d339..c7d65cf 100644
--- a/repository/job.go
+++ b/repository/job.go
@@ -378,6 +378,7 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
 		Set("duration", 0).
 		Set("job_state", schema.JobStateFailed).
 		Where("job.job_state = 'running'").
+		Where("job.walltime > 0").
 		Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
 		RunWith(r.DB).Exec()
 	if err != nil {
diff --git a/server.go b/server.go
index 148dd3c..85cbd3f 100644
--- a/server.go
+++ b/server.go
@@ -13,6 +13,7 @@ import (
 	"net/url"
 	"os"
 	"os/signal"
+	"runtime"
 	"runtime/debug"
 	"strings"
 	"sync"
@@ -126,7 +127,7 @@ var programConfig ProgramConfig = ProgramConfig{
 		"plot_view_showStatTable": true,
 		"system_view_selectedMetric": "cpu_load",
 	},
-	StopJobsExceedingWalltime: 3600,
+	StopJobsExceedingWalltime: -1,
 }
 
 func main() {
@@ -481,17 +482,17 @@ func main() {
 		api.OngoingArchivings.Wait()
 	}()
 
-	// if programConfig.StopJobsExceedingWalltime != 0 {
-	// 	go func() {
-	// 		for range time.Tick(1 * time.Hour) {
-	// 			err := jobRepo.StopJobsExceedingWalltimeBy(programConfig.StopJobsExceedingWalltime)
-	// 			if err != nil {
-	// 				log.Errorf("error while looking for jobs exceeding theire walltime: %s", err.Error())
-	// 			}
-	// 			runtime.GC()
-	// 		}
-	// 	}()
-	// }
+	if programConfig.StopJobsExceedingWalltime > 0 {
+		go func() {
+			for range time.Tick(30 * time.Minute) {
+				err := jobRepo.StopJobsExceedingWalltimeBy(programConfig.StopJobsExceedingWalltime)
+				if err != nil {
+					log.Errorf("error while looking for jobs exceeding theire walltime: %s", err.Error())
+				}
+				runtime.GC()
+			}
+		}()
+	}
 
 	if os.Getenv("GOGC") == "" {
 		debug.SetGCPercent(25)