mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-25 12:59:06 +01:00
fix stop_job returned state; handle monitoring status
This commit is contained in:
parent
8d4498f58e
commit
2f32760cc5
18
api/rest.go
18
api/rest.go
@ -255,12 +255,6 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
}
|
||||
|
||||
const (
|
||||
// TODO: Constants in schema/? What constants to use?
|
||||
MonitoringStatusArchivingSuccessfull int32 = 0
|
||||
MonitoringStatusArchivingFailed int32 = 2
|
||||
)
|
||||
|
||||
// A job has stopped and should be archived.
|
||||
func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
@ -314,7 +308,8 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// Mark job as stopped in the database (update state and duration)
|
||||
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
||||
if err := api.JobRepository.Stop(job.ID, job.Duration, req.State); err != nil {
|
||||
job.State = req.State
|
||||
if err := api.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
handleError(fmt.Errorf("marking job as stopped failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
@ -328,6 +323,11 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
|
||||
// Monitoring is disabled...
|
||||
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
|
||||
return
|
||||
}
|
||||
|
||||
// We need to start a new goroutine as this functions needs to return
|
||||
// for the response to be flushed to the client.
|
||||
api.OngoingArchivings.Add(1) // So that a shutdown does not interrupt this goroutine.
|
||||
@ -338,12 +338,12 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
api.JobRepository.UpdateMonitoringStatus(job.ID, MonitoringStatusArchivingFailed)
|
||||
api.JobRepository.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
return
|
||||
}
|
||||
|
||||
// Update the jobs database entry one last time:
|
||||
if err := api.JobRepository.Archive(job.ID, 0, jobMeta.Statistics); err != nil {
|
||||
if err := api.JobRepository.Archive(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
return
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ const JOBS_DB_SCHEMA string = `
|
||||
num_acc INT NOT NULL,
|
||||
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
|
||||
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1 )),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
|
||||
|
||||
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||
|
@ -136,10 +136,6 @@ func loadAveragesFromArchive(job *schema.Job, metrics []string, data [][]schema.
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
if job.State != schema.JobStateRunning {
|
||||
return nil, errors.New("cannot archive job that is not running")
|
||||
}
|
||||
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := config.GetClusterConfig(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
|
@ -65,7 +65,7 @@ func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ct
|
||||
data := cache.Get(cacheKey(job, metrics, scopes), func() (interface{}, time.Duration, int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
if job.State == schema.JobStateRunning || !useArchive {
|
||||
if job.State == schema.JobStateRunning || job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving || !useArchive {
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
|
@ -77,11 +77,13 @@ func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
func (r *JobRepository) Stop(
|
||||
jobId int64,
|
||||
duration int32,
|
||||
state schema.JobState) (err error) {
|
||||
state schema.JobState,
|
||||
monitoringStatus int32) (err error) {
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.DB).Exec()
|
||||
|
@ -57,9 +57,16 @@ type JobMeta struct {
|
||||
Statistics map[string]JobStatistics `json:"statistics,omitempty"`
|
||||
}
|
||||
|
||||
const (
|
||||
MonitoringStatusDisabled int32 = 0
|
||||
MonitoringStatusRunningOrArchiving int32 = 1
|
||||
MonitoringStatusArchivingFailed int32 = 2
|
||||
MonitoringStatusArchivingSuccessful int32 = 3
|
||||
)
|
||||
|
||||
var JobDefaults BaseJob = BaseJob{
|
||||
Exclusive: 1,
|
||||
MonitoringStatus: 1,
|
||||
MonitoringStatus: MonitoringStatusRunningOrArchiving,
|
||||
MetaData: "",
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user