fix: fix jobname and arrayjobid timeouts by adding additional 30d filter

- improve archive worker logs
- add arrayjobid filter to url if used
This commit is contained in:
Christoph Kluge 2024-05-23 11:53:23 +02:00
parent ba1658beac
commit 420bec7c46
3 changed files with 14 additions and 5 deletions

View File

@ -520,7 +520,7 @@ func (r *JobRepository) archivingWorker() {
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := r.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
@ -529,14 +529,14 @@ func (r *JobRepository) archivingWorker() {
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// Update the jobs database entry one last time:
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
log.Errorf("archiving job (dbid: %d) failed at marking archived step: %s", job.ID, err.Error())
continue
}
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))

View File

@ -302,11 +302,19 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
case "jobId":
http.Redirect(rw, r, "/monitoring/jobs/?jobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
case "jobName":
http.Redirect(rw, r, "/monitoring/jobs/?jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
// Add Last 30 Days to migitate timeouts
untilTime := strconv.FormatInt(time.Now().Unix(), 10)
fromTime := strconv.FormatInt((time.Now().Unix() - int64(30*24*3600)), 10)
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&jobName="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
case "projectId":
http.Redirect(rw, r, "/monitoring/jobs/?projectMatch=eq&project="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
case "arrayJobId":
http.Redirect(rw, r, "/monitoring/jobs/?arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
// Add Last 30 Days to migitate timeouts
untilTime := strconv.FormatInt(time.Now().Unix(), 10)
fromTime := strconv.FormatInt((time.Now().Unix() - int64(30*24*3600)), 10)
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
case "username":
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound)

View File

@ -193,6 +193,7 @@
opts.push(`userMatch=${filters.userMatch}`);
if (filters.project) opts.push(`project=${filters.project}`);
if (filters.jobName) opts.push(`jobName=${filters.jobName}`);
if (filters.arrayJobId) opts.push(`arrayJobId=${filters.arrayJobId}`);
if (filters.projectMatch != "contains")
opts.push(`projectMatch=${filters.projectMatch}`);