mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-23 21:01:40 +02:00
Add log messages to error events w/o log message, primaryly error level
- "log spam" to be controlled via loglevel flag on startup
This commit is contained in:
@@ -100,6 +100,7 @@ func HandleImportFlag(flag string) error {
|
||||
|
||||
raw, err := os.ReadFile(files[0])
|
||||
if err != nil {
|
||||
log.Error("Error while reading metadata file for import")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -112,11 +113,13 @@ func HandleImportFlag(flag string) error {
|
||||
dec.DisallowUnknownFields()
|
||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := dec.Decode(&jobMeta); err != nil {
|
||||
log.Error("Error while decoding raw json metadata for import")
|
||||
return err
|
||||
}
|
||||
|
||||
raw, err = os.ReadFile(files[1])
|
||||
if err != nil {
|
||||
log.Error("Error while reading jobdata file for import")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -129,6 +132,7 @@ func HandleImportFlag(flag string) error {
|
||||
dec.DisallowUnknownFields()
|
||||
jobData := schema.JobData{}
|
||||
if err := dec.Decode(&jobData); err != nil {
|
||||
log.Error("Error while decoding raw json jobdata for import")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -136,6 +140,7 @@ func HandleImportFlag(flag string) error {
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
||||
if err != nil {
|
||||
log.Error("Error while finding job in jobRepository")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -155,33 +160,40 @@ func HandleImportFlag(flag string) error {
|
||||
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
log.Error("Error while marshaling job resources")
|
||||
return err
|
||||
}
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
log.Error("Error while marshaling job metadata")
|
||||
return err
|
||||
}
|
||||
|
||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
||||
log.Error("BaseJob SanityChecks failed")
|
||||
return err
|
||||
}
|
||||
|
||||
if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
|
||||
log.Error("Error while importing job")
|
||||
return err
|
||||
}
|
||||
|
||||
res, err := GetConnection().DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Error("Error while NamedJobInsert")
|
||||
return err
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Error("Error while getting last insert ID")
|
||||
return err
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
log.Error("Error while adding or creating tag")
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -201,6 +213,7 @@ func InitDB() error {
|
||||
// Basic database structure:
|
||||
_, err := db.DB.Exec(JobsDBSchema)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing basic DB structure")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -208,11 +221,13 @@ func InitDB() error {
|
||||
// that speeds up inserts A LOT.
|
||||
tx, err := db.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Error("Error while bundling transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
stmt, err := tx.PrepareNamed(NamedJobInsert)
|
||||
if err != nil {
|
||||
log.Error("Error while preparing namedJobInsert")
|
||||
return err
|
||||
}
|
||||
tags := make(map[string]int64)
|
||||
@@ -232,12 +247,14 @@ func InitDB() error {
|
||||
if i%10 == 0 {
|
||||
if tx != nil {
|
||||
if err := tx.Commit(); err != nil {
|
||||
log.Error("Error while committing transactions for jobMeta")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tx, err = db.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Error("Error while bundling transactions for jobMeta")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -298,16 +315,19 @@ func InitDB() error {
|
||||
if !ok {
|
||||
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
log.Errorf("Error while inserting tag into tag table: %#v %#v", tag.Name, tag.Type)
|
||||
return err
|
||||
}
|
||||
tagId, err = res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Error("Error while getting last insert ID")
|
||||
return err
|
||||
}
|
||||
tags[tagstr] = tagId
|
||||
}
|
||||
|
||||
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
|
||||
log.Errorf("Error while inserting jobtag into jobtag table: %#v %#v", id, tagId)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -322,12 +342,14 @@ func InitDB() error {
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
log.Error("Error while committing SQL transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
// Create indexes after inserts so that they do not
|
||||
// need to be continually updated.
|
||||
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
|
||||
log.Error("Error while creating indices after inserts")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -341,6 +363,7 @@ func SanityChecks(job *schema.BaseJob) error {
|
||||
return fmt.Errorf("no such cluster: %#v", job.Cluster)
|
||||
}
|
||||
if err := archive.AssignSubCluster(job); err != nil {
|
||||
log.Error("Error while assigning subcluster to job")
|
||||
return err
|
||||
}
|
||||
if !job.State.Valid() {
|
||||
|
@@ -68,10 +68,12 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
||||
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||
&job.Duration, &job.Walltime, &job.RawResources /*&job.MetaData*/); err != nil {
|
||||
log.Error("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||
log.Error("Error while unmarhsaling raw resources json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -93,6 +95,7 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
|
||||
|
||||
if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
|
||||
log.Error("Error while scanning for job metadata")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -101,6 +104,7 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
|
||||
log.Error("Error while unmarshaling raw metadata json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -113,6 +117,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
r.cache.Del(cachekey)
|
||||
if job.MetaData == nil {
|
||||
if _, err = r.FetchMetadata(job); err != nil {
|
||||
log.Errorf("Error while fetching metadata for job, DB ID '%#v'", job.ID)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -129,10 +134,12 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
}
|
||||
|
||||
if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
|
||||
log.Errorf("Error while marshaling metadata for job, DB ID '%#v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Errorf("Error while updating metadata for job, DB ID '%#v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -185,6 +192,7 @@ func (r *JobRepository) FindAll(
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -192,6 +200,7 @@ func (r *JobRepository) FindAll(
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
log.Error("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
@@ -259,7 +268,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement
|
||||
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
|
||||
if err != nil {
|
||||
log.Warnf(" DeleteJobsBefore(%d): error %v", startTime, err)
|
||||
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
|
||||
} else {
|
||||
log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
|
||||
}
|
||||
@@ -269,7 +278,7 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
|
||||
if err != nil {
|
||||
log.Warnf("DeleteJobById(%d): error %v", id, err)
|
||||
log.Errorf("DeleteJobById(%d): error %#v", id, err)
|
||||
} else {
|
||||
log.Infof("DeleteJobById(%d): Success", id)
|
||||
}
|
||||
@@ -293,7 +302,7 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
|
||||
count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
|
||||
runner = r.DB
|
||||
default:
|
||||
log.Notef("CountGroupedJobs() Weight %v unknown.", *weight)
|
||||
log.Notef("CountGroupedJobs() Weight %#v unknown.", *weight)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -309,6 +318,7 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
|
||||
counts := map[string]int{}
|
||||
rows, err := q.RunWith(runner).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -316,6 +326,7 @@ func (r *JobRepository) CountGroupedJobs(ctx context.Context, aggreg model.Aggre
|
||||
var group string
|
||||
var count int
|
||||
if err := rows.Scan(&group, &count); err != nil {
|
||||
log.Error("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -359,11 +370,12 @@ func (r *JobRepository) MarkArchived(
|
||||
case "file_bw":
|
||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||
default:
|
||||
log.Notef("MarkArchived() Metric %s unknown.", metric)
|
||||
log.Notef("MarkArchived() Metric '%#v' unknown", metric)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Error("Error while marking job as archived")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@@ -478,6 +490,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
||||
Where("job.cluster = ?", cluster).
|
||||
RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -488,9 +501,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
||||
var resources []*schema.Resource
|
||||
var subcluster string
|
||||
if err := rows.Scan(&raw, &subcluster); err != nil {
|
||||
log.Error("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
if err := json.Unmarshal(raw, &resources); err != nil {
|
||||
log.Error("Error while unmarshaling raw resources json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -518,16 +533,18 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
|
||||
RunWith(r.DB).Exec()
|
||||
if err != nil {
|
||||
log.Error("Error while stopping jobs exceeding walltime")
|
||||
return err
|
||||
}
|
||||
|
||||
rowsAffected, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
log.Error("Error while fetching affected rows after stopping due to exceeded walltime")
|
||||
return err
|
||||
}
|
||||
|
||||
if rowsAffected > 0 {
|
||||
log.Warnf("%d jobs have been marked as failed due to running too long", rowsAffected)
|
||||
log.Notef("%d jobs have been marked as failed due to running too long", rowsAffected)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@@ -51,12 +51,14 @@ func (r *JobRepository) QueryJobs(
|
||||
|
||||
sql, args, err := query.ToSql()
|
||||
if err != nil {
|
||||
log.Error("Error while converting query to sql")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -65,6 +67,7 @@ func (r *JobRepository) QueryJobs(
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
log.Error("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
|
@@ -7,22 +7,26 @@ package repository
|
||||
import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
||||
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
if err != nil {
|
||||
log.Error("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
if err != nil {
|
||||
log.Error("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -32,16 +36,19 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
// Removes a tag from a job
|
||||
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
|
||||
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
if err != nil {
|
||||
log.Error("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
if err != nil {
|
||||
log.Error("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -138,6 +145,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -145,6 +153,7 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
|
||||
log.Error("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, tag)
|
||||
|
@@ -6,13 +6,13 @@ package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
@@ -82,6 +82,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
|
||||
|
||||
rows, err := uCfg.Lookup.Query(user.Username)
|
||||
if err != nil {
|
||||
log.Errorf("Error while looking up user config for user '%#v'", user.Username)
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
@@ -90,11 +91,13 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
|
||||
for rows.Next() {
|
||||
var key, rawval string
|
||||
if err := rows.Scan(&key, &rawval); err != nil {
|
||||
log.Error("Error while scanning user config values")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
|
||||
log.Error("Error while unmarshaling raw user config json")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
@@ -106,6 +109,7 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, e
|
||||
return config, 24 * time.Hour, size
|
||||
})
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in data set")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -122,6 +126,7 @@ func (uCfg *UserCfgRepo) UpdateConfig(
|
||||
if user == nil {
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
log.Error("Error while unmarshaling raw user config json")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -133,6 +138,7 @@ func (uCfg *UserCfgRepo) UpdateConfig(
|
||||
|
||||
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
|
||||
user, key, value); err != nil {
|
||||
log.Errorf("Error while replacing user config in DB for user '$#v'", user)
|
||||
return err
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user