mirror of https://github.com/ClusterCockpit/cc-backend
synced 2025-06-08 00:23:48 +02:00

commit 5048f7be14: Merge branch 'dev' into migrate_svelte5
@@ -4,61 +4,78 @@ scalar Any
scalar NullableFloat
scalar MetricScope
scalar JobState
+scalar NodeState
+scalar MonitoringState
+
+type Node {
+  id: ID!
+  hostname: String!
+  cluster: String!
+  subCluster: String!
+  nodeState: NodeState!
+  HealthState: MonitoringState!
+  metaData: Any
+}
+
+type NodeStats {
+  state: String!
+  count: Int!
+}

type Job {
  id: ID!
  jobId: Int!
  user: String!
  project: String!
  cluster: String!
  subCluster: String!
  startTime: Time!
  duration: Int!
  walltime: Int!
  numNodes: Int!
  numHWThreads: Int!
  numAcc: Int!
  energy: Float!
  SMT: Int!
  exclusive: Int!
  partition: String!
  arrayJobId: Int!
  monitoringStatus: Int!
  state: JobState!
  tags: [Tag!]!
  resources: [Resource!]!
  concurrentJobs: JobLinkResultList
  footprint: [FootprintValue]
  energyFootprint: [EnergyFootprintValue]
  metaData: Any
  userData: User
}

type JobLink {
  id: ID!
  jobId: Int!
}

type Cluster {
  name: String!
  partitions: [String!]! # Slurm partitions
  subClusters: [SubCluster!]! # Hardware partitions/subclusters
}

type SubCluster {
  name: String!
  nodes: String!
  numberOfNodes: Int!
  processorType: String!
  socketsPerNode: Int!
  coresPerSocket: Int!
  threadsPerCore: Int!
  flopRateScalar: MetricValue!
  flopRateSimd: MetricValue!
  memoryBandwidth: MetricValue!
  topology: Topology!
  metricConfig: [MetricConfig!]!
  footprint: [String!]!
}

type FootprintValue {
@@ -80,94 +97,94 @@ type MetricValue {
}

type Topology {
  node: [Int!]
  socket: [[Int!]!]
  memoryDomain: [[Int!]!]
  die: [[Int!]!]
  core: [[Int!]!]
  accelerators: [Accelerator!]
}

type Accelerator {
  id: String!
  type: String!
  model: String!
}

type SubClusterConfig {
  name: String!
  peak: Float
  normal: Float
  caution: Float
  alert: Float
  remove: Boolean
}

type MetricConfig {
  name: String!
  unit: Unit!
  scope: MetricScope!
  aggregation: String!
  timestep: Int!
  peak: Float!
  normal: Float
  caution: Float!
  alert: Float!
  lowerIsBetter: Boolean
  subClusters: [SubClusterConfig!]!
}

type Tag {
  id: ID!
  type: String!
  name: String!
  scope: String!
}

type Resource {
  hostname: String!
  hwthreads: [Int!]
  accelerators: [String!]
  configuration: String
}

type JobMetricWithName {
  name: String!
  scope: MetricScope!
  metric: JobMetric!
}

type JobMetric {
  unit: Unit
  timestep: Int!
  series: [Series!]
  statisticsSeries: StatsSeries
}

type Series {
  hostname: String!
  id: String
  statistics: MetricStatistics
  data: [NullableFloat!]!
}

type StatsSeries {
  mean: [NullableFloat!]!
  median: [NullableFloat!]!
  min: [NullableFloat!]!
  max: [NullableFloat!]!
}

type NamedStatsWithScope {
  name: String!
  scope: MetricScope!
  stats: [ScopedStats!]!
}

type ScopedStats {
  hostname: String!
  id: String
  data: MetricStatistics!
}

type JobStats {
@@ -184,8 +201,8 @@ type JobStats {
}

type NamedStats {
  name: String!
  data: MetricStatistics!
}

type Unit {
@@ -201,12 +218,12 @@ type MetricStatistics {

type MetricFootprints {
  metric: String!
  data: [NullableFloat!]!
}

type Footprints {
  timeWeights: TimeWeights!
  metrics: [MetricFootprints!]!
}

type TimeWeights {
@@ -215,20 +232,33 @@ type TimeWeights {
  coreHours: [NullableFloat!]!
}

-enum Aggregate { USER, PROJECT, CLUSTER }
-enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS }
+enum Aggregate {
+  USER
+  PROJECT
+  CLUSTER
+}
+enum SortByAggregate {
+  TOTALWALLTIME
+  TOTALJOBS
+  TOTALNODES
+  TOTALNODEHOURS
+  TOTALCORES
+  TOTALCOREHOURS
+  TOTALACCS
+  TOTALACCHOURS
+}

type NodeMetrics {
  host: String!
  subCluster: String!
  metrics: [JobMetricWithName!]!
}

type NodesResultList {
  items: [NodeMetrics!]!
  offset: Int
  limit: Int
  count: Int
  totalNodes: Int
  hasNextPage: Boolean
}
@@ -247,14 +277,14 @@ type GlobalMetricListItem {
}

type Count {
  name: String!
  count: Int!
}

type User {
  username: String!
  name: String!
  email: String!
}

input MetricStatItem {
@@ -263,27 +293,81 @@ input MetricStatItem {
}

type Query {
  clusters: [Cluster!]! # List of all clusters
  tags: [Tag!]! # List of all tags
  globalMetrics: [GlobalMetricListItem!]!

  user(username: String!): User
  allocatedNodes(cluster: String!): [Count!]!

-  job(id: ID!): Job
-  jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
-  jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
-  scopedJobStats(id: ID!, metrics: [String!], scopes: [MetricScope!]): [NamedStatsWithScope!]!
+  node(id: ID!): Node
+  nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
+  nodeStats(filter: [NodeFilter!]): [NodeStats!]!
+
+  job(id: ID!): Job
+  jobMetrics(
+    id: ID!
+    metrics: [String!]
+    scopes: [MetricScope!]
+    resolution: Int
+  ): [JobMetricWithName!]!
+
+  jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
+
+  scopedJobStats(
+    id: ID!
+    metrics: [String!]
+    scopes: [MetricScope!]
+  ): [NamedStatsWithScope!]!
+
+  jobs(
+    filter: [JobFilter!]
+    page: PageRequest
+    order: OrderByInput
+  ): JobResultList!
+
+  jobsStatistics(
+    filter: [JobFilter!]
+    metrics: [String!]
+    page: PageRequest
+    sortBy: SortByAggregate
+    groupBy: Aggregate
+    numDurationBins: String
+    numMetricBins: Int
+  ): [JobsStatistics!]!

-  jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
-  jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]!
  jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]!
  jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints

-  rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
+  rooflineHeatmap(
+    filter: [JobFilter!]!
+    rows: Int!
+    cols: Int!
+    minX: Float!
+    minY: Float!
+    maxX: Float!
+    maxY: Float!
+  ): [[Float!]!]!

-  nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
-  nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList!
+  nodeMetrics(
+    cluster: String!
+    nodes: [String!]
+    scopes: [MetricScope!]
+    metrics: [String!]
+    from: Time!
+    to: Time!
+  ): [NodeMetrics!]!
+
+  nodeMetricsList(
+    cluster: String!
+    subCluster: String!
+    nodeFilter: String!
+    scopes: [MetricScope!]
+    metrics: [String!]
+    from: Time!
+    to: Time!
+    page: PageRequest
+    resolution: Int
+  ): NodesResultList!
}

type Mutation {
@@ -296,38 +380,53 @@ type Mutation {
  updateConfiguration(name: String!, value: String!): String
}

-type IntRangeOutput { from: Int!, to: Int! }
-type TimeRangeOutput { range: String, from: Time!, to: Time! }
+type IntRangeOutput {
+  from: Int!
+  to: Int!
+}
+
+type TimeRangeOutput {
+  range: String
+  from: Time!
+  to: Time!
+}
+
+input NodeFilter {
+  hostname: StringInput
+  cluster: StringInput
+  subCluster: StringInput
+  nodeState: NodeState
+  healthState: MonitoringState
+}

input JobFilter {
  tags: [ID!]
  dbId: [ID!]
  jobId: StringInput
  arrayJobId: Int
  user: StringInput
  project: StringInput
  jobName: StringInput
  cluster: StringInput
  partition: StringInput
  duration: IntRange
  energy: FloatRange

  minRunningFor: Int

  numNodes: IntRange
  numAccelerators: IntRange
  numHWThreads: IntRange

  startTime: TimeRange
  state: [JobState!]
  metricStats: [MetricStatItem!]
  exclusive: Int
  node: StringInput
}

input OrderByInput {
  field: String!
-  type: String!,
+  type: String!
  order: SortDirectionEnum! = ASC
}

@@ -337,34 +436,46 @@ enum SortDirectionEnum {
}

input StringInput {
  eq: String
  neq: String
  contains: String
  startsWith: String
  endsWith: String
  in: [String!]
}

-input IntRange { from: Int!, to: Int! }
-input TimeRange { range: String, from: Time, to: Time }
+input IntRange {
+  from: Int!
+  to: Int!
+}
+
+input TimeRange {
+  range: String
+  from: Time
+  to: Time
+}

input FloatRange {
  from: Float!
  to: Float!
}

+type NodeStateResultList {
+  items: [Node!]!
+  count: Int
+}
+
type JobResultList {
  items: [Job!]!
  offset: Int
  limit: Int
  count: Int
  hasNextPage: Boolean
}

type JobLinkResultList {
  listQuery: String
  items: [JobLink!]!
  count: Int
}

type HistoPoint {
@@ -386,27 +497,27 @@ type MetricHistoPoint {
  max: Int
}

type JobsStatistics {
  id: ID! # If `groupBy` was used, ID of the user/project/cluster
  name: String! # if User-Statistics: Given Name of Account (ID) Owner
  totalJobs: Int! # Number of jobs
  runningJobs: Int! # Number of running jobs
  shortJobs: Int! # Number of jobs with a duration of less than duration
  totalWalltime: Int! # Sum of the duration of all matched jobs in hours
  totalNodes: Int! # Sum of the nodes of all matched jobs
  totalNodeHours: Int! # Sum of the node hours of all matched jobs
  totalCores: Int! # Sum of the cores of all matched jobs
  totalCoreHours: Int! # Sum of the core hours of all matched jobs
  totalAccs: Int! # Sum of the accs of all matched jobs
  totalAccHours: Int! # Sum of the gpu hours of all matched jobs
  histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
  histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
  histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
  histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
  histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
}

input PageRequest {
  itemsPerPage: Int!
  page: Int!
}
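The schema changes above add node-centric queries (node, nodes, nodeStats) alongside the reflowed job queries. Below is a minimal sketch of exercising the new nodeStats field from Go; the /query endpoint path, port, and bearer-token header are assumptions about a typical cc-backend deployment, not something this diff specifies.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// GraphQL request against the new nodeStats query; filter is optional,
	// so it is simply omitted here.
	body, _ := json.Marshal(map[string]string{
		"query": `{ nodeStats { state count } }`,
	})

	// Endpoint path and auth header are deployment assumptions.
	req, err := http.NewRequest(http.MethodPost, "http://localhost:8080/query", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Decode only the fields requested above.
	var result struct {
		Data struct {
			NodeStats []struct {
				State string `json:"state"`
				Count int    `json:"count"`
			} `json:"nodeStats"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		log.Fatal(err)
	}
	for _, s := range result.Data.NodeStats {
		fmt.Printf("%s: %d nodes\n", s.State, s.Count)
	}
}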
@@ -7,8 +7,9 @@ package main
import "flag"

var (
-	flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
-	flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
+	flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
+	flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags bool
+	flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
)

func cliInit() {
@@ -21,6 +22,7 @@ func cliInit() {
	flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
	flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
	flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
+	flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
	flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
	flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
	flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
@@ -19,7 +19,9 @@ import (
	"github.com/ClusterCockpit/cc-backend/internal/importer"
	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
	"github.com/ClusterCockpit/cc-backend/internal/repository"
+	"github.com/ClusterCockpit/cc-backend/internal/tagger"
	"github.com/ClusterCockpit/cc-backend/internal/taskManager"
+	"github.com/ClusterCockpit/cc-backend/internal/util"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
@@ -211,11 +213,22 @@ func main() {
		}
	}

+	if config.Keys.EnableJobTaggers {
+		tagger.Init()
+	}
+
+	if flagApplyTags {
+		if err := tagger.RunTaggers(); err != nil {
+			log.Abortf("Running job taggers.\nError: %s\n", err.Error())
+		}
+	}
+
	if !flagServer {
		log.Exit("No errors, server flag not set. Exiting cc-backend.")
	}

	archiver.Start(repository.GetJobRepository())

	taskManager.Start()
	serverInit()

@@ -237,6 +250,8 @@ func main() {

		serverShutdown()

+		util.FsWatcherShutdown()
+
		taskManager.Shutdown()
	}()

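The tagger package itself is not part of this excerpt; only Init() and RunTaggers() are visible. go.mod (below) newly pulls in github.com/expr-lang/expr, which suggests tag rules are evaluated as expressions over job attributes. The following is a purely illustrative sketch of how such a rule check could be done with expr; the rule string and environment keys are hypothetical, not the actual tagger API.

package main

import (
	"fmt"

	"github.com/expr-lang/expr"
)

func main() {
	// Hypothetical rule: tag short, single-node, low-load jobs.
	rule := `numNodes == 1 && duration < 300 && loadAvg < 0.1`

	// Hypothetical, flattened view of a completed job.
	env := map[string]any{
		"numNodes": 1,
		"duration": 120,
		"loadAvg":  0.05,
	}

	// Compile once, forcing a boolean result, then evaluate per job.
	program, err := expr.Compile(rule, expr.Env(env), expr.AsBool())
	if err != nil {
		panic(err)
	}

	match, err := expr.Run(program, env)
	if err != nil {
		panic(err)
	}
	fmt.Println("apply tag:", match) // apply tag: true
}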
go.mod (8 changed lines)
@@ -9,6 +9,8 @@ require (
	github.com/ClusterCockpit/cc-units v0.4.0
	github.com/Masterminds/squirrel v1.5.4
	github.com/coreos/go-oidc/v3 v3.12.0
+	github.com/expr-lang/expr v1.17.3
+	github.com/fsnotify/fsnotify v1.9.0
	github.com/go-co-op/gocron/v2 v2.16.0
	github.com/go-ldap/ldap/v3 v3.4.10
	github.com/go-sql-driver/mysql v1.9.0
@@ -18,8 +20,8 @@ require (
	github.com/gorilla/handlers v1.5.2
	github.com/gorilla/mux v1.8.1
	github.com/gorilla/sessions v1.4.0
-	github.com/influxdata/influxdb-client-go/v2 v2.14.0
	github.com/jmoiron/sqlx v1.4.0
+	github.com/joho/godotenv v1.5.1
	github.com/mattn/go-sqlite3 v1.14.24
	github.com/prometheus/client_golang v1.21.0
	github.com/prometheus/common v0.62.0
@@ -39,7 +41,6 @@ require (
	github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
	github.com/KyleBanks/depth v1.2.1 // indirect
	github.com/agnivade/levenshtein v1.2.1 // indirect
-	github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
@@ -57,8 +58,6 @@ require (
	github.com/hashicorp/errwrap v1.1.0 // indirect
	github.com/hashicorp/go-multierror v1.1.1 // indirect
	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
-	github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
-	github.com/joho/godotenv v1.5.1 // indirect
	github.com/jonboulle/clockwork v0.5.0 // indirect
	github.com/josharian/intern v1.0.0 // indirect
	github.com/jpillora/backoff v1.0.0 // indirect
@@ -70,7 +69,6 @@ require (
	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
-	github.com/oapi-codegen/runtime v1.1.1 // indirect
	github.com/prometheus/client_model v0.6.1 // indirect
	github.com/prometheus/procfs v0.15.1 // indirect
	github.com/robfig/cron/v3 v3.0.1 // indirect
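go.mod adds github.com/fsnotify/fsnotify as a direct dependency, which lines up with the util.FsWatcherShutdown() call added in main.go above and points to a file-system watcher somewhere in internal/util. The following is a minimal sketch of the underlying fsnotify pattern, independent of the actual internal/util implementation; the watched path is a placeholder.

package main

import (
	"log"

	"github.com/fsnotify/fsnotify"
)

func main() {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Fatal(err)
	}
	// Closing the watcher is what a shutdown hook like util.FsWatcherShutdown()
	// would ultimately have to do.
	defer watcher.Close()

	// Watch a directory; "./var" is just a placeholder path.
	if err := watcher.Add("./var"); err != nil {
		log.Fatal(err)
	}

	for {
		select {
		case event, ok := <-watcher.Events:
			if !ok {
				return
			}
			if event.Has(fsnotify.Write) {
				log.Println("modified:", event.Name)
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				return
			}
			log.Println("watch error:", err)
		}
	}
}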
go.sum (16 changed lines)
@@ -16,7 +16,6 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/PuerkitoBio/goquery v1.9.3 h1:mpJr/ikUA9/GNJB/DBZcGeFDXUtosHRyRrwh7KGdTG0=
github.com/PuerkitoBio/goquery v1.9.3/go.mod h1:1ndLHPdTz+DyQPICCWYlYQMPl0oXZj0G6D4LCYA6u4U=
-github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
@@ -25,13 +24,10 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
-github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
-github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
-github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo=
@@ -53,8 +49,12 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/expr-lang/expr v1.17.3 h1:myeTTuDFz7k6eFe/JPlep/UsiIjVhG61FMHFu63U7j0=
+github.com/expr-lang/expr v1.17.3/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
github.com/go-asn1-ber/asn1-ber v1.5.7/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-co-op/gocron/v2 v2.16.0 h1:uqUF6WFZ4enRU45pWFNcn1xpDLc+jBOTKhPQI16Z1xs=
@@ -119,10 +119,6 @@ github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/C
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
-github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjwJdUHnwvfjMF71M1iI4=
-github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
-github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
-github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
@@ -147,7 +143,6 @@ github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2E
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
-github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
@@ -182,8 +177,6 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
-github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
@@ -215,7 +208,6 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
-github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -62,6 +62,11 @@ models:
    fields:
      partitions:
        resolver: true
+  Node:
+    model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Node"
+    fields:
+      metaData:
+        resolver: true
  NullableFloat:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
  MetricScope:
@@ -81,6 +86,10 @@ models:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
  JobState:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
+  MonitoringState:
+    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.NodeState" }
+  HealthState:
+    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MonitoringState" }
  TimeRange:
    { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
  IntRange:
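gqlgen now maps the GraphQL Node type onto pkg/schema.Node (with a resolver for metaData). That struct is not shown in this diff; given the schema fields above, a rough sketch could look like the following. Field names, tags, and the enum representations are assumptions for illustration, not the actual definition.

package schema

// Node is a sketch of the model the new GraphQL Node type could map onto;
// the real pkg/schema.Node definition is not part of this excerpt.
type Node struct {
	ID          int64             `json:"id" db:"id"`
	Hostname    string            `json:"hostname" db:"hostname"`
	Cluster     string            `json:"cluster" db:"cluster"`
	SubCluster  string            `json:"subCluster" db:"subcluster"`
	NodeState   NodeState         `json:"nodeState" db:"node_state"`
	HealthState MonitoringState   `json:"healthState" db:"health_state"`
	MetaData    map[string]string `json:"metaData"`
}

// NodeState and MonitoringState are assumed here to be string-backed enums.
type (
	NodeState       string
	MonitoringState string
)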
@@ -123,7 +123,7 @@ func setup(t *testing.T) *api.RestApi {
		t.Fatal(err)
	}

-	if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
+	if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 2), 0666); err != nil {
		t.Fatal(err)
	}

@@ -204,11 +204,11 @@ func TestRestApi(t *testing.T) {
	restapi.MountApiRoutes(r)

	var TestJobId int64 = 123
-	var TestClusterName string = "testcluster"
+	TestClusterName := "testcluster"
	var TestStartTime int64 = 123456789

	const startJobBody string = `{
		"jobId": 123,
		"user": "testuser",
		"project": "testproj",
		"cluster": "testcluster",
@@ -221,7 +221,6 @@ func TestRestApi(t *testing.T) {
		"exclusive": 1,
		"monitoringStatus": 1,
		"smt": 1,
-		"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
		"resources": [
			{
				"hostname": "host123",
@@ -252,16 +251,17 @@ func TestRestApi(t *testing.T) {
		if response.StatusCode != http.StatusCreated {
			t.Fatal(response.Status, recorder.Body.String())
		}
-		resolver := graph.GetResolverInstance()
+		// resolver := graph.GetResolverInstance()
+		restapi.JobRepository.SyncJobs()
		job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
		if err != nil {
			t.Fatal(err)
		}

-		job.Tags, err = resolver.Job().Tags(ctx, job)
-		if err != nil {
-			t.Fatal(err)
-		}
+		// job.Tags, err = resolver.Job().Tags(ctx, job)
+		// if err != nil {
+		// 	t.Fatal(err)
+		// }

		if job.JobID != 123 ||
			job.User != "testuser" ||
@@ -278,13 +278,13 @@ func TestRestApi(t *testing.T) {
			job.MonitoringStatus != 1 ||
			job.SMT != 1 ||
			!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
-			job.StartTime.Unix() != 123456789 {
+			job.StartTime != 123456789 {
			t.Fatalf("unexpected job properties: %#v", job)
		}

-		if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
-			t.Fatalf("unexpected tags: %#v", job.Tags)
-		}
+		// if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
+		// 	t.Fatalf("unexpected tags: %#v", job.Tags)
+		// }
	}); !ok {
		return
	}
@@ -352,7 +352,7 @@ func TestRestApi(t *testing.T) {

	t.Run("CheckDoubleStart", func(t *testing.T) {
		// Starting a job with the same jobId and cluster should only be allowed if the startTime is far apart!
-		body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
+		body := strings.ReplaceAll(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`)

		req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
		recorder := httptest.NewRecorder()
@@ -402,6 +402,7 @@ func TestRestApi(t *testing.T) {
	}

	time.Sleep(1 * time.Second)
+	restapi.JobRepository.SyncJobs()

	const stopJobBodyFailed string = `{
		"jobId": 12345,
internal/api/cluster.go (new file, 70 lines)
@@ -0,0 +1,70 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package api

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// GetClustersApiResponse model
type GetClustersApiResponse struct {
	Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
}

// getClusters godoc
// @summary Lists all cluster configs
// @tags Cluster query
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
// @produce json
// @param cluster query string false "Job Cluster"
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /api/clusters/ [get]
func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
	if user := repository.GetUserFromContext(r.Context()); user != nil &&
		!user.HasRole(schema.RoleApi) {

		handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
		return
	}

	rw.Header().Add("Content-Type", "application/json")
	bw := bufio.NewWriter(rw)
	defer bw.Flush()

	var clusters []*schema.Cluster

	if r.URL.Query().Has("cluster") {
		name := r.URL.Query().Get("cluster")
		cluster := archive.GetCluster(name)
		if cluster == nil {
			handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
			return
		}
		clusters = append(clusters, cluster)
	} else {
		clusters = archive.Clusters
	}

	payload := GetClustersApiResponse{
		Clusters: clusters,
	}

	if err := json.NewEncoder(bw).Encode(payload); err != nil {
		handleError(err, http.StatusInternalServerError, rw)
		return
	}
}
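For reference, a small Go client sketch for the new /api/clusters/ endpoint; the base URL, port, cluster name, and JWT are placeholders, and the response is decoded loosely rather than through the api package types.

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Placeholder base URL and query parameter; adjust to your deployment.
	req, err := http.NewRequest(http.MethodGet, "http://localhost:8080/api/clusters/?cluster=fritz", nil)
	if err != nil {
		log.Fatal(err)
	}
	// ApiKeyAuth as declared in the swagger annotations; token is a placeholder.
	req.Header.Set("Authorization", "Bearer <JWT>")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Loose mirror of api.GetClustersApiResponse.
	var payload struct {
		Clusters []json.RawMessage `json:"clusters"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		log.Fatal(err)
	}
	fmt.Println("clusters returned:", len(payload.Clusters))
}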
987
internal/api/job.go
Normal file
987
internal/api/job.go
Normal file
@ -0,0 +1,987 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DefaultApiResponse model
|
||||||
|
type DefaultJobApiResponse struct {
|
||||||
|
Message string `json:"msg"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// StopJobApiRequest model
|
||||||
|
type StopJobApiRequest struct {
|
||||||
|
JobId *int64 `json:"jobId" example:"123000"`
|
||||||
|
Cluster *string `json:"cluster" example:"fritz"`
|
||||||
|
StartTime *int64 `json:"startTime" example:"1649723812"`
|
||||||
|
State schema.JobState `json:"jobState" validate:"required" example:"completed"`
|
||||||
|
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteJobApiRequest model
|
||||||
|
type DeleteJobApiRequest struct {
|
||||||
|
JobId *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
|
||||||
|
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
|
||||||
|
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetJobsApiResponse model
|
||||||
|
type GetJobsApiResponse struct {
|
||||||
|
Jobs []*schema.Job `json:"jobs"` // Array of jobs
|
||||||
|
Items int `json:"items"` // Number of jobs returned
|
||||||
|
Page int `json:"page"` // Page id returned
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApiTag model
|
||||||
|
type ApiTag struct {
|
||||||
|
// Tag Type
|
||||||
|
Type string `json:"type" example:"Debug"`
|
||||||
|
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||||
|
Scope string `json:"scope" example:"global"` // Tag Scope for Frontend Display
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApiMeta model
|
||||||
|
type EditMetaRequest struct {
|
||||||
|
Key string `json:"key" example:"jobScript"`
|
||||||
|
Value string `json:"value" example:"bash script"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TagJobApiRequest []*ApiTag
|
||||||
|
|
||||||
|
type GetJobApiRequest []string
|
||||||
|
|
||||||
|
type GetJobApiResponse struct {
|
||||||
|
Meta *schema.Job
|
||||||
|
Data []*JobMetricWithName
|
||||||
|
}
|
||||||
|
|
||||||
|
type GetCompleteJobApiResponse struct {
|
||||||
|
Meta *schema.Job
|
||||||
|
Data schema.JobData
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobMetricWithName struct {
|
||||||
|
Metric *schema.JobMetric `json:"metric"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Scope schema.MetricScope `json:"scope"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// getJobs godoc
|
||||||
|
// @summary Lists all jobs
|
||||||
|
// @tags Job query
|
||||||
|
// @description Get a list of all jobs. Filters can be applied using query parameters.
|
||||||
|
// @description Number of results can be limited by page. Results are sorted by descending startTime.
|
||||||
|
// @produce json
|
||||||
|
// @param state query string false "Job State" Enums(running, completed, failed, cancelled, stopped, timeout)
|
||||||
|
// @param cluster query string false "Job Cluster"
|
||||||
|
// @param start-time query string false "Syntax: '$from-$to', as unix epoch timestamps in seconds"
|
||||||
|
// @param items-per-page query int false "Items per page (Default: 25)"
|
||||||
|
// @param page query int false "Page Number (Default: 1)"
|
||||||
|
// @param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
|
||||||
|
// @success 200 {object} api.GetJobsApiResponse "Job array and page info"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/ [get]
|
||||||
|
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
withMetadata := false
|
||||||
|
filter := &model.JobFilter{}
|
||||||
|
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
|
||||||
|
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
|
||||||
|
|
||||||
|
for key, vals := range r.URL.Query() {
|
||||||
|
switch key {
|
||||||
|
case "state":
|
||||||
|
for _, s := range vals {
|
||||||
|
state := schema.JobState(s)
|
||||||
|
if !state.Valid() {
|
||||||
|
handleError(fmt.Errorf("invalid query parameter value: state"),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
filter.State = append(filter.State, state)
|
||||||
|
}
|
||||||
|
case "cluster":
|
||||||
|
filter.Cluster = &model.StringInput{Eq: &vals[0]}
|
||||||
|
case "start-time":
|
||||||
|
st := strings.Split(vals[0], "-")
|
||||||
|
if len(st) != 2 {
|
||||||
|
handleError(fmt.Errorf("invalid query parameter value: startTime"),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
from, err := strconv.ParseInt(st[0], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
to, err := strconv.ParseInt(st[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ufrom, uto := time.Unix(from, 0), time.Unix(to, 0)
|
||||||
|
filter.StartTime = &schema.TimeRange{From: &ufrom, To: &uto}
|
||||||
|
case "page":
|
||||||
|
x, err := strconv.Atoi(vals[0])
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
page.Page = x
|
||||||
|
case "items-per-page":
|
||||||
|
x, err := strconv.Atoi(vals[0])
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
page.ItemsPerPage = x
|
||||||
|
case "with-metadata":
|
||||||
|
withMetadata = true
|
||||||
|
default:
|
||||||
|
handleError(fmt.Errorf("invalid query parameter: %s", key),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs, err := api.JobRepository.QueryJobs(r.Context(), []*model.JobFilter{filter}, page, order)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]*schema.Job, 0, len(jobs))
|
||||||
|
for _, job := range jobs {
|
||||||
|
if withMetadata {
|
||||||
|
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||||
|
job.Statistics, err = archive.GetStatistics(job)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
results = append(results, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("/api/jobs: %d jobs returned", len(results))
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
payload := GetJobsApiResponse{
|
||||||
|
Jobs: results,
|
||||||
|
Items: page.ItemsPerPage,
|
||||||
|
Page: page.Page,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
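
// Illustrative only (not part of the handler): a request exercising the
// filters parsed above could look as follows; the cluster name and epoch
// values are assumptions for the sake of the example.
//
//   GET /api/jobs/?state=running&cluster=fritz&start-time=1700000000-1700086400&items-per-page=10&page=2&with-metadata=true
//
// Any unknown query key hits the default branch above and yields a 400 response.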
|
||||||
|
|
||||||
|
// getCompleteJobById godoc
|
||||||
|
// @summary Get job meta and optionally all metric data
|
||||||
|
// @tags Job query
|
||||||
|
// @description Job to get is specified by database ID
|
||||||
|
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Database ID of Job"
|
||||||
|
// @param all-metrics query bool false "Include all available metrics"
|
||||||
|
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/{id} [get]
|
||||||
|
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Fetch job from db
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
id, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
|
||||||
|
} else {
|
||||||
|
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||||
|
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var scopes []schema.MetricScope
|
||||||
|
|
||||||
|
if job.NumNodes == 1 {
|
||||||
|
scopes = []schema.MetricScope{"core"}
|
||||||
|
} else {
|
||||||
|
scopes = []schema.MetricScope{"node"}
|
||||||
|
}
|
||||||
|
|
||||||
|
var data schema.JobData
|
||||||
|
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
resolution := 0
|
||||||
|
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
resolution = max(resolution, mc.Timestep)
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.URL.Query().Get("all-metrics") == "true" {
|
||||||
|
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("/api/job/%s: get job %d", id, job.JobID)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
payload := GetCompleteJobApiResponse{
|
||||||
|
Meta: job,
|
||||||
|
Data: data,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
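
// Illustrative only: fetching job 123 (database ID, not the Slurm job ID) with
// all metric data attached; the ID value is an assumption for the example.
//
//   GET /api/jobs/123?all-metrics=true
//
// Without all-metrics=true only the job meta information is returned and the
// data part of the payload stays empty.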
|
||||||
|
|
||||||
|
// getJobById godoc
|
||||||
|
// @summary Get job meta and configurable metric data
|
||||||
|
// @tags Job query
|
||||||
|
// @description Job to get is specified by database ID
|
||||||
|
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Database ID of Job"
|
||||||
|
// @param request body api.GetJobApiRequest true "Array of metric names"
|
||||||
|
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/{id} [post]
|
||||||
|
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Fetch job from db
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
id, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err = api.JobRepository.FindById(r.Context(), id)
|
||||||
|
} else {
|
||||||
|
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||||
|
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var metrics GetJobApiRequest
|
||||||
|
if err = decode(r.Body, &metrics); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var scopes []schema.MetricScope
|
||||||
|
|
||||||
|
if job.NumNodes == 1 {
|
||||||
|
scopes = []schema.MetricScope{"core"}
|
||||||
|
} else {
|
||||||
|
scopes = []schema.MetricScope{"node"}
|
||||||
|
}
|
||||||
|
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
resolution := 0
|
||||||
|
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
resolution = max(resolution, mc.Timestep)
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*JobMetricWithName{}
|
||||||
|
for name, md := range data {
|
||||||
|
for scope, metric := range md {
|
||||||
|
res = append(res, &JobMetricWithName{
|
||||||
|
Name: name,
|
||||||
|
Scope: scope,
|
||||||
|
Metric: metric,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("/api/job/%s: get job %d", id, job.JobID)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
payload := GetJobApiResponse{
|
||||||
|
Meta: job,
|
||||||
|
Data: res,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
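
// Illustrative only: the request body is decoded into GetJobApiRequest, which
// the godoc above describes as an "Array of metric names". A sketch of such a
// body (the metric names are assumptions and depend on the cluster's metric
// configuration):
//
//   POST /api/jobs/123
//   ["flops_any", "mem_bw"]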
|
||||||
|
|
||||||
|
// editMeta godoc
|
||||||
|
// @summary Edit meta-data json
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Edit key value pairs in job metadata json
|
||||||
|
// @description If a key already exists its content will be overwritten
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Job Database ID"
|
||||||
|
// @param request body api.EditMetaRequest true "Key value pair to add"
|
||||||
|
// @success 200 {object} schema.Job "Updated job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/edit_meta/{id} [post]
|
||||||
|
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req EditMetaRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := api.JobRepository.UpdateMetadata(job, req.Key, req.Value); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
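
// Illustrative only: EditMetaRequest carries a single key/value pair that is
// merged into the job's metadata via UpdateMetadata. Assuming the JSON field
// names are the lower-cased Go field names Key and Value, a body could be:
//
//   { "key": "jobScript", "value": "#!/bin/bash\nsrun ./a.out" }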
|
||||||
|
|
||||||
|
// tagJob godoc
|
||||||
|
// @summary Adds one or more tags to a job
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||||
|
// @description Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||||
|
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Job Database ID"
|
||||||
|
// @param request body api.TagJobApiRequest true "Array of tag-objects to add"
|
||||||
|
// @success 200 {object} schema.Job "Updated job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/tag_job/{id} [post]
|
||||||
|
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tag := range req {
|
||||||
|
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), *job.ID, tag.Type, tag.Name, tag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags = append(job.Tags, &schema.Tag{
|
||||||
|
ID: tagId,
|
||||||
|
Type: tag.Type,
|
||||||
|
Name: tag.Name,
|
||||||
|
Scope: tag.Scope,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
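
// Illustrative only: TagJobApiRequest is an array of tag objects with type,
// name and scope (JSON field names assumed from the struct fields used above;
// tag values are made up). Omitting the scope defaults to "global" as noted
// in the godoc:
//
//   [ { "type": "testing", "name": "floptest", "scope": "global" } ]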
|
||||||
|
|
||||||
|
// removeTagJob godoc
|
||||||
|
// @summary Removes one or more tags from a job
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.
|
||||||
|
// @description Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
|
||||||
|
// @description If tagged job is already finished: Tag will be removed from respective archive files.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Job Database ID"
|
||||||
|
// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
|
||||||
|
// @success 200 {object} schema.Job "Updated job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /jobs/tag_job/{id} [delete]
|
||||||
|
func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, rtag := range req {
|
||||||
|
// Only Global and Admin Tags
|
||||||
|
if rtag.Scope != "global" && rtag.Scope != "admin" {
|
||||||
|
log.Warnf("Cannot delete private tag for job %d: Skip", job.JobID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
remainingTags, err := api.JobRepository.RemoveJobTagByRequest(repository.GetUserFromContext(r.Context()), *job.ID, rtag.Type, rtag.Name, rtag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags = remainingTags
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeTags godoc
|
||||||
|
// @summary Removes all tags and job-relations for type:name tuple
|
||||||
|
// @tags Tag remove
|
||||||
|
// @description Removes tags by type and name. Name and Type of Tag(s) must match.
|
||||||
|
// @description Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
|
||||||
|
// @description Tags will be removed from respective archive files.
|
||||||
|
// @accept json
|
||||||
|
// @produce plain
|
||||||
|
// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
|
||||||
|
// @success 200 {string} string "Success Response"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /tags/ [delete]
|
||||||
|
func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
targetCount := len(req)
|
||||||
|
currentCount := 0
|
||||||
|
for _, rtag := range req {
|
||||||
|
// Only Global and Admin Tags
|
||||||
|
if rtag.Scope != "global" && rtag.Scope != "admin" {
|
||||||
|
log.Warn("Cannot delete private tag: Skip")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := api.JobRepository.RemoveTagByRequest(rtag.Type, rtag.Name, rtag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
currentCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
fmt.Fprintf(rw, "Deleted Tags from DB: %d successfull of %d requested\n", currentCount, targetCount)
|
||||||
|
}
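
// Illustrative only: tag definitions (and their job relations) are removed one
// by one, so a body listing two global tags such as
//
//   [ { "type": "testing", "name": "floptest", "scope": "global" },
//     { "type": "testing", "name": "memtest",  "scope": "global" } ]
//
// (names assumed) answers with "Deleted Tags from DB: 2 successful of 2
// requested", while private tags in the list are skipped and not counted as
// successful.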
|
||||||
|
|
||||||
|
// startJob godoc
|
||||||
|
// @summary Adds a new job as "running"
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Job specified in request body will be saved to database as "running" with new DB ID.
|
||||||
|
// @description Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param request body schema.JobMeta true "Job to add"
|
||||||
|
// @success 201 {object} api.DefaultJobApiResponse "Job added successfully"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: The combination of jobId, clusterId and startTime already exists"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/start_job/ [post]
|
||||||
|
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
req := schema.Job{
|
||||||
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("REST: %s\n", req.GoString())
|
||||||
|
req.State = schema.JobStateRunning
|
||||||
|
|
||||||
|
if err := importer.SanityChecks(&req); err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// acquire lock to avoid race condition between API calls
|
||||||
|
var unlockOnce sync.Once
|
||||||
|
api.RepositoryMutex.Lock()
|
||||||
|
defer unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||||
|
|
||||||
|
// Check if combination of (job_id, cluster_id, start_time) already exists:
|
||||||
|
jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
|
||||||
|
if err != nil && err != sql.ErrNoRows {
|
||||||
|
handleError(fmt.Errorf("checking for duplicate failed: %w", err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
} else if err == nil {
|
||||||
|
for _, job := range jobs {
|
||||||
|
if (req.StartTime - job.StartTime) < 86400 {
|
||||||
|
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d, jobid: %d", job.ID, job.JobID), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
id, err := api.JobRepository.Start(&req)
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// unlock here, adding Tags can be async
|
||||||
|
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||||
|
|
||||||
|
for _, tag := range req.Tags {
|
||||||
|
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||||
|
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusCreated)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
|
Message: "success",
|
||||||
|
})
|
||||||
|
}
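
// Illustrative only: start_job rejects a second submission of the same
// (jobId, cluster) pair whose startTime lies within roughly 24h (86400s) of an
// existing record. A minimal body sketch, with field names and values assumed
// rather than taken verbatim from the JobMeta schema:
//
//   { "jobId": 123, "user": "abcd1", "project": "p1", "cluster": "fritz",
//     "startTime": 1700000000, "numNodes": 2,
//     "resources": [ { "hostname": "f0101" }, { "hostname": "f0102" } ] }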
|
||||||
|
|
||||||
|
// stopJobByRequest godoc
|
||||||
|
// @summary Marks job as completed and triggers archiving
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Job to stop is specified by request body. All fields are required in this case.
|
||||||
|
// @description Returns full job resource information according to 'JobMeta' scheme.
|
||||||
|
// @produce json
|
||||||
|
// @param request body api.StopJobApiRequest true "All fields required"
|
||||||
|
// @success 200 {object} schema.JobMeta "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: job has already been stopped"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/stop_job/ [post]
|
||||||
|
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := StopJobApiRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch job (that will be stopped) from db
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if req.JobId == nil {
|
||||||
|
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
|
||||||
|
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
if err != nil {
|
||||||
|
job, err = api.JobRepository.FindCached(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
// FIXME: Previous error is hidden
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
api.checkAndHandleStopJob(rw, job, req)
|
||||||
|
}
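
// Illustrative only: jobId, cluster and startTime are needed to locate the
// running job, while stopTime and the optional job state drive the update in
// checkAndHandleStopJob below. Field names and values are assumptions:
//
//   { "jobId": 123, "cluster": "fritz", "startTime": 1700000000,
//     "stopTime": 1700003600, "jobState": "completed" }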
|
||||||
|
|
||||||
|
// deleteJobById godoc
|
||||||
|
// @summary Remove a job from the sql database
|
||||||
|
// @tags Job remove
|
||||||
|
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Database ID of Job"
|
||||||
|
// @success 200 {object} api.DefaultJobApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/delete_job/{id} [delete]
|
||||||
|
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse database ID of the job to delete from the request path
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
id, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = api.JobRepository.DeleteJobById(id)
|
||||||
|
} else {
|
||||||
|
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
|
Message: fmt.Sprintf("Successfully deleted job %s", id),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteJobByRequest godoc
|
||||||
|
// @summary Remove a job from the sql database
|
||||||
|
// @tags Job remove
|
||||||
|
// @description Job to delete is specified by request body. All fields are required in this case.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param request body api.DeleteJobApiRequest true "All fields required"
|
||||||
|
// @success 200 {object} api.DefaultJobApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/delete_job/ [delete]
|
||||||
|
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := DeleteJobApiRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch job (that will be deleted) from db
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if req.JobId == nil {
|
||||||
|
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = api.JobRepository.DeleteJobById(*job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
|
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteJobBefore godoc
|
||||||
|
// @summary Remove a job from the sql database
|
||||||
|
// @tags Job remove
|
||||||
|
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
|
||||||
|
// @produce json
|
||||||
|
// @param ts path int true "Unix epoch timestamp"
|
||||||
|
// @success 200 {object} api.DefaultJobApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/delete_job_before/{ts} [delete]
|
||||||
|
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
var cnt int
|
||||||
|
// Parse cut-off timestamp from the request path
|
||||||
|
id, ok := mux.Vars(r)["ts"]
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
ts, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for ts: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cnt, err = api.JobRepository.DeleteJobsBefore(ts)
|
||||||
|
} else {
|
||||||
|
handleError(errors.New("the parameter 'ts' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting jobs failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
|
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
|
||||||
|
// Sanity checks
if job == nil {
handleError(errors.New("stopping job failed: job not found"), http.StatusUnprocessableEntity, rw)
return
}

if job.State != schema.JobStateRunning {
handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
return
}

if job.StartTime > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime), http.StatusBadRequest, rw)
return
}
|
||||||
|
|
||||||
|
if req.State != "" && !req.State.Valid() {
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : invalid requested job state: %#v", job.JobID, job.ID, job.Cluster, req.State), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
} else if req.State == "" {
|
||||||
|
req.State = schema.JobStateCompleted
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark job as stopped in the database (update state and duration)
|
||||||
|
job.Duration = int32(req.StopTime - job.StartTime)
|
||||||
|
job.State = req.State
|
||||||
|
api.JobRepository.Mutex.Lock()
|
||||||
|
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||||
|
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||||
|
api.JobRepository.Mutex.Unlock()
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
api.JobRepository.Mutex.Unlock()
|
||||||
|
|
||||||
|
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||||
|
|
||||||
|
// Send a response (with status OK). This means that errors that happen from here on forward
|
||||||
|
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
||||||
|
// writing to the filesystem fails, the client will not know.
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
|
||||||
|
// Monitoring is disabled...
|
||||||
|
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trigger async archiving
|
||||||
|
archiver.TriggerArchiving(job)
|
||||||
|
}
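
// Worked example (values assumed): with a startTime of 1700000000 and a
// requested stopTime of 1700003600, the stored duration becomes 3600s, and the
// state defaults to "completed" when the request omits a job state. Because
// the 200 response is written before archiving starts, later errors from
// metric loading or archive writing only surface in the logs.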
|
||||||
|
|
||||||
|
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
metrics := r.URL.Query()["metric"]
|
||||||
|
var scopes []schema.MetricScope
|
||||||
|
for _, scope := range r.URL.Query()["scope"] {
|
||||||
|
var s schema.MetricScope
|
||||||
|
if err := s.UnmarshalGQL(scope); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
scopes = append(scopes, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
|
||||||
|
type Response struct {
|
||||||
|
Data *struct {
|
||||||
|
JobMetrics []*model.JobMetricWithName `json:"jobMetrics"`
|
||||||
|
} `json:"data"`
|
||||||
|
Error *struct {
|
||||||
|
Message string `json:"message"`
|
||||||
|
} `json:"error"`
|
||||||
|
}
|
||||||
|
|
||||||
|
resolver := graph.GetResolverInstance()
|
||||||
|
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||||
|
if err != nil {
|
||||||
|
json.NewEncoder(rw).Encode(Response{
|
||||||
|
Error: &struct {
|
||||||
|
Message string "json:\"message\""
|
||||||
|
}{Message: err.Error()},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(rw).Encode(Response{
|
||||||
|
Data: &struct {
|
||||||
|
JobMetrics []*model.JobMetricWithName "json:\"jobMetrics\""
|
||||||
|
}{JobMetrics: data},
|
||||||
|
})
|
||||||
|
}
|
internal/api/node.go (new file)
@ -0,0 +1,74 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Node struct {
|
||||||
|
Name string `json:"hostname"`
|
||||||
|
States []string `json:"states"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateNodeStatesRequest model
|
||||||
|
type UpdateNodeStatesRequest struct {
|
||||||
|
Nodes []Node `json:"nodes"`
|
||||||
|
Cluster string `json:"cluster" example:"fritz"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// this routine assumes that only one of them applies per node
|
||||||
|
func determineState(states []string) schema.NodeState {
|
||||||
|
for _, state := range states {
|
||||||
|
switch strings.ToLower(state) {
|
||||||
|
case "allocated":
|
||||||
|
return schema.NodeStateAllocated
|
||||||
|
case "reserved":
|
||||||
|
return schema.NodeStateReserved
|
||||||
|
case "idle":
|
||||||
|
return schema.NodeStateIdle
|
||||||
|
case "down":
|
||||||
|
return schema.NodeStateDown
|
||||||
|
case "mixed":
|
||||||
|
return schema.NodeStateMixed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return schema.NodeStateUnknown
|
||||||
|
}
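
// Illustrative only: matching is case-insensitive and the first recognized
// state wins, so determineState([]string{"IDLE", "DRAIN"}) yields
// schema.NodeStateIdle, while determineState([]string{"draining"}) falls
// through to schema.NodeStateUnknown. The input values are assumptions.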
|
||||||
|
|
||||||
|
// updateNodeStates godoc
|
||||||
|
// @summary Deliver updated Slurm node states
|
||||||
|
// @tags node
|
||||||
|
// @description Accepts a JSON list of nodes with their current scheduler states and updates the node state entries for the given cluster accordingly.
|
||||||
|
// @produce json
|
||||||
|
// @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
|
||||||
|
// @success 200 {array} api.SuccessResponse "Success"
|
||||||
|
// @failure 400 {string} string "Bad Request"
|
||||||
|
// @failure 401 {string} string "Unauthorized"
|
||||||
|
// @failure 403 {string} string "Forbidden"
|
||||||
|
// @failure 500 {string} string "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/nodestats/ [post]
|
||||||
|
func (api *RestApi) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := UpdateNodeStatesRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
for _, node := range req.Nodes {
|
||||||
|
state := determineState(node.States)
|
||||||
|
repo.UpdateNodeState(node.Name, req.Cluster, &state)
|
||||||
|
}
|
||||||
|
}
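
// Illustrative only: the JSON field names follow the struct tags above
// ("cluster", "nodes", "hostname", "states"); the host names are assumptions.
//
//   { "cluster": "fritz",
//     "nodes": [ { "hostname": "f0101", "states": ["allocated"] },
//                { "hostname": "f0102", "states": ["idle"] } ] }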
|
internal/api/rest.go (diff suppressed because it is too large)
internal/api/user.go (new file)
@ -0,0 +1,159 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ApiReturnedUser struct {
|
||||||
|
Username string `json:"username"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Roles []string `json:"roles"`
|
||||||
|
Email string `json:"email"`
|
||||||
|
Projects []string `json:"projects"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// getUsers godoc
|
||||||
|
// @summary Returns a list of users
|
||||||
|
// @tags User
|
||||||
|
// @description Returns a JSON-encoded list of users.
|
||||||
|
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||||
|
// @produce json
|
||||||
|
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
|
||||||
|
// @success 200 {array} api.ApiReturnedUser "List of users returned successfully"
|
||||||
|
// @failure 400 {string} string "Bad Request"
|
||||||
|
// @failure 401 {string} string "Unauthorized"
|
||||||
|
// @failure 403 {string} string "Forbidden"
|
||||||
|
// @failure 500 {string} string "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/users/ [get]
|
||||||
|
func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(rw).Encode(users)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Values
|
||||||
|
newrole := r.FormValue("add-role")
|
||||||
|
delrole := r.FormValue("remove-role")
|
||||||
|
newproj := r.FormValue("add-project")
|
||||||
|
delproj := r.FormValue("remove-project")
|
||||||
|
|
||||||
|
// TODO: Handle anything but roles...
|
||||||
|
if newrole != "" {
|
||||||
|
if err := repository.GetUserRepository().AddRole(r.Context(), mux.Vars(r)["id"], newrole); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Add Role Success"))
|
||||||
|
} else if delrole != "" {
|
||||||
|
if err := repository.GetUserRepository().RemoveRole(r.Context(), mux.Vars(r)["id"], delrole); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Remove Role Success"))
|
||||||
|
} else if newproj != "" {
|
||||||
|
if err := repository.GetUserRepository().AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Add Project Success"))
|
||||||
|
} else if delproj != "" {
|
||||||
|
if err := repository.GetUserRepository().RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Remove Project Success"))
|
||||||
|
} else {
|
||||||
|
http.Error(rw, "Not Add or Del [role|project]?", http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
rw.Header().Set("Content-Type", "text/plain")
|
||||||
|
me := repository.GetUserFromContext(r.Context())
|
||||||
|
if !me.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
username, password, role, name, email, project := r.FormValue("username"),
|
||||||
|
r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
|
||||||
|
r.FormValue("email"), r.FormValue("project")
|
||||||
|
|
||||||
|
if len(password) == 0 && role != schema.GetRoleString(schema.RoleApi) {
|
||||||
|
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
|
||||||
|
http.Error(rw, "only managers require a project (can be changed later)",
|
||||||
|
http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
|
||||||
|
http.Error(rw, "managers require a project to manage (can be changed later)",
|
||||||
|
http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := repository.GetUserRepository().AddUser(&schema.User{
|
||||||
|
Username: username,
|
||||||
|
Name: name,
|
||||||
|
Password: password,
|
||||||
|
Email: email,
|
||||||
|
Projects: []string{project},
|
||||||
|
Roles: []string{role},
|
||||||
|
}); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(rw, "User %v successfully created!\n", username)
|
||||||
|
}
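
// Illustrative only: createUser reads form values rather than JSON. A sketch
// of a form-encoded body (all values assumed); non-API users need a password,
// and only manager accounts take a project:
//
//   username=abcd1&password=secret&role=manager&name=Jane+Doe&email=jane@example.org&project=p1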
|
||||||
|
|
||||||
|
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
username := r.FormValue("username")
|
||||||
|
if err := repository.GetUserRepository().DelUser(username); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
}
|
@ -41,7 +41,7 @@ func archivingWorker() {
|
|||||||
// will fail if job meta not in repository
|
// will fail if job meta not in repository
|
||||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,7 +50,7 @@ func archivingWorker() {
|
|||||||
jobMeta, err := ArchiveJob(job, context.Background())
|
jobMeta, err := ArchiveJob(job, context.Background())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,7 +72,11 @@ func archivingWorker() {
|
|||||||
}
|
}
|
||||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||||
|
|
||||||
|
repository.CallJobStopHooks(job)
|
||||||
archivePending.Done()
|
archivePending.Done()
|
||||||
|
default:
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Writes a running job to the job-archive
|
// Writes a running job to the job-archive
|
||||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||||
allMetrics := make([]string, 0)
|
allMetrics := make([]string, 0)
|
||||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
for _, mc := range metricConfigs {
|
for _, mc := range metricConfigs {
|
||||||
@ -40,11 +40,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
jobMeta := &schema.JobMeta{
|
job.Statistics = make(map[string]schema.JobStatistics)
|
||||||
BaseJob: job.BaseJob,
|
|
||||||
StartTime: job.StartTime.Unix(),
|
|
||||||
Statistics: make(map[string]schema.JobStatistics),
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, data := range jobData {
|
for metric, data := range jobData {
|
||||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||||
@ -61,7 +57,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Round AVG Result to 2 Digits
|
// Round AVG Result to 2 Digits
|
||||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
job.Statistics[metric] = schema.JobStatistics{
|
||||||
Unit: schema.Unit{
|
Unit: schema.Unit{
|
||||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||||
@ -76,8 +72,8 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|||||||
// only return the JobMeta structure as the
|
// only return the JobMeta structure as the
|
||||||
// statistics in there are needed.
|
// statistics in there are needed.
|
||||||
if config.Keys.DisableArchive {
|
if config.Keys.DisableArchive {
|
||||||
return jobMeta, nil
|
return job, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
return job, archive.GetHandle().ImportJob(job, &jobData)
|
||||||
}
|
}
|
||||||
|
@ -237,7 +237,7 @@ func (auth *Authentication) Login(
|
|||||||
limiter := getIPUserLimiter(ip, username)
|
limiter := getIPUserLimiter(ip, username)
|
||||||
if !limiter.Allow() {
|
if !limiter.Allow() {
|
||||||
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
||||||
onfailure(rw, r, errors.New("Too many login attempts, try again in a few minutes."))
|
onfailure(rw, r, errors.New("too many login attempts, try again in a few minutes"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
@ -167,12 +167,30 @@ type NamedStatsWithScope struct {
|
|||||||
Stats []*ScopedStats `json:"stats"`
|
Stats []*ScopedStats `json:"stats"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeFilter struct {
|
||||||
|
Hostname *StringInput `json:"hostname,omitempty"`
|
||||||
|
Cluster *StringInput `json:"cluster,omitempty"`
|
||||||
|
SubCluster *StringInput `json:"subCluster,omitempty"`
|
||||||
|
NodeState *string `json:"nodeState,omitempty"`
|
||||||
|
HealthState *schema.NodeState `json:"healthState,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type NodeMetrics struct {
|
type NodeMetrics struct {
|
||||||
Host string `json:"host"`
|
Host string `json:"host"`
|
||||||
SubCluster string `json:"subCluster"`
|
SubCluster string `json:"subCluster"`
|
||||||
Metrics []*JobMetricWithName `json:"metrics"`
|
Metrics []*JobMetricWithName `json:"metrics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeStateResultList struct {
|
||||||
|
Items []*schema.Node `json:"items"`
|
||||||
|
Count *int `json:"count,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStats struct {
|
||||||
|
State string `json:"state"`
|
||||||
|
Count int `json:"count"`
|
||||||
|
}
|
||||||
|
|
||||||
type NodesResultList struct {
|
type NodesResultList struct {
|
||||||
Items []*NodeMetrics `json:"items"`
|
Items []*NodeMetrics `json:"items"`
|
||||||
Offset *int `json:"offset,omitempty"`
|
Offset *int `json:"offset,omitempty"`
|
||||||
|
@ -29,9 +29,14 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
|||||||
return r.Repo.Partitions(obj.Name)
|
return r.Repo.Partitions(obj.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StartTime is the resolver for the startTime field.
|
||||||
|
func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: StartTime - startTime"))
|
||||||
|
}
|
||||||
|
|
||||||
// Tags is the resolver for the tags field.
|
// Tags is the resolver for the tags field.
|
||||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
|
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||||
@ -143,7 +148,7 @@ func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name s
|
|||||||
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to create tag with scope: %s", scope)
|
log.Warnf("Not authorized to create tag with scope: %s", scope)
|
||||||
return nil, fmt.Errorf("Not authorized to create tag with scope: %s", scope)
|
return nil, fmt.Errorf("not authorized to create tag with scope: %s", scope)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,7 +184,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (ID): %d", tid)
|
log.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
@ -193,7 +198,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to add tag: %d", tid)
|
log.Warnf("Not authorized to add tag: %d", tid)
|
||||||
return nil, fmt.Errorf("Not authorized to add tag: %d", tid)
|
return nil, fmt.Errorf("not authorized to add tag: %d", tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -226,7 +231,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
|||||||
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (ID): %d", tid)
|
log.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
@ -240,7 +245,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to remove tag: %d", tid)
|
log.Warnf("Not authorized to remove tag: %d", tid)
|
||||||
return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
|
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -269,7 +274,7 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
         _, _, tscope, exists := r.Repo.TagInfo(tid)
         if !exists {
             log.Warnf("Tag does not exist (ID): %d", tid)
-            return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
+            return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
         }

         // Test Access: Admins && Admin Tag OR Everyone && Private Tag
@@ -283,7 +288,7 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
             }
         } else {
             log.Warnf("Not authorized to remove tag: %d", tid)
-            return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
+            return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
         }
     }
     return tags, nil
@@ -299,6 +304,21 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string,
     return nil, nil
 }

+// NodeState is the resolver for the nodeState field.
+func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
+    panic(fmt.Errorf("not implemented: NodeState - nodeState"))
+}
+
+// HealthState is the resolver for the HealthState field.
+func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) {
+    panic(fmt.Errorf("not implemented: HealthState - HealthState"))
+}
+
+// MetaData is the resolver for the metaData field.
+func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
+    panic(fmt.Errorf("not implemented: MetaData - metaData"))
+}
+
 // Clusters is the resolver for the clusters field.
 func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
     return archive.Clusters, nil
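The three node resolvers above are generated stubs that still panic. A minimal, self-contained sketch of the shape they might take once filled in; the struct and its field names below are stand-ins assumed from the GraphQL Node type, not part of this patch:

```go
package main

import "fmt"

// Stand-in types: the real schema.Node / NodeState live in the project;
// the field names used here are assumptions, not part of this patch.
type NodeState string

type Node struct {
	Hostname    string
	NodeState   NodeState
	HealthState NodeState
	MetaData    map[string]string
}

// A possible shape for the generated resolver stubs once implemented:
// they simply hand the struct fields through to GraphQL.
func resolveNodeState(n *Node) (string, error)      { return string(n.NodeState), nil }
func resolveHealthState(n *Node) (NodeState, error) { return n.HealthState, nil }
func resolveMetaData(n *Node) (any, error)          { return n.MetaData, nil }

func main() {
	n := &Node{Hostname: "node001", NodeState: "allocated", HealthState: "full"}
	s, _ := resolveNodeState(n)
	h, _ := resolveHealthState(n)
	fmt.Println(n.Hostname, s, h)
}
```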
@@ -338,6 +358,21 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
     return counts, nil
 }

+// Node is the resolver for the node field.
+func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) {
+    panic(fmt.Errorf("not implemented: Node - node"))
+}
+
+// Nodes is the resolver for the nodes field.
+func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
+    panic(fmt.Errorf("not implemented: Nodes - nodes"))
+}
+
+// NodeStats is the resolver for the nodeStats field.
+func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) {
+    panic(fmt.Errorf("not implemented: NodeStats - nodeStats"))
+}
+
 // Job is the resolver for the job field.
 func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
     numericId, err := strconv.ParseInt(id, 10, 64)
@@ -499,10 +534,7 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
         return nil, err
     }

-    hasNextPage := false
-    if len(nextJobs) == 1 {
-        hasNextPage = true
-    }
+    hasNextPage := len(nextJobs) == 1

     return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
 }
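The simplified hasNextPage assignment keeps the existing look-ahead pagination idea: one extra row beyond the requested page is fetched, and its presence signals that another page exists. A minimal, self-contained sketch of that pattern; the queryJobs helper is hypothetical and stands in for the repository query:

```go
package main

import "fmt"

// queryJobs is a hypothetical stand-in for the repository call that fetches
// up to 'limit' rows starting at 'offset'.
func queryJobs(offset, limit int) []int {
	all := []int{1, 2, 3, 4, 5}
	if offset >= len(all) {
		return nil
	}
	end := offset + limit
	if end > len(all) {
		end = len(all)
	}
	return all[offset:end]
}

func main() {
	page, perPage := 2, 2
	// Fetch one row beyond the page to detect whether another page exists.
	nextJobs := queryJobs(page*perPage, 1)
	hasNextPage := len(nextJobs) == 1
	fmt.Println(hasNextPage) // true: job 5 still follows page 2
}
```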
@@ -513,8 +545,8 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
     var stats []*model.JobsStatistics

     // Top Level Defaults
-    var defaultDurationBins string = "1h"
-    var defaultMetricBins int = 10
+    defaultDurationBins := "1h"
+    defaultMetricBins := 10

     if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
         requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
@@ -618,9 +650,9 @@ func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.Job
         numThreadsInt := int(job.NumHWThreads)
         numAccsInt := int(job.NumAcc)
         res = append(res, &model.JobStats{
-            ID:         int(job.ID),
+            ID:         int(*job.ID),
             JobID:      strconv.Itoa(int(job.JobID)),
-            StartTime:  int(job.StartTime.Unix()),
+            StartTime:  int(job.StartTime),
             Duration:   int(job.Duration),
             Cluster:    job.Cluster,
             SubCluster: job.SubCluster,
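With this series of changes the job's StartTime is carried as Unix epoch seconds (int64) rather than a time.Time, and the database id is a pointer, so resolvers dereference the id and pass the epoch value straight through. A small, self-contained sketch of working with that assumed shape, including the one-liner back to wall-clock time where it is still needed:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Assumed shape: StartTime kept as Unix seconds, DB id as *int64.
	var startTime int64 = 1717000000
	id := int64(42)
	jobID := &id

	fmt.Println(int(*jobID))                   // dereference the pointer id
	fmt.Println(time.Unix(startTime, 0).UTC()) // convert back to time.Time when needed
	fmt.Println(startTime + int64(3600))       // end of a 1h job, still in epoch seconds
}
```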
@@ -773,6 +805,9 @@ func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricV
 // Mutation returns generated.MutationResolver implementation.
 func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }

+// Node returns generated.NodeResolver implementation.
+func (r *Resolver) Node() generated.NodeResolver { return &nodeResolver{r} }
+
 // Query returns generated.QueryResolver implementation.
 func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }

@@ -783,5 +818,6 @@ type clusterResolver struct{ *Resolver }
 type jobResolver struct{ *Resolver }
 type metricValueResolver struct{ *Resolver }
 type mutationResolver struct{ *Resolver }
+type nodeResolver struct{ *Resolver }
 type queryResolver struct{ *Resolver }
 type subClusterResolver struct{ *Resolver }
@@ -42,7 +42,10 @@ func HandleImportFlag(flag string) error {
         }
         dec := json.NewDecoder(bytes.NewReader(raw))
         dec.DisallowUnknownFields()
-        job := schema.JobMeta{BaseJob: schema.JobDefaults}
+        job := schema.Job{
+            Exclusive:        1,
+            MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+        }
         if err = dec.Decode(&job); err != nil {
             log.Warn("Error while decoding raw json metadata for import")
             return err
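The import path now decodes straight into schema.Job, with the defaults that schema.JobDefaults used to provide set inline before decoding; DisallowUnknownFields still rejects stray keys instead of silently dropping them. A small, self-contained sketch of this decode-with-defaults pattern; the Job struct here is a simplified stand-in, not the project's type:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

// Simplified stand-in for schema.Job, only for illustration.
type Job struct {
	JobID            int64 `json:"jobId"`
	Exclusive        int32 `json:"exclusive"`
	MonitoringStatus int32 `json:"monitoringStatus"`
}

func main() {
	raw := []byte(`{"jobId": 123}`)

	dec := json.NewDecoder(bytes.NewReader(raw))
	dec.DisallowUnknownFields() // unknown keys become an error instead of being ignored

	// Defaults are set before decoding; fields present in the JSON overwrite them.
	job := Job{Exclusive: 1, MonitoringStatus: 1}
	if err := dec.Decode(&job); err != nil {
		fmt.Println("decode failed:", err)
		return
	}
	fmt.Printf("%+v\n", job) // {JobID:123 Exclusive:1 MonitoringStatus:1}
}
```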
@@ -141,7 +144,7 @@ func HandleImportFlag(flag string) error {
             return err
         }

-        if err = SanityChecks(&job.BaseJob); err != nil {
+        if err = SanityChecks(&job); err != nil {
             log.Warn("BaseJob SanityChecks failed")
             return err
         }
@@ -166,7 +166,7 @@ func TestHandleImportFlag(t *testing.T) {
         }

         result := readResult(t, testname)
-        job, err := r.Find(&result.JobId, &result.Cluster, &result.StartTime)
+        job, err := r.FindCached(&result.JobId, &result.Cluster, &result.StartTime)
         if err != nil {
             t.Fatal(err)
         }
@@ -60,11 +60,6 @@ func InitDB() error {
         }

         jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
-        job := schema.Job{
-            BaseJob:       jobMeta.BaseJob,
-            StartTime:     time.Unix(jobMeta.StartTime, 0),
-            StartTimeUnix: jobMeta.StartTime,
-        }

         sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
         if err != nil {
@@ -72,7 +67,7 @@ func InitDB() error {
             return err
         }

-        job.Footprint = make(map[string]float64)
+        jobMeta.Footprint = make(map[string]float64)

         for _, fp := range sc.Footprint {
             statType := "avg"
|
|||||||
|
|
||||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||||
|
|
||||||
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
jobMeta.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||||
}
|
}
|
||||||
|
|
||||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
jobMeta.RawFootprint, err = json.Marshal(jobMeta.Footprint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while marshaling job footprint")
|
log.Warn("Error while marshaling job footprint")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
job.EnergyFootprint = make(map[string]float64)
|
jobMeta.EnergyFootprint = make(map[string]float64)
|
||||||
|
|
||||||
// Total Job Energy Outside Loop
|
// Total Job Energy Outside Loop
|
||||||
totalEnergy := 0.0
|
totalEnergy := 0.0
|
||||||
@@ -117,45 +112,45 @@ func InitDB() error {
                 log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
             }

-            job.EnergyFootprint[fp] = metricEnergy
+            jobMeta.EnergyFootprint[fp] = metricEnergy
             totalEnergy += metricEnergy
         }

-        job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
-        if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
+        jobMeta.Energy = (math.Round(totalEnergy*100.0) / 100.0)
+        if jobMeta.RawEnergyFootprint, err = json.Marshal(jobMeta.EnergyFootprint); err != nil {
             log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
             return err
         }

-        job.RawResources, err = json.Marshal(job.Resources)
+        jobMeta.RawResources, err = json.Marshal(jobMeta.Resources)
         if err != nil {
             log.Errorf("repository initDB(): %v", err)
             errorOccured++
             continue
         }

-        job.RawMetaData, err = json.Marshal(job.MetaData)
+        jobMeta.RawMetaData, err = json.Marshal(jobMeta.MetaData)
         if err != nil {
             log.Errorf("repository initDB(): %v", err)
             errorOccured++
             continue
         }

-        if err := SanityChecks(&job.BaseJob); err != nil {
+        if err := SanityChecks(jobMeta); err != nil {
             log.Errorf("repository initDB(): %v", err)
             errorOccured++
             continue
         }

         id, err := r.TransactionAddNamed(t,
-            repository.NamedJobInsert, job)
+            repository.NamedJobInsert, jobMeta)
         if err != nil {
             log.Errorf("repository initDB(): %v", err)
             errorOccured++
             continue
         }

-        for _, tag := range job.Tags {
+        for _, tag := range jobMeta.Tags {
             tagstr := tag.Name + ":" + tag.Type
             tagId, ok := tags[tagstr]
             if !ok {
@@ -190,7 +185,7 @@ func InitDB() error {
 }

 // This function also sets the subcluster if necessary!
-func SanityChecks(job *schema.BaseJob) error {
+func SanityChecks(job *schema.Job) error {
     if c := archive.GetCluster(job.Cluster); c == nil {
         return fmt.Errorf("no such cluster: %v", job.Cluster)
     }
@@ -183,8 +183,8 @@ func (ccms *CCMetricStore) LoadData(

     req := ApiQueryRequest{
         Cluster:   job.Cluster,
-        From:      job.StartTime.Unix(),
-        To:        job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
+        From:      job.StartTime,
+        To:        job.StartTime + int64(job.Duration),
         Queries:   queries,
         WithStats: true,
         WithData:  true,
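Because StartTime is now stored directly as Unix seconds, the query window becomes plain integer arithmetic instead of time.Time math. A tiny, self-contained sketch of the conversion this hunk performs, with assumed example values:

```go
package main

import "fmt"

func main() {
	// Assumed values: job.StartTime as Unix seconds, job.Duration in seconds.
	var startTime int64 = 1717000000
	var duration int32 = 5400

	from := startTime                 // was job.StartTime.Unix()
	to := startTime + int64(duration) // was StartTime.Add(Duration * time.Second).Unix()
	fmt.Println(from, to)
}
```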
@@ -570,7 +570,6 @@ func (ccms *CCMetricStore) LoadStats(
     metrics []string,
     ctx context.Context,
 ) (map[string]map[string]schema.MetricStatistics, error) {
-
     queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
     if err != nil {
         log.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error())
@@ -579,8 +578,8 @@ func (ccms *CCMetricStore) LoadStats(

     req := ApiQueryRequest{
         Cluster:   job.Cluster,
-        From:      job.StartTime.Unix(),
-        To:        job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
+        From:      job.StartTime,
+        To:        job.StartTime + int64(job.Duration),
         Queries:   queries,
         WithStats: true,
         WithData:  false,
@@ -638,8 +637,8 @@ func (ccms *CCMetricStore) LoadScopedStats(

     req := ApiQueryRequest{
         Cluster:   job.Cluster,
-        From:      job.StartTime.Unix(),
-        To:        job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
+        From:      job.StartTime,
+        To:        job.StartTime + int64(job.Duration),
         Queries:   queries,
         WithStats: true,
         WithData:  false,
@@ -816,7 +815,6 @@ func (ccms *CCMetricStore) LoadNodeListData(
     page *model.PageRequest,
     ctx context.Context,
 ) (map[string]schema.JobData, int, bool, error) {
-
     // 0) Init additional vars
     var totalNodes int = 0
     var hasNextPage bool = false
@@ -975,7 +973,6 @@ func (ccms *CCMetricStore) buildNodeQueries(
     scopes []schema.MetricScope,
     resolution int,
 ) ([]ApiQuery, []schema.MetricScope, error) {
-
     queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
     assignedScope := []schema.MetricScope{}

|
@ -1,575 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package metricdata
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"crypto/tls"
|
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
|
||||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
|
||||||
)
|
|
||||||
|
|
||||||
type InfluxDBv2DataRepositoryConfig struct {
|
|
||||||
Url string `json:"url"`
|
|
||||||
Token string `json:"token"`
|
|
||||||
Bucket string `json:"bucket"`
|
|
||||||
Org string `json:"org"`
|
|
||||||
SkipTls bool `json:"skiptls"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type InfluxDBv2DataRepository struct {
|
|
||||||
client influxdb2.Client
|
|
||||||
queryClient influxdb2Api.QueryAPI
|
|
||||||
bucket, measurement string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
|
|
||||||
var config InfluxDBv2DataRepositoryConfig
|
|
||||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
|
||||||
log.Warn("Error while unmarshaling raw json config")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
idb.client = influxdb2.NewClientWithOptions(config.Url, config.Token, influxdb2.DefaultOptions().SetTLSConfig(&tls.Config{InsecureSkipVerify: config.SkipTls}))
|
|
||||||
idb.queryClient = idb.client.QueryAPI(config.Org)
|
|
||||||
idb.bucket = config.Bucket
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) formatTime(t time.Time) string {
|
|
||||||
return t.Format(time.RFC3339) // Like “2006-01-02T15:04:05Z07:00”
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) epochToTime(epoch int64) time.Time {
|
|
||||||
return time.Unix(epoch, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadData(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
ctx context.Context,
|
|
||||||
resolution int) (schema.JobData, error) {
|
|
||||||
|
|
||||||
log.Infof("InfluxDB 2 Backend: Resolution Scaling not Implemented, will return default timestep. Requested Resolution %d", resolution)
|
|
||||||
|
|
||||||
measurementsConds := make([]string, 0, len(metrics))
|
|
||||||
for _, m := range metrics {
|
|
||||||
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
|
|
||||||
}
|
|
||||||
measurementsCond := strings.Join(measurementsConds, " or ")
|
|
||||||
|
|
||||||
hostsConds := make([]string, 0, len(job.Resources))
|
|
||||||
for _, h := range job.Resources {
|
|
||||||
if h.HWThreads != nil || h.Accelerators != nil {
|
|
||||||
// TODO
|
|
||||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
|
||||||
}
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
|
||||||
}
|
|
||||||
hostsCond := strings.Join(hostsConds, " or ")
|
|
||||||
|
|
||||||
jobData := make(schema.JobData) // Empty Schema: map[<string>FIELD]map[<MetricScope>SCOPE]<*JobMetric>METRIC
|
|
||||||
// Requested Scopes
|
|
||||||
for _, scope := range scopes {
|
|
||||||
query := ""
|
|
||||||
switch scope {
|
|
||||||
case "node":
|
|
||||||
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows <-- Resolution could be added here?
|
|
||||||
// log.Info("Scope 'node' requested. ")
|
|
||||||
query = fmt.Sprintf(`
|
|
||||||
from(bucket: "%s")
|
|
||||||
|> range(start: %s, stop: %s)
|
|
||||||
|> filter(fn: (r) => (%s) and (%s) )
|
|
||||||
|> drop(columns: ["_start", "_stop"])
|
|
||||||
|> group(columns: ["hostname", "_measurement"])
|
|
||||||
|> aggregateWindow(every: 60s, fn: mean)
|
|
||||||
|> drop(columns: ["_time"])`,
|
|
||||||
idb.bucket,
|
|
||||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
|
||||||
measurementsCond, hostsCond)
|
|
||||||
case "socket":
|
|
||||||
log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
case "core":
|
|
||||||
log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
// Get Finest Granularity only, Set NULL to 0.0
|
|
||||||
// query = fmt.Sprintf(`
|
|
||||||
// from(bucket: "%s")
|
|
||||||
// |> range(start: %s, stop: %s)
|
|
||||||
// |> filter(fn: (r) => %s )
|
|
||||||
// |> filter(fn: (r) => %s )
|
|
||||||
// |> drop(columns: ["_start", "_stop", "cluster"])
|
|
||||||
// |> map(fn: (r) => (if exists r._value then {r with _value: r._value} else {r with _value: 0.0}))`,
|
|
||||||
// idb.bucket,
|
|
||||||
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
|
|
||||||
// measurementsCond, hostsCond)
|
|
||||||
case "hwthread":
|
|
||||||
log.Info(" Scope 'hwthread' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
case "accelerator":
|
|
||||||
log.Info(" Scope 'accelerator' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
default:
|
|
||||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
|
||||||
continue
|
|
||||||
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
|
|
||||||
}
|
|
||||||
|
|
||||||
rows, err := idb.queryClient.Query(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while performing query")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Init Metrics: Only Node level now -> TODO: Matching /check on scope level ...
|
|
||||||
for _, metric := range metrics {
|
|
||||||
jobMetric, ok := jobData[metric]
|
|
||||||
if !ok {
|
|
||||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
|
||||||
jobMetric = map[schema.MetricScope]*schema.JobMetric{
|
|
||||||
scope: { // uses scope var from above!
|
|
||||||
Unit: mc.Unit,
|
|
||||||
Timestep: mc.Timestep,
|
|
||||||
Series: make([]schema.Series, 0, len(job.Resources)),
|
|
||||||
StatisticsSeries: nil, // Should be: &schema.StatsSeries{},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jobData[metric] = jobMetric
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process Result: Time-Data
|
|
||||||
field, host, hostSeries := "", "", schema.Series{}
|
|
||||||
// typeId := 0
|
|
||||||
switch scope {
|
|
||||||
case "node":
|
|
||||||
for rows.Next() {
|
|
||||||
row := rows.Record()
|
|
||||||
if host == "" || host != row.ValueByKey("hostname").(string) || rows.TableChanged() {
|
|
||||||
if host != "" {
|
|
||||||
// Append Series before reset
|
|
||||||
jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
|
||||||
}
|
|
||||||
field, host = row.Measurement(), row.ValueByKey("hostname").(string)
|
|
||||||
hostSeries = schema.Series{
|
|
||||||
Hostname: host,
|
|
||||||
Statistics: schema.MetricStatistics{}, //TODO Add Statistics
|
|
||||||
Data: make([]schema.Float, 0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
val, ok := row.Value().(float64)
|
|
||||||
if ok {
|
|
||||||
hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
|
||||||
} else {
|
|
||||||
hostSeries.Data = append(hostSeries.Data, schema.Float(0))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "socket":
|
|
||||||
continue
|
|
||||||
case "accelerator":
|
|
||||||
continue
|
|
||||||
case "hwthread":
|
|
||||||
// See below @ core
|
|
||||||
continue
|
|
||||||
case "core":
|
|
||||||
continue
|
|
||||||
// Include Series.Id in hostSeries
|
|
||||||
// for rows.Next() {
|
|
||||||
// row := rows.Record()
|
|
||||||
// if ( host == "" || host != row.ValueByKey("hostname").(string) || typeId != row.ValueByKey("type-id").(int) || rows.TableChanged() ) {
|
|
||||||
// if ( host != "" ) {
|
|
||||||
// // Append Series before reset
|
|
||||||
// jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
|
||||||
// }
|
|
||||||
// field, host, typeId = row.Measurement(), row.ValueByKey("hostname").(string), row.ValueByKey("type-id").(int)
|
|
||||||
// hostSeries = schema.Series{
|
|
||||||
// Hostname: host,
|
|
||||||
// Id: &typeId,
|
|
||||||
// Statistics: nil,
|
|
||||||
// Data: make([]schema.Float, 0),
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// val := row.Value().(float64)
|
|
||||||
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
|
||||||
// }
|
|
||||||
default:
|
|
||||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
|
||||||
continue
|
|
||||||
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
|
|
||||||
}
|
|
||||||
// Append last Series
|
|
||||||
jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get Stats
|
|
||||||
stats, err := idb.LoadStats(job, metrics, ctx)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while loading statistics")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, scope := range scopes {
|
|
||||||
if scope == "node" { // No 'socket/core' support yet
|
|
||||||
for metric, nodes := range stats {
|
|
||||||
for node, stats := range nodes {
|
|
||||||
for index, _ := range jobData[metric][scope].Series {
|
|
||||||
if jobData[metric][scope].Series[index].Hostname == node {
|
|
||||||
jobData[metric][scope].Series[index].Statistics = schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return jobData, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadStats(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
|
||||||
|
|
||||||
stats := map[string]map[string]schema.MetricStatistics{}
|
|
||||||
|
|
||||||
hostsConds := make([]string, 0, len(job.Resources))
|
|
||||||
for _, h := range job.Resources {
|
|
||||||
if h.HWThreads != nil || h.Accelerators != nil {
|
|
||||||
// TODO
|
|
||||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
|
||||||
}
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
|
||||||
}
|
|
||||||
hostsCond := strings.Join(hostsConds, " or ")
|
|
||||||
|
|
||||||
// lenMet := len(metrics)
|
|
||||||
|
|
||||||
for _, metric := range metrics {
|
|
||||||
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
|
|
||||||
|
|
||||||
query := fmt.Sprintf(`
|
|
||||||
data = from(bucket: "%s")
|
|
||||||
|> range(start: %s, stop: %s)
|
|
||||||
|> filter(fn: (r) => r._measurement == "%s" and r._field == "value" and (%s))
|
|
||||||
union(tables: [data |> mean(column: "_value") |> set(key: "_field", value: "avg"),
|
|
||||||
data |> min(column: "_value") |> set(key: "_field", value: "min"),
|
|
||||||
data |> max(column: "_value") |> set(key: "_field", value: "max")])
|
|
||||||
|> pivot(rowKey: ["hostname"], columnKey: ["_field"], valueColumn: "_value")
|
|
||||||
|> group()`,
|
|
||||||
idb.bucket,
|
|
||||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
|
||||||
metric, hostsCond)
|
|
||||||
|
|
||||||
rows, err := idb.queryClient.Query(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while performing query")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes := map[string]schema.MetricStatistics{}
|
|
||||||
for rows.Next() {
|
|
||||||
row := rows.Record()
|
|
||||||
host := row.ValueByKey("hostname").(string)
|
|
||||||
|
|
||||||
avg, avgok := row.ValueByKey("avg").(float64)
|
|
||||||
if !avgok {
|
|
||||||
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
|
|
||||||
avg = 0.0
|
|
||||||
}
|
|
||||||
min, minok := row.ValueByKey("min").(float64)
|
|
||||||
if !minok {
|
|
||||||
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
|
|
||||||
min = 0.0
|
|
||||||
}
|
|
||||||
max, maxok := row.ValueByKey("max").(float64)
|
|
||||||
if !maxok {
|
|
||||||
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
|
|
||||||
max = 0.0
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes[host] = schema.MetricStatistics{
|
|
||||||
Avg: avg,
|
|
||||||
Min: min,
|
|
||||||
Max: max,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats[metric] = nodes
|
|
||||||
}
|
|
||||||
|
|
||||||
return stats, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in Job-View StatsTable
|
|
||||||
// UNTESTED
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadScopedStats(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
ctx context.Context) (schema.ScopedJobStats, error) {
|
|
||||||
|
|
||||||
// Assumption: idb.loadData() only returns series node-scope - use node scope for statsTable
|
|
||||||
scopedJobStats := make(schema.ScopedJobStats)
|
|
||||||
data, err := idb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while loading job for scopedJobStats")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, metricData := range data {
|
|
||||||
for _, scope := range scopes {
|
|
||||||
if scope != schema.MetricScopeNode {
|
|
||||||
logOnce.Do(func() {
|
|
||||||
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := scopedJobStats[metric]; !ok {
|
|
||||||
scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := scopedJobStats[metric][scope]; !ok {
|
|
||||||
scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, series := range metricData[scope].Series {
|
|
||||||
scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
|
|
||||||
Hostname: series.Hostname,
|
|
||||||
Data: &series.Statistics,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return scopedJobStats, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in Systems-View @ Node-Overview
|
|
||||||
// UNTESTED
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
|
||||||
cluster string,
|
|
||||||
metrics, nodes []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
from, to time.Time,
|
|
||||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
|
||||||
|
|
||||||
// Note: scopes[] Array will be ignored, only return node scope
|
|
||||||
|
|
||||||
// CONVERT ARGS TO INFLUX
|
|
||||||
measurementsConds := make([]string, 0)
|
|
||||||
for _, m := range metrics {
|
|
||||||
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
|
|
||||||
}
|
|
||||||
measurementsCond := strings.Join(measurementsConds, " or ")
|
|
||||||
|
|
||||||
hostsConds := make([]string, 0)
|
|
||||||
if nodes == nil {
|
|
||||||
var allNodes []string
|
|
||||||
subClusterNodeLists := archive.NodeLists[cluster]
|
|
||||||
for _, nodeList := range subClusterNodeLists {
|
|
||||||
allNodes = append(nodes, nodeList.PrintList()...)
|
|
||||||
}
|
|
||||||
for _, node := range allNodes {
|
|
||||||
nodes = append(nodes, node)
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for _, node := range nodes {
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
hostsCond := strings.Join(hostsConds, " or ")
|
|
||||||
|
|
||||||
// BUILD AND PERFORM QUERY
|
|
||||||
query := fmt.Sprintf(`
|
|
||||||
from(bucket: "%s")
|
|
||||||
|> range(start: %s, stop: %s)
|
|
||||||
|> filter(fn: (r) => (%s) and (%s) )
|
|
||||||
|> drop(columns: ["_start", "_stop"])
|
|
||||||
|> group(columns: ["hostname", "_measurement"])
|
|
||||||
|> aggregateWindow(every: 60s, fn: mean)
|
|
||||||
|> drop(columns: ["_time"])`,
|
|
||||||
idb.bucket,
|
|
||||||
idb.formatTime(from), idb.formatTime(to),
|
|
||||||
measurementsCond, hostsCond)
|
|
||||||
|
|
||||||
rows, err := idb.queryClient.Query(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while performing query")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// HANDLE QUERY RETURN
|
|
||||||
// Collect Float Arrays for Node@Metric -> No Scope Handling!
|
|
||||||
influxData := make(map[string]map[string][]schema.Float)
|
|
||||||
for rows.Next() {
|
|
||||||
row := rows.Record()
|
|
||||||
host, field := row.ValueByKey("hostname").(string), row.Measurement()
|
|
||||||
|
|
||||||
influxHostData, ok := influxData[host]
|
|
||||||
if !ok {
|
|
||||||
influxHostData = make(map[string][]schema.Float)
|
|
||||||
influxData[host] = influxHostData
|
|
||||||
}
|
|
||||||
|
|
||||||
influxFieldData, ok := influxData[host][field]
|
|
||||||
if !ok {
|
|
||||||
influxFieldData = make([]schema.Float, 0)
|
|
||||||
influxData[host][field] = influxFieldData
|
|
||||||
}
|
|
||||||
|
|
||||||
val, ok := row.Value().(float64)
|
|
||||||
if ok {
|
|
||||||
influxData[host][field] = append(influxData[host][field], schema.Float(val))
|
|
||||||
} else {
|
|
||||||
influxData[host][field] = append(influxData[host][field], schema.Float(0))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// BUILD FUNCTION RETURN
|
|
||||||
data := make(map[string]map[string][]*schema.JobMetric)
|
|
||||||
for node, metricData := range influxData {
|
|
||||||
|
|
||||||
nodeData, ok := data[node]
|
|
||||||
if !ok {
|
|
||||||
nodeData = make(map[string][]*schema.JobMetric)
|
|
||||||
data[node] = nodeData
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, floatArray := range metricData {
|
|
||||||
avg, min, max := 0.0, 0.0, 0.0
|
|
||||||
for _, val := range floatArray {
|
|
||||||
avg += float64(val)
|
|
||||||
min = math.Min(min, float64(val))
|
|
||||||
max = math.Max(max, float64(val))
|
|
||||||
}
|
|
||||||
|
|
||||||
stats := schema.MetricStatistics{
|
|
||||||
Avg: (math.Round((avg/float64(len(floatArray)))*100) / 100),
|
|
||||||
Min: (math.Round(min*100) / 100),
|
|
||||||
Max: (math.Round(max*100) / 100),
|
|
||||||
}
|
|
||||||
|
|
||||||
mc := archive.GetMetricConfig(cluster, metric)
|
|
||||||
nodeData[metric] = append(nodeData[metric], &schema.JobMetric{
|
|
||||||
Unit: mc.Unit,
|
|
||||||
Timestep: mc.Timestep,
|
|
||||||
Series: []schema.Series{
|
|
||||||
{
|
|
||||||
Hostname: node,
|
|
||||||
Statistics: stats,
|
|
||||||
Data: floatArray,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in Systems-View @ Node-List
|
|
||||||
// UNTESTED
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
|
|
||||||
cluster, subCluster, nodeFilter string,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
resolution int,
|
|
||||||
from, to time.Time,
|
|
||||||
page *model.PageRequest,
|
|
||||||
ctx context.Context,
|
|
||||||
) (map[string]schema.JobData, int, bool, error) {
|
|
||||||
|
|
||||||
// Assumption: idb.loadData() only returns series node-scope - use node scope for NodeList
|
|
||||||
|
|
||||||
// 0) Init additional vars
|
|
||||||
var totalNodes int = 0
|
|
||||||
var hasNextPage bool = false
|
|
||||||
|
|
||||||
// 1) Get list of all nodes
|
|
||||||
var nodes []string
|
|
||||||
if subCluster != "" {
|
|
||||||
scNodes := archive.NodeLists[cluster][subCluster]
|
|
||||||
nodes = scNodes.PrintList()
|
|
||||||
} else {
|
|
||||||
subClusterNodeLists := archive.NodeLists[cluster]
|
|
||||||
for _, nodeList := range subClusterNodeLists {
|
|
||||||
nodes = append(nodes, nodeList.PrintList()...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2) Filter nodes
|
|
||||||
if nodeFilter != "" {
|
|
||||||
filteredNodes := []string{}
|
|
||||||
for _, node := range nodes {
|
|
||||||
if strings.Contains(node, nodeFilter) {
|
|
||||||
filteredNodes = append(filteredNodes, node)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nodes = filteredNodes
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
|
|
||||||
totalNodes = len(nodes)
|
|
||||||
sort.Strings(nodes)
|
|
||||||
|
|
||||||
// 3) Apply paging
|
|
||||||
if len(nodes) > page.ItemsPerPage {
|
|
||||||
start := (page.Page - 1) * page.ItemsPerPage
|
|
||||||
end := start + page.ItemsPerPage
|
|
||||||
if end > len(nodes) {
|
|
||||||
end = len(nodes)
|
|
||||||
hasNextPage = false
|
|
||||||
} else {
|
|
||||||
hasNextPage = true
|
|
||||||
}
|
|
||||||
nodes = nodes[start:end]
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4) Fetch And Convert Data, use idb.LoadNodeData() for query
|
|
||||||
|
|
||||||
rawNodeData, err := idb.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
|
||||||
if err != nil {
|
|
||||||
log.Error(fmt.Sprintf("Error while loading influx nodeData for nodeListData %#v\n", err))
|
|
||||||
return nil, totalNodes, hasNextPage, err
|
|
||||||
}
|
|
||||||
|
|
||||||
data := make(map[string]schema.JobData)
|
|
||||||
for node, nodeData := range rawNodeData {
|
|
||||||
// Init Nested Map Data Structures If Not Found
|
|
||||||
hostData, ok := data[node]
|
|
||||||
if !ok {
|
|
||||||
hostData = make(schema.JobData)
|
|
||||||
data[node] = hostData
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, nodeMetricData := range nodeData {
|
|
||||||
metricData, ok := hostData[metric]
|
|
||||||
if !ok {
|
|
||||||
metricData = make(map[schema.MetricScope]*schema.JobMetric)
|
|
||||||
data[node][metric] = metricData
|
|
||||||
}
|
|
||||||
|
|
||||||
data[node][metric][schema.MetricScopeNode] = nodeMetricData[0] // Only Node Scope Returned from loadNodeData
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data, totalNodes, hasNextPage, nil
|
|
||||||
}
|
|
@@ -54,8 +54,6 @@ func Init() error {
     switch kind.Kind {
     case "cc-metric-store":
         mdr = &CCMetricStore{}
-    case "influxdb":
-        mdr = &InfluxDBv2DataRepository{}
     case "prometheus":
         mdr = &PrometheusDataRepository{}
     case "test":
@@ -279,8 +279,8 @@ func (pdb *PrometheusDataRepository) LoadData(
     for i, resource := range job.Resources {
         nodes[i] = resource.Hostname
     }
-    from := job.StartTime
-    to := job.StartTime.Add(time.Duration(job.Duration) * time.Second)
+    from := time.Unix(job.StartTime, 0)
+    to := time.Unix(job.StartTime+int64(job.Duration), 0)

     for _, scope := range scopes {
         if scope != schema.MetricScopeNode {
@@ -453,8 +453,8 @@ func (pdb *PrometheusDataRepository) LoadScopedStats(
     job *schema.Job,
     metrics []string,
     scopes []schema.MetricScope,
-    ctx context.Context) (schema.ScopedJobStats, error) {
+    ctx context.Context,
+) (schema.ScopedJobStats, error) {
     // Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
     scopedJobStats := make(schema.ScopedJobStats)
     data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
@@ -502,7 +502,6 @@ func (pdb *PrometheusDataRepository) LoadNodeListData(
     page *model.PageRequest,
     ctx context.Context,
 ) (map[string]schema.JobData, int, bool, error) {
-
     // Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList

     // 0) Init additional vars
@@ -9,12 +9,12 @@ import (
     "encoding/json"
     "errors"
     "fmt"
+    "maps"
     "math"
     "strconv"
     "sync"
     "time"

-    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
     "github.com/ClusterCockpit/cc-backend/pkg/archive"
     "github.com/ClusterCockpit/cc-backend/pkg/log"
     "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
@@ -33,6 +33,7 @@ type JobRepository struct {
     stmtCache *sq.StmtCache
     cache     *lrucache.Cache
     driver    string
+    Mutex     sync.Mutex
 }

 func GetJobRepository() *JobRepository {
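The new Mutex exists so that inserts into the job_cache table and the later bulk sync cannot interleave. A minimal, self-contained sketch of that lock-around-write pattern; the repo type and its in-memory slice are stand-ins for the real SQL tables, not the project's code:

```go
package main

import (
	"fmt"
	"sync"
)

// Minimal stand-in illustrating why the repository carries a Mutex:
// inserts into the cache and the later sync must not interleave.
type repo struct {
	mu    sync.Mutex
	cache []string
}

func (r *repo) insert(job string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.cache = append(r.cache, job)
}

func (r *repo) sync() []string {
	r.mu.Lock()
	defer r.mu.Unlock()
	out := r.cache
	r.cache = nil // cache is drained atomically with respect to inserts
	return out
}

func main() {
	r := &repo{}
	r.insert("job-1")
	r.insert("job-2")
	fmt.Println(r.sync()) // [job-1 job-2]
}
```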
@@ -51,17 +52,29 @@ func GetJobRepository() *JobRepository {
 }

 var jobColumns []string = []string{
-    "job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
-    "job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
-    "job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
+    "job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
+    "job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
+    "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status",
+    "job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
+    "job.footprint", "job.energy",
 }

-func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
+var jobCacheColumns []string = []string{
+    "job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
+    "job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
+    "job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
+    "job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt",
+    "job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
+    "job_cache.footprint", "job_cache.energy",
+}
+
+func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
     job := &schema.Job{}

     if err := row.Scan(
-        &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
-        &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
+        &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster,
+        &job.StartTime, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads,
+        &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
         &job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
         log.Warnf("Error while scanning rows (Job): %v", err)
         return nil, err
|
|||||||
}
|
}
|
||||||
job.RawFootprint = nil
|
job.RawFootprint = nil
|
||||||
|
|
||||||
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
|
||||||
// Always ensure accurate duration for running jobs
|
// Always ensure accurate duration for running jobs
|
||||||
if job.State == schema.JobStateRunning {
|
if job.State == schema.JobStateRunning {
|
||||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
job.Duration = int32(time.Now().Unix() - job.StartTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
return job, nil
|
return job, nil
|
||||||
@@ -138,17 +150,6 @@ func (r *JobRepository) Flush() error {
     return nil
 }

-func scanJobLink(row interface{ Scan(...interface{}) error }) (*model.JobLink, error) {
-    jobLink := &model.JobLink{}
-    if err := row.Scan(
-        &jobLink.ID, &jobLink.JobID); err != nil {
-        log.Warn("Error while scanning rows (jobLink)")
-        return nil, err
-    }
-
-    return jobLink, nil
-}
-
 func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
     start := time.Now()
     cachekey := fmt.Sprintf("metadata:%d", job.ID)
@@ -189,9 +190,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er

     if job.MetaData != nil {
         cpy := make(map[string]string, len(job.MetaData)+1)
-        for k, v := range job.MetaData {
-            cpy[k] = v
-        }
+        maps.Copy(cpy, job.MetaData)
         cpy[key] = val
         job.MetaData = cpy
     } else {
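maps.Copy from the standard library (Go 1.21+) replaces the manual copy loop while keeping the same copy-on-write behaviour for the metadata map. A small runnable example of the pattern, with sample keys only for illustration:

```go
package main

import (
	"fmt"
	"maps"
)

func main() {
	orig := map[string]string{"slurmInfo": "...", "jobScript": "..."}

	// Copy-on-write update: the original map is left untouched.
	cpy := make(map[string]string, len(orig)+1)
	maps.Copy(cpy, orig) // replaces the former for-range copy loop
	cpy["issues"] = "node failure"

	fmt.Println(len(orig), len(cpy)) // 2 3
}
```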
@@ -389,7 +388,7 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
 func (r *JobRepository) Partitions(cluster string) ([]string, error) {
     var err error
     start := time.Now()
-    partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
+    partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
         parts := []string{}
         if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
             return nil, 0, 1000
@@ -477,6 +476,7 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
     return nil
 }

+// FIXME: Reconsider filtering short jobs with harcoded threshold
 func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
     query := sq.Select(jobColumns...).From("job").
         Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
@@ -581,7 +581,7 @@ func (r *JobRepository) MarkArchived(

 func (r *JobRepository) UpdateEnergy(
     stmt sq.UpdateBuilder,
-    jobMeta *schema.JobMeta,
+    jobMeta *schema.Job,
 ) (sq.UpdateBuilder, error) {
     /* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
     sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
@@ -631,7 +631,7 @@ func (r *JobRepository) UpdateEnergy(

 func (r *JobRepository) UpdateFootprint(
     stmt sq.UpdateBuilder,
-    jobMeta *schema.JobMeta,
+    jobMeta *schema.Job,
 ) (sq.UpdateBuilder, error) {
     /* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
     sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
@@ -13,6 +13,14 @@ import (
     sq "github.com/Masterminds/squirrel"
 )

+const NamedJobCacheInsert string = `INSERT INTO job_cache (
+    job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
+    exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
+) VALUES (
+    :job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
+    :exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
+);`
+
 const NamedJobInsert string = `INSERT INTO job (
     job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
     exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
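The named insert statements bind :placeholders to db-tagged struct fields via sqlx. A hedged, self-contained sketch of how such a named insert is typically executed; the table layout and struct below are heavily simplified stand-ins, not the project's schema:

```go
package main

import (
	"fmt"

	"github.com/jmoiron/sqlx"
	_ "github.com/mattn/go-sqlite3"
)

// Simplified stand-in; the real job_cache table has many more columns.
type cachedJob struct {
	JobID   int64  `db:"job_id"`
	Cluster string `db:"cluster"`
}

func main() {
	db, err := sqlx.Open("sqlite3", ":memory:")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	db.MustExec(`CREATE TABLE job_cache (job_id INTEGER, cluster TEXT)`)

	// NamedExec binds :job_id and :cluster from the db-tagged struct fields.
	res, err := db.NamedExec(
		`INSERT INTO job_cache (job_id, cluster) VALUES (:job_id, :cluster)`,
		cachedJob{JobID: 42, Cluster: "fritz"},
	)
	if err != nil {
		panic(err)
	}
	id, _ := res.LastInsertId()
	fmt.Println("inserted row id:", id)
}
```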
@@ -21,8 +29,10 @@ const NamedJobInsert string = `INSERT INTO job (
     :exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
 );`

-func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
-    res, err := r.DB.NamedExec(NamedJobInsert, job)
+func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
+    r.Mutex.Lock()
+    res, err := r.DB.NamedExec(NamedJobCacheInsert, job)
+    r.Mutex.Unlock()
     if err != nil {
         log.Warn("Error while NamedJobInsert")
         return 0, err
@@ -36,9 +46,48 @@ func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
     return id, nil
 }

+func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
+    r.Mutex.Lock()
+    defer r.Mutex.Unlock()
+
+    query := sq.Select(jobCacheColumns...).From("job_cache")
+
+    rows, err := query.RunWith(r.stmtCache).Query()
+    if err != nil {
+        log.Errorf("Error while running query %v", err)
+        return nil, err
+    }
+
+    jobs := make([]*schema.Job, 0, 50)
+    for rows.Next() {
+        job, err := scanJob(rows)
+        if err != nil {
+            rows.Close()
+            log.Warn("Error while scanning rows")
+            return nil, err
+        }
+        jobs = append(jobs, job)
+    }
+
+    _, err = r.DB.Exec(
+        "INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
+    if err != nil {
+        log.Warnf("Error while Job sync: %v", err)
+        return nil, err
+    }
+
+    _, err = r.DB.Exec("DELETE FROM job_cache")
+    if err != nil {
+        log.Warnf("Error while Job cache clean: %v", err)
+        return nil, err
+    }
+
+    return jobs, nil
+}
+
 // Start inserts a new job in the table, returning the unique job ID.
 // Statistics are not transfered!
-func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
+func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
     job.RawFootprint, err = json.Marshal(job.Footprint)
     if err != nil {
         return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
@ -73,3 +122,19 @@ func (r *JobRepository) Stop(
|
|||||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) StopCached(
|
||||||
|
jobId int64,
|
||||||
|
duration int32,
|
||||||
|
state schema.JobState,
|
||||||
|
monitoringStatus int32,
|
||||||
|
) (err error) {
|
||||||
|
stmt := sq.Update("job_cache").
|
||||||
|
Set("job_state", state).
|
||||||
|
Set("duration", duration).
|
||||||
|
Set("monitoring_status", monitoringStatus).
|
||||||
|
Where("job.id = ?", jobId)
|
||||||
|
|
||||||
|
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
@@ -43,6 +43,26 @@ func (r *JobRepository) Find(
 	return scanJob(q.RunWith(r.stmtCache).QueryRow())
 }
 
+func (r *JobRepository) FindCached(
+	jobId *int64,
+	cluster *string,
+	startTime *int64,
+) (*schema.Job, error) {
+	q := sq.Select(jobCacheColumns...).From("job_cache").
+		Where("job_cache.job_id = ?", *jobId)
+
+	if cluster != nil {
+		q = q.Where("job_cache.cluster = ?", *cluster)
+	}
+	if startTime != nil {
+		q = q.Where("job_cache.start_time = ?", *startTime)
+	}
+
+	q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match
+
+	return scanJob(q.RunWith(r.stmtCache).QueryRow())
+}
+
 // Find executes a SQL query to find a specific batch job.
 // The job is queried using the batch job id, the cluster name,
 // and the start time of the job in UNIX epoch time seconds.
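FindCached mirrors Find but reads from job_cache, so a stop or query path can fall back to the cache while a job has not been synced yet. A hedged usage sketch (the fallback policy shown here is an assumption about the caller, not code from this commit):

// Sketch: resolve a job that may still live only in job_cache.
// The boolean result tells the caller whether StopCached must be used instead of Stop.
func findForStop(r *JobRepository, jobID int64, cluster string, startTime int64) (*schema.Job, bool, error) {
	job, err := r.Find(&jobID, &cluster, &startTime)
	if err == nil {
		return job, false, nil
	}
	job, err = r.FindCached(&jobID, &cluster, &startTime)
	if err != nil {
		return nil, false, err
	}
	return job, true, nil
}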
@@ -83,6 +103,35 @@ func (r *JobRepository) FindAll(
 	return jobs, nil
 }
 
+// Get complete joblist only consisting of db ids.
+// This is useful to process large job counts and intended to be used
+// together with FindById to process jobs one by one
+func (r *JobRepository) GetJobList() ([]int64, error) {
+	query := sq.Select("id").From("job").
+		Where("job.job_state != 'running'")
+
+	rows, err := query.RunWith(r.stmtCache).Query()
+	if err != nil {
+		log.Error("Error while running query")
+		return nil, err
+	}
+
+	jl := make([]int64, 0, 1000)
+	for rows.Next() {
+		var id int64
+		err := rows.Scan(&id)
+		if err != nil {
+			rows.Close()
+			log.Warn("Error while scanning rows")
+			return nil, err
+		}
+		jl = append(jl, id)
+	}
+
+	log.Infof("Return job count %d", len(jl))
+	return jl, nil
+}
+
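GetJobList is meant to be combined with a per-id lookup so large job counts can be processed one at a time; the tagger's RunTaggers added later in this commit does exactly that with FindByIdDirect. A compact sketch of the pattern:

// Sketch: apply a callback to every non-running job, one row at a time.
func forEachFinishedJob(r *JobRepository, fn func(*schema.Job) error) error {
	ids, err := r.GetJobList()
	if err != nil {
		return err
	}
	for _, id := range ids {
		job, err := r.FindByIdDirect(id)
		if err != nil {
			return err
		}
		if err := fn(job); err != nil {
			return err
		}
	}
	return nil
}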
 // FindById executes a SQL query to find a specific batch job.
 // The job is queried using the database id.
 // It returns a pointer to a schema.Job data structure and an error variable.
@@ -178,7 +227,7 @@ func (r *JobRepository) FindConcurrentJobs(
 	var startTime int64
 	var stopTime int64
 
-	startTime = job.StartTimeUnix
+	startTime = job.StartTime
 	hostname := job.Resources[0].Hostname
 
 	if job.State == schema.JobStateRunning {
internal/repository/jobHooks.go (new file, 57 lines)
@@ -0,0 +1,57 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package repository
+
+import (
+	"sync"
+
+	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+)
+
+type JobHook interface {
+	JobStartCallback(job *schema.Job)
+	JobStopCallback(job *schema.Job)
+}
+
+var (
+	initOnce sync.Once
+	hooks    []JobHook
+)
+
+func RegisterJobJook(hook JobHook) {
+	initOnce.Do(func() {
+		hooks = make([]JobHook, 0)
+	})
+
+	if hook != nil {
+		hooks = append(hooks, hook)
+	}
+}
+
+func CallJobStartHooks(jobs []*schema.Job) {
+	if hooks == nil {
+		return
+	}
+
+	for _, hook := range hooks {
+		if hook != nil {
+			for _, job := range jobs {
+				hook.JobStartCallback(job)
+			}
+		}
+	}
+}
+
+func CallJobStopHooks(job *schema.Job) {
+	if hooks == nil {
+		return
+	}
+
+	for _, hook := range hooks {
+		if hook != nil {
+			hook.JobStopCallback(job)
+		}
+	}
+}
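The hook registry is consumed by other parts of this commit: the tagger registers itself through RegisterJobJook (the name is kept as introduced here) and the commit-job service invokes CallJobStartHooks after each cache sync. A minimal implementer, as a sketch inside package repository:

// Sketch: a hook that only logs start and stop events.
type loggingHook struct{}

func (h *loggingHook) JobStartCallback(job *schema.Job) { log.Infof("job %d started", job.JobID) }
func (h *loggingHook) JobStopCallback(job *schema.Job)  { log.Infof("job %d stopped", job.JobID) }

// Somewhere during initialization:
//   RegisterJobJook(&loggingHook{})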
@@ -148,9 +148,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
 	}
 	if filter.DbID != nil {
 		dbIDs := make([]string, len(filter.DbID))
-		for i, val := range filter.DbID {
-			dbIDs[i] = val
-		}
+		copy(dbIDs, filter.DbID)
 		query = query.Where(sq.Eq{"job.id": dbIDs})
 	}
 	if filter.JobID != nil {
@@ -24,7 +24,7 @@ func TestFind(t *testing.T) {
 
 	// fmt.Printf("%+v", job)
 
-	if job.ID != 5 {
+	if *job.ID != 5 {
 		t.Errorf("wrong summary for diagnostic 3\ngot: %d \nwant: 1366", job.JobID)
 	}
 }
@@ -16,7 +16,7 @@ import (
 	"github.com/golang-migrate/migrate/v4/source/iofs"
 )
 
-const Version uint = 8
+const Version uint = 10
 
 //go:embed migrations/*
 var migrationFiles embed.FS
@@ -115,8 +115,17 @@ func MigrateDB(backend string, db string) error {
 	}
 
 	v, dirty, err := m.Version()
+	if err != nil {
+		if err == migrate.ErrNilVersion {
+			log.Warn("Legacy database without version or missing database file!")
+		} else {
+			return err
+		}
+	}
 
-	log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
+	if v < Version {
+		log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
+	}
 
 	if dirty {
 		return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
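Because Version is now 10, databases created with an older schema must be migrated before the server starts. A hedged sketch of the programmatic call (the SQLite path is a placeholder; the CLI equivalent mentioned in the log message is cc-backend -migrate-db):

// Sketch: migrate an existing SQLite job database to the current schema version.
if err := repository.MigrateDB("sqlite3", "./var/job.db"); err != nil {
	log.Fatal(err)
}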
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS job_cache;
@@ -0,0 +1,31 @@
+CREATE TABLE "job_cache" (
+    id INTEGER PRIMARY KEY,
+    job_id BIGINT NOT NULL,
+    cluster VARCHAR(255) NOT NULL,
+    subcluster VARCHAR(255) NOT NULL,
+    start_time BIGINT NOT NULL, -- Unix timestamp
+    hpc_user VARCHAR(255) NOT NULL,
+    project VARCHAR(255) NOT NULL,
+    cluster_partition VARCHAR(255),
+    array_job_id BIGINT,
+    duration INT NOT NULL,
+    walltime INT NOT NULL,
+    job_state VARCHAR(255) NOT NULL
+        CHECK (job_state IN (
+            'running', 'completed', 'failed', 'cancelled',
+            'stopped', 'timeout', 'preempted', 'out_of_memory'
+        )),
+    meta_data TEXT, -- JSON
+    resources TEXT NOT NULL, -- JSON
+    num_nodes INT NOT NULL,
+    num_hwthreads INT,
+    num_acc INT,
+    smt TINYINT NOT NULL DEFAULT 1 CHECK (smt IN (0, 1)),
+    exclusive TINYINT NOT NULL DEFAULT 1 CHECK (exclusive IN (0, 1, 2)),
+    monitoring_status TINYINT NOT NULL DEFAULT 1
+        CHECK (monitoring_status IN (0, 1, 2, 3)),
+    energy REAL NOT NULL DEFAULT 0.0,
+    energy_footprint TEXT DEFAULT NULL,
+    footprint TEXT DEFAULT NULL,
+    UNIQUE (job_id, cluster, start_time)
+);
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS node;
internal/repository/migrations/sqlite3/10_node-table.up.sql (new file, 17 lines)
@@ -0,0 +1,17 @@
+CREATE TABLE "node" (
+    id INTEGER PRIMARY KEY,
+    hostname VARCHAR(255) NOT NULL,
+    cluster VARCHAR(255) NOT NULL,
+    subcluster VARCHAR(255) NOT NULL,
+    node_state VARCHAR(255) NOT NULL
+        CHECK (node_state IN (
+            'allocated', 'reserved', 'idle', 'mixed',
+            'down', 'unknown'
+        )),
+    health_state VARCHAR(255) NOT NULL
+        CHECK (health_state IN (
+            'full', 'partial', 'failed'
+        )),
+    meta_data TEXT, -- JSON
+    UNIQUE (hostname, cluster)
+);
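The node table is written through the NodeRepository added below (internal/repository/node.go); UpdateNodeState inserts an unknown node on first contact and otherwise only updates node_state. A sketch, with hostname and cluster as placeholders and assuming schema.NodeState is a string-based type whose values mirror the CHECK constraint above:

// Sketch: record a node state reported by an external source.
repo := repository.GetNodeRepository()
state := schema.NodeState("allocated")
if err := repo.UpdateNodeState("node001", "testcluster", &state); err != nil {
	log.Warnf("updating node state failed: %v", err)
}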
241
internal/repository/node.go
Normal file
241
internal/repository/node.go
Normal file
@ -0,0 +1,241 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package repository
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"maps"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
nodeRepoOnce sync.Once
|
||||||
|
nodeRepoInstance *NodeRepository
|
||||||
|
)
|
||||||
|
|
||||||
|
type NodeRepository struct {
|
||||||
|
DB *sqlx.DB
|
||||||
|
stmtCache *sq.StmtCache
|
||||||
|
cache *lrucache.Cache
|
||||||
|
driver string
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetNodeRepository() *NodeRepository {
|
||||||
|
nodeRepoOnce.Do(func() {
|
||||||
|
db := GetConnection()
|
||||||
|
|
||||||
|
nodeRepoInstance = &NodeRepository{
|
||||||
|
DB: db.DB,
|
||||||
|
driver: db.Driver,
|
||||||
|
|
||||||
|
stmtCache: sq.NewStmtCache(db.DB),
|
||||||
|
cache: lrucache.New(1024 * 1024),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return nodeRepoInstance
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) {
|
||||||
|
start := time.Now()
|
||||||
|
cachekey := fmt.Sprintf("metadata:%d", node.ID)
|
||||||
|
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||||
|
node.MetaData = cached.(map[string]string)
|
||||||
|
return node.MetaData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := sq.Select("node.meta_data").From("node").Where("node.id = ?", node.ID).
|
||||||
|
RunWith(r.stmtCache).QueryRow().Scan(&node.RawMetaData); err != nil {
|
||||||
|
log.Warn("Error while scanning for node metadata")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(node.RawMetaData) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.Unmarshal(node.RawMetaData, &node.MetaData); err != nil {
|
||||||
|
log.Warn("Error while unmarshaling raw metadata json")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
|
||||||
|
log.Debugf("Timer FetchMetadata %s", time.Since(start))
|
||||||
|
return node.MetaData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) {
|
||||||
|
cachekey := fmt.Sprintf("metadata:%d", node.ID)
|
||||||
|
r.cache.Del(cachekey)
|
||||||
|
if node.MetaData == nil {
|
||||||
|
if _, err = r.FetchMetadata(node); err != nil {
|
||||||
|
log.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.MetaData != nil {
|
||||||
|
cpy := make(map[string]string, len(node.MetaData)+1)
|
||||||
|
maps.Copy(cpy, node.MetaData)
|
||||||
|
cpy[key] = val
|
||||||
|
node.MetaData = cpy
|
||||||
|
} else {
|
||||||
|
node.MetaData = map[string]string{key: val}
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil {
|
||||||
|
log.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = sq.Update("node").
|
||||||
|
Set("meta_data", node.RawMetaData).
|
||||||
|
Where("node.id = ?", node.ID).
|
||||||
|
RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
|
log.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) GetNode(id int64, withMeta bool) (*schema.Node, error) {
|
||||||
|
node := &schema.Node{}
|
||||||
|
if err := sq.Select("id", "hostname", "cluster", "subcluster", "node_state",
|
||||||
|
"health_state").From("node").
|
||||||
|
Where("node.id = ?", id).RunWith(r.DB).
|
||||||
|
QueryRow().Scan(&node.ID, &node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState,
|
||||||
|
&node.HealthState); err != nil {
|
||||||
|
log.Warnf("Error while querying node '%v' from database", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if withMeta {
|
||||||
|
var err error
|
||||||
|
var meta map[string]string
|
||||||
|
if meta, err = r.FetchMetadata(node); err != nil {
|
||||||
|
log.Warnf("Error while fetching metadata for node '%v'", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
node.MetaData = meta
|
||||||
|
}
|
||||||
|
|
||||||
|
return node, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
const NamedNodeInsert string = `
|
||||||
|
INSERT INTO node (hostname, cluster, subcluster, node_state, health_state)
|
||||||
|
VALUES (:hostname, :cluster, :subcluster, :node_state, :health_state);`
|
||||||
|
|
||||||
|
func (r *NodeRepository) AddNode(node *schema.Node) (int64, error) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
res, err := r.DB.NamedExec(NamedNodeInsert, node)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while adding node '%v' to database", node.Hostname)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
node.ID, err = res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting last insert id for node '%v' from database", node.Hostname)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return node.ID, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeState *schema.NodeState) error {
|
||||||
|
var id int64
|
||||||
|
if err := sq.Select("id").From("node").
|
||||||
|
Where("node.hostname = ?", hostname).Where("node.cluster = ?", cluster).RunWith(r.DB).
|
||||||
|
QueryRow().Scan(&id); err != nil {
|
||||||
|
if err == sql.ErrNoRows {
|
||||||
|
subcluster, err := archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting subcluster for node '%s' in cluster '%s': %v", hostname, cluster, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
node := schema.Node{
|
||||||
|
Hostname: hostname, Cluster: cluster, SubCluster: subcluster, NodeState: *nodeState,
|
||||||
|
HealthState: schema.MonitoringStateFull,
|
||||||
|
}
|
||||||
|
_, err = r.AddNode(&node)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while adding node '%s' to database: %v", hostname, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
} else {
|
||||||
|
log.Warnf("Error while querying node '%v' from database", id)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := sq.Update("node").Set("node_state", nodeState).Where("node.id = ?", id).RunWith(r.DB).Exec(); err != nil {
|
||||||
|
log.Errorf("error while updating node '%s'", hostname)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// func (r *NodeRepository) UpdateHealthState(hostname string, healthState *schema.MonitoringState) error {
|
||||||
|
// if _, err := sq.Update("node").Set("health_state", healthState).Where("node.id = ?", id).RunWith(r.DB).Exec(); err != nil {
|
||||||
|
// log.Errorf("error while updating node '%d'", id)
|
||||||
|
// return err
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return nil
|
||||||
|
// }
|
||||||
|
|
||||||
|
func (r *NodeRepository) DeleteNode(id int64) error {
|
||||||
|
_, err := r.DB.Exec(`DELETE FROM node WHERE node.id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while deleting node '%d' from DB", id)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Infof("deleted node '%d' from DB", id)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) QueryNodes() ([]*schema.Node, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
|
||||||
|
q := sq.Select("hostname", "cluster", "subcluster", "node_state",
|
||||||
|
"health_state").From("node").Where("node.cluster = ?", cluster).OrderBy("node.hostname ASC")
|
||||||
|
|
||||||
|
rows, err := q.RunWith(r.DB).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while querying user list")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
nodeList := make([]*schema.Node, 0, 100)
|
||||||
|
defer rows.Close()
|
||||||
|
for rows.Next() {
|
||||||
|
node := &schema.Node{}
|
||||||
|
if err := rows.Scan(&node.Hostname, &node.Cluster,
|
||||||
|
&node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
|
||||||
|
log.Warn("Error while scanning node list")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeList = append(nodeList, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodeList, nil
|
||||||
|
}
|
@@ -291,7 +291,7 @@ func (r *JobRepository) JobsStats(
 	return stats, nil
 }
 
-func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
+func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
 	if stats, ok := job.Statistics[metric]; ok {
 		switch statType {
 		case "avg":
@@ -759,7 +759,6 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
 	filters []*model.JobFilter,
 	bins *int,
 ) []*model.MetricHistoPoints {
-
 	// Get Jobs
 	jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
 	if err != nil {
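LoadJobStat keeps its metric and statType parameters and now simply takes the unified *schema.Job. A one-line usage sketch (the metric name is an assumption):

// Sketch: read the stored maximum of a metric from the job's statistics.
maxMem := repository.LoadJobStat(job, "mem_used", "max")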
@ -45,6 +45,36 @@ func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*sche
|
|||||||
return tags, archive.UpdateTags(j, archiveTags)
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
|
||||||
|
j, err := r.FindByIdDirect(job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while finding job by id")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||||
|
|
||||||
|
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
|
s, _, _ := q.ToSql()
|
||||||
|
log.Errorf("Error adding tag with %s: %v", s, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags, err := r.GetTagsDirect(&job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting tags for job")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
archiveTags, err := r.getArchiveTags(&job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting tags for job")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
|
}
|
||||||
|
|
||||||
// Removes a tag from a job by tag id
|
// Removes a tag from a job by tag id
|
||||||
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
||||||
j, err := r.FindByIdWithUser(user, job)
|
j, err := r.FindByIdWithUser(user, job)
|
||||||
@@ -82,7 +112,7 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT
 	tagID, exists := r.TagId(tagType, tagName, tagScope)
 	if !exists {
 		log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
-		return nil, fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
+		return nil, fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
 	}
 
 	// Get Job
@@ -122,7 +152,7 @@ func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagSc
 	tagID, exists := r.TagId(tagType, tagName, tagScope)
 	if !exists {
 		log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
-		return fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
+		return fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
 	}
 
 	// Handle Delete JobTagTable
@ -291,6 +321,37 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
|
|||||||
return tagId, nil
|
return tagId, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
||||||
|
tagScope := "global"
|
||||||
|
|
||||||
|
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||||
|
if !exists {
|
||||||
|
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := r.AddTagDirect(jobId, tagId); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tagId, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
|
||||||
|
var id int64
|
||||||
|
q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
|
||||||
|
Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
|
||||||
|
Where("tag.tag_name = ?", tagName)
|
||||||
|
err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
} else {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TagId returns the database id of the tag with the specified type and name.
|
// TagId returns the database id of the tag with the specified type and name.
|
||||||
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||||
exists = true
|
exists = true
|
||||||
@ -346,6 +407,32 @@ func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, e
|
|||||||
return tags, nil
|
return tags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
|
||||||
|
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||||
|
if job != nil {
|
||||||
|
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := q.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
s, _, _ := q.ToSql()
|
||||||
|
log.Errorf("Error get tags with %s: %v", s, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags := make([]*schema.Tag, 0)
|
||||||
|
for rows.Next() {
|
||||||
|
tag := &schema.Tag{}
|
||||||
|
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tags = append(tags, tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, nil
|
||||||
|
}
|
||||||
|
|
||||||
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
||||||
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||||
|
internal/repository/testdata/job.db (binary, vendored): binary file not shown

internal/tagger/apps/gromacs.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
+GROMACS
+gromacs
+GMX
+mdrun

internal/tagger/apps/openfoam.txt (new file, 1 line)
@@ -0,0 +1 @@
+openfoam

internal/tagger/apps/python.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
+python
+anaconda
+conda

internal/tagger/apps/vasp.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
+VASP
+vasp
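These word lists feed the application tagger introduced below (internal/tagger/detectApp.go): each line is a substring that is searched for in the job script stored in the job's metadata, and the file name (minus extension) becomes the tag. The matching core reduces to this sketch:

// Sketch of the matching core: the first list with a substring hit names the tag.
func matchApp(jobScript string, apps map[string][]string) (string, bool) {
	for tag, needles := range apps {
		for _, s := range needles {
			if strings.Contains(jobScript, s) {
				return tag, true
			}
		}
	}
	return "", false
}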
322
internal/tagger/classifyJob.go
Normal file
322
internal/tagger/classifyJob.go
Normal file
@ -0,0 +1,322 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"embed"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"maps"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"text/template"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/expr-lang/expr"
|
||||||
|
"github.com/expr-lang/expr/vm"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed jobclasses/*
|
||||||
|
var jobclassFiles embed.FS
|
||||||
|
|
||||||
|
type Variable struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Expr string `json:"expr"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ruleVariable struct {
|
||||||
|
name string
|
||||||
|
expr *vm.Program
|
||||||
|
}
|
||||||
|
|
||||||
|
type RuleFormat struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Tag string `json:"tag"`
|
||||||
|
Parameters []string `json:"parameters"`
|
||||||
|
Metrics []string `json:"metrics"`
|
||||||
|
Requirements []string `json:"requirements"`
|
||||||
|
Variables []Variable `json:"variables"`
|
||||||
|
Rule string `json:"rule"`
|
||||||
|
Hint string `json:"hint"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ruleInfo struct {
|
||||||
|
env map[string]any
|
||||||
|
metrics []string
|
||||||
|
requirements []*vm.Program
|
||||||
|
variables []ruleVariable
|
||||||
|
rule *vm.Program
|
||||||
|
hint *template.Template
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobClassTagger struct {
|
||||||
|
rules map[string]ruleInfo
|
||||||
|
parameters map[string]any
|
||||||
|
tagType string
|
||||||
|
cfgPath string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) prepareRule(b []byte, fns string) {
|
||||||
|
var rule RuleFormat
|
||||||
|
if err := json.NewDecoder(bytes.NewReader(b)).Decode(&rule); err != nil {
|
||||||
|
log.Warn("Error while decoding raw job meta json")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ri := ruleInfo{}
|
||||||
|
ri.env = make(map[string]any)
|
||||||
|
ri.metrics = make([]string, 0)
|
||||||
|
ri.requirements = make([]*vm.Program, 0)
|
||||||
|
ri.variables = make([]ruleVariable, 0)
|
||||||
|
|
||||||
|
// check if all required parameters are available
|
||||||
|
for _, p := range rule.Parameters {
|
||||||
|
param, ok := t.parameters[p]
|
||||||
|
if !ok {
|
||||||
|
log.Warnf("prepareRule() > missing parameter %s in rule %s", p, fns)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.env[p] = param
|
||||||
|
}
|
||||||
|
|
||||||
|
// set all required metrics
|
||||||
|
ri.metrics = append(ri.metrics, rule.Metrics...)
|
||||||
|
|
||||||
|
// compile requirements
|
||||||
|
for _, r := range rule.Requirements {
|
||||||
|
req, err := expr.Compile(r, expr.AsBool())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error compiling requirement %s: %#v", r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.requirements = append(ri.requirements, req)
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile variables
|
||||||
|
for _, v := range rule.Variables {
|
||||||
|
req, err := expr.Compile(v.Expr, expr.AsFloat64())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error compiling requirement %s: %#v", v.Name, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.variables = append(ri.variables, ruleVariable{name: v.Name, expr: req})
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile rule
|
||||||
|
exp, err := expr.Compile(rule.Rule, expr.AsBool())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error compiling rule %s: %#v", fns, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.rule = exp
|
||||||
|
|
||||||
|
// prepare hint template
|
||||||
|
ri.hint, err = template.New(fns).Parse(rule.Hint)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error processing template %s: %#v", fns, err)
|
||||||
|
}
|
||||||
|
log.Infof("prepareRule() > processing %s with %d requirements and %d variables", fns, len(ri.requirements), len(ri.variables))
|
||||||
|
|
||||||
|
t.rules[rule.Tag] = ri
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) EventMatch(s string) bool {
|
||||||
|
return strings.Contains(s, "jobclasses")
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Only process the file that caused the event
|
||||||
|
func (t *JobClassTagger) EventCallback() {
|
||||||
|
files, err := os.ReadDir(t.cfgPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.CheckFileExists(t.cfgPath + "/parameters.json") {
|
||||||
|
log.Info("Merge parameters")
|
||||||
|
b, err := os.ReadFile(t.cfgPath + "/parameters.json")
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var paramTmp map[string]any
|
||||||
|
if err := json.NewDecoder(bytes.NewReader(b)).Decode(¶mTmp); err != nil {
|
||||||
|
log.Warn("Error while decoding parameters.json")
|
||||||
|
}
|
||||||
|
|
||||||
|
maps.Copy(t.parameters, paramTmp)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
if fns != "parameters.json" {
|
||||||
|
log.Debugf("Process: %s", fns)
|
||||||
|
filename := fmt.Sprintf("%s/%s", t.cfgPath, fns)
|
||||||
|
b, err := os.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.prepareRule(b, fns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) initParameters() error {
|
||||||
|
log.Info("Initialize parameters")
|
||||||
|
b, err := jobclassFiles.ReadFile("jobclasses/parameters.json")
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(bytes.NewReader(b)).Decode(&t.parameters); err != nil {
|
||||||
|
log.Warn("Error while decoding parameters.json")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) Register() error {
|
||||||
|
t.cfgPath = "./var/tagger/jobclasses"
|
||||||
|
t.tagType = "jobClass"
|
||||||
|
|
||||||
|
err := t.initParameters()
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("error reading parameters.json: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := jobclassFiles.ReadDir("jobclasses")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error reading app folder: %#v", err)
|
||||||
|
}
|
||||||
|
t.rules = make(map[string]ruleInfo, 0)
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
if fns != "parameters.json" {
|
||||||
|
filename := fmt.Sprintf("jobclasses/%s", fns)
|
||||||
|
log.Infof("Process: %s", fns)
|
||||||
|
|
||||||
|
b, err := jobclassFiles.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
t.prepareRule(b, fns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.CheckFileExists(t.cfgPath) {
|
||||||
|
t.EventCallback()
|
||||||
|
log.Infof("Setup file watch for %s", t.cfgPath)
|
||||||
|
util.AddListener(t.cfgPath, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) Match(job *schema.Job) {
|
||||||
|
r := repository.GetJobRepository()
|
||||||
|
jobstats, err := archive.GetStatistics(job)
|
||||||
|
metricsList := archive.GetMetricConfigSubCluster(job.Cluster, job.SubCluster)
|
||||||
|
log.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("job classification failed for job %d: %#v", job.JobID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for tag, ri := range t.rules {
|
||||||
|
env := make(map[string]any)
|
||||||
|
maps.Copy(env, ri.env)
|
||||||
|
log.Infof("Try to match rule %s for job %d", tag, job.JobID)
|
||||||
|
|
||||||
|
// Initialize environment
|
||||||
|
env["job"] = map[string]any{
|
||||||
|
"exclusive": job.Exclusive,
|
||||||
|
"duration": job.Duration,
|
||||||
|
"numCores": job.NumHWThreads,
|
||||||
|
"numNodes": job.NumNodes,
|
||||||
|
"jobState": job.State,
|
||||||
|
"numAcc": job.NumAcc,
|
||||||
|
"smt": job.SMT,
|
||||||
|
}
|
||||||
|
|
||||||
|
// add metrics to env
|
||||||
|
for _, m := range ri.metrics {
|
||||||
|
stats, ok := jobstats[m]
|
||||||
|
if !ok {
|
||||||
|
log.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
env[m] = map[string]any{
|
||||||
|
"min": stats.Min,
|
||||||
|
"max": stats.Max,
|
||||||
|
"avg": stats.Avg,
|
||||||
|
"limits": map[string]float64{
|
||||||
|
"peak": metricsList[m].Peak,
|
||||||
|
"normal": metricsList[m].Normal,
|
||||||
|
"caution": metricsList[m].Caution,
|
||||||
|
"alert": metricsList[m].Alert,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check rule requirements apply
|
||||||
|
for _, r := range ri.requirements {
|
||||||
|
ok, err := expr.Run(r, env)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error running requirement for rule %s: %#v", tag, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !ok.(bool) {
|
||||||
|
log.Infof("requirement for rule %s not met", tag)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate rule expression
|
||||||
|
for _, v := range ri.variables {
|
||||||
|
value, err := expr.Run(v.expr, env)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error running rule %s: %#v", tag, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
env[v.name] = value
|
||||||
|
}
|
||||||
|
|
||||||
|
// dump.P(env)
|
||||||
|
|
||||||
|
match, err := expr.Run(ri.rule, env)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error running rule %s: %#v", tag, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if match.(bool) {
|
||||||
|
log.Info("Rule matches!")
|
||||||
|
id := *job.ID
|
||||||
|
if !r.HasTag(id, t.tagType, tag) {
|
||||||
|
r.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
// process hint template
|
||||||
|
var msg bytes.Buffer
|
||||||
|
if err := ri.hint.Execute(&msg, env); err != nil {
|
||||||
|
log.Errorf("Template error: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Handle case where multiple tags apply
|
||||||
|
r.UpdateMetadata(job, "message", msg.String())
|
||||||
|
} else {
|
||||||
|
log.Info("Rule does not match!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
125
internal/tagger/detectApp.go
Normal file
125
internal/tagger/detectApp.go
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"embed"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed apps/*
|
||||||
|
var appFiles embed.FS
|
||||||
|
|
||||||
|
type appInfo struct {
|
||||||
|
tag string
|
||||||
|
strings []string
|
||||||
|
}
|
||||||
|
|
||||||
|
type AppTagger struct {
|
||||||
|
apps map[string]appInfo
|
||||||
|
tagType string
|
||||||
|
cfgPath string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) scanApp(f fs.File, fns string) {
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
ai := appInfo{tag: strings.TrimSuffix(fns, filepath.Ext(fns)), strings: make([]string, 0)}
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
ai.strings = append(ai.strings, scanner.Text())
|
||||||
|
}
|
||||||
|
delete(t.apps, ai.tag)
|
||||||
|
t.apps[ai.tag] = ai
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) EventMatch(s string) bool {
|
||||||
|
return strings.Contains(s, "apps")
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Only process the file that caused the event
|
||||||
|
func (t *AppTagger) EventCallback() {
|
||||||
|
files, err := os.ReadDir(t.cfgPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
log.Debugf("Process: %s", fns)
|
||||||
|
f, err := os.Open(fmt.Sprintf("%s/%s", t.cfgPath, fns))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error opening app file %s: %#v", fns, err)
|
||||||
|
}
|
||||||
|
t.scanApp(f, fns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) Register() error {
|
||||||
|
t.cfgPath = "./var/tagger/apps"
|
||||||
|
t.tagType = "app"
|
||||||
|
|
||||||
|
files, err := appFiles.ReadDir("apps")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error reading app folder: %#v", err)
|
||||||
|
}
|
||||||
|
t.apps = make(map[string]appInfo, 0)
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
log.Debugf("Process: %s", fns)
|
||||||
|
f, err := appFiles.Open(fmt.Sprintf("apps/%s", fns))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error opening app file %s: %#v", fns, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
t.scanApp(f, fns)
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.CheckFileExists(t.cfgPath) {
|
||||||
|
t.EventCallback()
|
||||||
|
log.Infof("Setup file watch for %s", t.cfgPath)
|
||||||
|
util.AddListener(t.cfgPath, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) Match(job *schema.Job) {
|
||||||
|
r := repository.GetJobRepository()
|
||||||
|
metadata, err := r.FetchMetadata(job)
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
jobscript, ok := metadata["jobScript"]
|
||||||
|
if ok {
|
||||||
|
id := *job.ID
|
||||||
|
|
||||||
|
out:
|
||||||
|
for _, a := range t.apps {
|
||||||
|
tag := a.tag
|
||||||
|
for _, s := range a.strings {
|
||||||
|
if strings.Contains(jobscript, s) {
|
||||||
|
if !r.HasTag(id, t.tagType, tag) {
|
||||||
|
r.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||||
|
break out
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster)
|
||||||
|
}
|
||||||
|
}
|
59
internal/tagger/detectApp_test.go
Normal file
59
internal/tagger/detectApp_test.go
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
func setup(tb testing.TB) *repository.JobRepository {
|
||||||
|
tb.Helper()
|
||||||
|
log.Init("warn", true)
|
||||||
|
dbfile := "../repository/testdata/job.db"
|
||||||
|
err := repository.MigrateDB("sqlite3", dbfile)
|
||||||
|
noErr(tb, err)
|
||||||
|
repository.Connect("sqlite3", dbfile)
|
||||||
|
return repository.GetJobRepository()
|
||||||
|
}
|
||||||
|
|
||||||
|
func noErr(tb testing.TB, err error) {
|
||||||
|
tb.Helper()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
tb.Fatal("Error is not nil:", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRegister(t *testing.T) {
|
||||||
|
var tagger AppTagger
|
||||||
|
|
||||||
|
err := tagger.Register()
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
if len(tagger.apps) != 4 {
|
||||||
|
t.Errorf("wrong summary for diagnostic \ngot: %d \nwant: 3", len(tagger.apps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatch(t *testing.T) {
|
||||||
|
r := setup(t)
|
||||||
|
|
||||||
|
job, err := r.FindByIdDirect(5)
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
var tagger AppTagger
|
||||||
|
|
||||||
|
err = tagger.Register()
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
tagger.Match(job)
|
||||||
|
|
||||||
|
if !r.HasTag(5, "app", "vasp") {
|
||||||
|
t.Errorf("missing tag vasp")
|
||||||
|
}
|
||||||
|
}
|
26
internal/tagger/jobclasses/highload.json
Normal file
26
internal/tagger/jobclasses/highload.json
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "Excessive CPU load",
|
||||||
|
"tag": "excessiveload",
|
||||||
|
"parameters": [
|
||||||
|
"excessivecpuload_threshold_factor",
|
||||||
|
"job_min_duration_seconds",
|
||||||
|
"sampling_interval_seconds"
|
||||||
|
],
|
||||||
|
"metrics": ["cpu_load"],
|
||||||
|
"requirements": [
|
||||||
|
"job.exclusive == 1",
|
||||||
|
"job.duration > job_min_duration_seconds"
|
||||||
|
],
|
||||||
|
"variables": [
|
||||||
|
{
|
||||||
|
"name": "load_threshold",
|
||||||
|
"expr": "(job.numCores / job.numNodes) * excessivecpuload_threshold_factor"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "load_perc",
|
||||||
|
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rule": "cpu_load.avg > cpu_load.limits.peak",
|
||||||
|
"hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.cpu_load.limits.peak}}."
|
||||||
|
}
|
26
internal/tagger/jobclasses/lowload.json
Normal file
26
internal/tagger/jobclasses/lowload.json
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "Low CPU load",
|
||||||
|
"tag": "lowload",
|
||||||
|
"parameters": [
|
||||||
|
"lowcpuload_threshold_factor",
|
||||||
|
"job_min_duration_seconds",
|
||||||
|
"sampling_interval_seconds"
|
||||||
|
],
|
||||||
|
"metrics": ["cpu_load"],
|
||||||
|
"requirements": [
|
||||||
|
"job.exclusive == 1",
|
||||||
|
"job.duration > job_min_duration_seconds"
|
||||||
|
],
|
||||||
|
"variables": [
|
||||||
|
{
|
||||||
|
"name": "load_threshold",
|
||||||
|
"expr": "job.numCores * lowcpuload_threshold_factor"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "load_perc",
|
||||||
|
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rule": "cpu_load.avg < cpu_load.limits.caution",
|
||||||
|
"hint": "This job was detected as lowload because the average cpu load {{.cpu_load}} falls below the threshold {{.cpu_load.limits.caution}}."
|
||||||
|
}
|
14
internal/tagger/jobclasses/parameters.json
Normal file
14
internal/tagger/jobclasses/parameters.json
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"lowcpuload_threshold_factor": 0.9,
|
||||||
|
"excessivecpuload_threshold_factor": 1.1,
|
||||||
|
"highmemoryusage_threshold_factor": 0.9,
|
||||||
|
"node_load_imbalance_threshold_factor": 0.1,
|
||||||
|
"core_load_imbalance_threshold_factor": 0.1,
|
||||||
|
"high_memory_load_threshold_factor": 0.9,
|
||||||
|
"lowgpuload_threshold_factor": 0.7,
|
||||||
|
"memory_leak_slope_threshold": 0.1,
|
||||||
|
"job_min_duration_seconds": 600.0,
|
||||||
|
"sampling_interval_seconds": 30.0,
|
||||||
|
"cpu_load_pre_cutoff_samples": 11.0,
|
||||||
|
"cpu_load_core_pre_cutoff_samples": 6.0
|
||||||
|
}
|
21
internal/tagger/rules.json
Normal file
21
internal/tagger/rules.json
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"and": [
|
||||||
|
{
|
||||||
|
"in": [
|
||||||
|
"a40",
|
||||||
|
{
|
||||||
|
"var": "metaData.jobScript"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
">": [
|
||||||
|
{
|
||||||
|
"var": "statistics.clock.min"
|
||||||
|
},
|
||||||
|
2000
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
88
internal/tagger/tagger.go
Normal file
88
internal/tagger/tagger.go
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Tagger interface {
|
||||||
|
Register() error
|
||||||
|
Match(job *schema.Job)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
jobTagger *JobTagger
|
||||||
|
)
|
||||||
|
|
||||||
|
type JobTagger struct {
|
||||||
|
startTaggers []Tagger
|
||||||
|
stopTaggers []Tagger
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTagger() {
|
||||||
|
jobTagger = &JobTagger{}
|
||||||
|
jobTagger.startTaggers = make([]Tagger, 0)
|
||||||
|
jobTagger.startTaggers = append(jobTagger.startTaggers, &AppTagger{})
|
||||||
|
jobTagger.stopTaggers = make([]Tagger, 0)
|
||||||
|
jobTagger.stopTaggers = append(jobTagger.stopTaggers, &JobClassTagger{})
|
||||||
|
|
||||||
|
for _, tagger := range jobTagger.startTaggers {
|
||||||
|
tagger.Register()
|
||||||
|
}
|
||||||
|
for _, tagger := range jobTagger.stopTaggers {
|
||||||
|
tagger.Register()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Init() {
|
||||||
|
initOnce.Do(func() {
|
||||||
|
newTagger()
|
||||||
|
repository.RegisterJobJook(jobTagger)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (jt *JobTagger) JobStartCallback(job *schema.Job) {
|
||||||
|
for _, tagger := range jt.startTaggers {
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (jt *JobTagger) JobStopCallback(job *schema.Job) {
|
||||||
|
for _, tagger := range jt.stopTaggers {
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func RunTaggers() error {
|
||||||
|
newTagger()
|
||||||
|
r := repository.GetJobRepository()
|
||||||
|
jl, err := r.GetJobList()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting job list %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, id := range jl {
|
||||||
|
job, err := r.FindByIdDirect(id)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting job %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, tagger := range jobTagger.startTaggers {
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
for _, tagger := range jobTagger.stopTaggers {
|
||||||
|
log.Infof("Run stop tagger for job %d", job.ID)
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
31
internal/tagger/tagger_test.go
Normal file
31
internal/tagger/tagger_test.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestInit(t *testing.T) {
|
||||||
|
Init()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestJobStartCallback(t *testing.T) {
|
||||||
|
Init()
|
||||||
|
r := setup(t)
|
||||||
|
job, err := r.FindByIdDirect(2)
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
jobs := make([]*schema.Job, 0, 1)
|
||||||
|
jobs = append(jobs, job)
|
||||||
|
|
||||||
|
repository.CallJobStartHooks(jobs)
|
||||||
|
if !r.HasTag(2, "app", "python") {
|
||||||
|
t.Errorf("missing tag python")
|
||||||
|
}
|
||||||
|
}
|
internal/taskManager/commitJobService.go (new file, 35 lines)
@@ -0,0 +1,35 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package taskManager
+
+import (
+	"time"
+
+	"github.com/ClusterCockpit/cc-backend/internal/config"
+	"github.com/ClusterCockpit/cc-backend/internal/repository"
+	"github.com/ClusterCockpit/cc-backend/pkg/log"
+	"github.com/go-co-op/gocron/v2"
+)
+
+func RegisterCommitJobService() {
+	var frequency string
+	if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.CommitJobWorker != "" {
+		frequency = config.Keys.CronFrequency.CommitJobWorker
+	} else {
+		frequency = "2m"
+	}
+	d, _ := time.ParseDuration(frequency)
+	log.Infof("Register commitJob service with %s interval", frequency)
+
+	s.NewJob(gocron.DurationJob(d),
+		gocron.NewTask(
+			func() {
+				start := time.Now()
+				log.Printf("Jobcache sync started at %s", start.Format(time.RFC3339))
+				jobs, _ := jobRepo.SyncJobs()
+				repository.CallJobStartHooks(jobs)
+				log.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start))
+			}))
+}
@@ -81,6 +81,7 @@ func Start() {
 
 	RegisterFootprintWorker()
 	RegisterUpdateDurationWorker()
+	RegisterCommitJobService()
 
 	s.Start()
 }
|
@@ -73,11 +73,7 @@ func RegisterFootprintWorker() {
 					continue
 				}
 
-				jobMeta := &schema.JobMeta{
-					BaseJob:    job.BaseJob,
-					StartTime:  job.StartTime.Unix(),
-					Statistics: make(map[string]schema.JobStatistics),
-				}
+				job.Statistics = make(map[string]schema.JobStatistics)
 
 				for _, metric := range allMetrics {
 					avg, min, max := 0.0, 0.0, 0.0
@@ -95,7 +91,7 @@ func RegisterFootprintWorker() {
 				}
 
 				// Add values rounded to 2 digits: repo.LoadStats may return unrounded
-				jobMeta.Statistics[metric] = schema.JobStatistics{
+				job.Statistics[metric] = schema.JobStatistics{
 					Unit: schema.Unit{
 						Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
 						Base:   archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
@@ -108,7 +104,7 @@ func RegisterFootprintWorker() {
 
 				// Build Statement per Job, Add to Pending Array
 				stmt := sq.Update("job")
-				stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
+				stmt, err = jobRepo.UpdateFootprint(stmt, job)
 				if err != nil {
 					log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
 					ce++
|
internal/util/fswatcher.go (new file, 75 lines)
@@ -0,0 +1,75 @@
+// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package util
+
+import (
+	"sync"
+
+	"github.com/ClusterCockpit/cc-backend/pkg/log"
+	"github.com/fsnotify/fsnotify"
+)
+
+type Listener interface {
+	EventCallback()
+	EventMatch(event string) bool
+}
+
+var (
+	initOnce  sync.Once
+	w         *fsnotify.Watcher
+	listeners []Listener
+)
+
+func AddListener(path string, l Listener) {
+	var err error
+
+	initOnce.Do(func() {
+		var err error
+		w, err = fsnotify.NewWatcher()
+		if err != nil {
+			log.Error("creating a new watcher: %w", err)
+		}
+		listeners = make([]Listener, 0)
+
+		go watchLoop(w)
+	})
+
+	listeners = append(listeners, l)
+	err = w.Add(path)
+	if err != nil {
+		log.Warnf("%q: %s", path, err)
+	}
+}
+
+func FsWatcherShutdown() {
+	if w != nil {
+		w.Close()
+	}
+}
+
+func watchLoop(w *fsnotify.Watcher) {
+	for {
+		select {
+		// Read from Errors.
+		case err, ok := <-w.Errors:
+			if !ok { // Channel was closed (i.e. Watcher.Close() was called).
+				return
+			}
+			log.Errorf("watch event loop: %s", err)
+		// Read from Events.
+		case e, ok := <-w.Events:
+			if !ok { // Channel was closed (i.e. Watcher.Close() was called).
+				return
+			}
+
+			log.Infof("Event %s", e)
+			for _, l := range listeners {
+				if l.EventMatch(e.String()) {
+					l.EventCallback()
+				}
+			}
+		}
+	}
+}
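A usage sketch of the new watcher API, not part of this commit: only util.AddListener, the Listener interface, and util.FsWatcherShutdown are taken from the file above; the configReloader type, the watched path, and the matching rule are hypothetical.

package main

import (
	"log"
	"strings"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/util"
)

// configReloader is a hypothetical listener that reacts to writes to a config file.
type configReloader struct{}

// EventMatch filters the fsnotify event string for the file we care about.
func (c *configReloader) EventMatch(event string) bool {
	return strings.Contains(event, "config.json")
}

// EventCallback is invoked by the watch loop for every matching event.
func (c *configReloader) EventCallback() {
	log.Println("config.json changed, reloading")
}

func main() {
	util.AddListener("./config.json", &configReloader{})
	defer util.FsWatcherShutdown()

	// Real callers keep serving requests here; sleeping stands in for that.
	time.Sleep(10 * time.Minute)
}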
@@ -23,7 +23,7 @@ type ArchiveBackend interface {
 
 	Exists(job *schema.Job) bool
 
-	LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
+	LoadJobMeta(job *schema.Job) (*schema.Job, error)
 
 	LoadJobData(job *schema.Job) (schema.JobData, error)
 
@@ -31,9 +31,9 @@ type ArchiveBackend interface {
 
 	LoadClusterCfg(name string) (*schema.Cluster, error)
 
-	StoreJobMeta(jobMeta *schema.JobMeta) error
+	StoreJobMeta(jobMeta *schema.Job) error
 
-	ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) error
+	ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
 
 	GetClusters() []string
 
@@ -51,7 +51,7 @@ type ArchiveBackend interface {
 }
 
 type JobContainer struct {
-	Meta *schema.JobMeta
+	Meta *schema.Job
 	Data *schema.JobData
 }
 
@@ -162,7 +162,6 @@ func LoadScopedStatsFromArchive(
 	metrics []string,
 	scopes []schema.MetricScope,
 ) (schema.ScopedJobStats, error) {
-
 	data, err := ar.LoadJobStats(job)
 	if err != nil {
 		log.Errorf("Error while loading job stats from archiveBackend: %s", err.Error())
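A hedged sketch of consuming the revised interface, not part of this commit: archive.GetHandle appears later in this diff (in the test setup) and LoadJobMeta now returns the unified *schema.Job; the lookup helper, its error handling, and the sample job values are illustrative only, and the archive subsystem is assumed to have been initialized during startup.

package main

import (
	"log"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// lookup loads archived metadata for a job through ArchiveBackend, which now
// hands back *schema.Job directly instead of the removed *schema.JobMeta.
func lookup(job *schema.Job) (*schema.Job, error) {
	ar := archive.GetHandle() // assumes the archive was initialized elsewhere
	if !ar.Exists(job) {
		log.Printf("job %d not archived", job.JobID)
		return nil, nil
	}
	return ar.LoadJobMeta(job)
}

func main() {
	j := &schema.Job{JobID: 1403244, Cluster: "emmy", StartTime: 1608923076}
	if meta, err := lookup(j); err == nil && meta != nil {
		log.Printf("loaded job %d from archive", meta.JobID)
	}
}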
@@ -9,7 +9,6 @@ import (
 	"fmt"
 	"path/filepath"
 	"testing"
-	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/util"
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -32,12 +31,12 @@ func setup(t *testing.T) archive.ArchiveBackend {
 	jobs[0] = &schema.Job{}
 	jobs[0].JobID = 1403244
 	jobs[0].Cluster = "emmy"
-	jobs[0].StartTime = time.Unix(1608923076, 0)
+	jobs[0].StartTime = 1608923076
 
 	jobs[1] = &schema.Job{}
 	jobs[0].JobID = 1404397
 	jobs[0].Cluster = "emmy"
-	jobs[0].StartTime = time.Unix(1609300556, 0)
+	jobs[0].StartTime = 1609300556
 
 	return archive.GetHandle()
 }
@@ -69,16 +69,18 @@ func initClusterConfig() error {
 
 		for _, sc := range cluster.SubClusters {
 			newMetric := &schema.MetricConfig{
-				Unit:          mc.Unit,
+				Metric: schema.Metric{
+					Name:    mc.Name,
+					Unit:    mc.Unit,
+					Peak:    mc.Peak,
+					Normal:  mc.Normal,
+					Caution: mc.Caution,
+					Alert:   mc.Alert,
+				},
 				Energy:        mc.Energy,
-				Name:          mc.Name,
 				Scope:         mc.Scope,
 				Aggregation:   mc.Aggregation,
-				Peak:          mc.Peak,
-				Caution:       mc.Caution,
-				Alert:         mc.Alert,
 				Timestep:      mc.Timestep,
-				Normal:        mc.Normal,
 				LowerIsBetter: mc.LowerIsBetter,
 			}
 
@@ -167,6 +169,45 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
 	return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
 }
 
+func GetMetricConfigSubCluster(cluster, subcluster string) map[string]*schema.Metric {
+	metrics := make(map[string]*schema.Metric)
+
+	for _, c := range Clusters {
+		if c.Name == cluster {
+			for _, m := range c.MetricConfig {
+				for _, s := range m.SubClusters {
+					if s.Name == subcluster {
+						metrics[m.Name] = &schema.Metric{
+							Name:    m.Name,
+							Unit:    s.Unit,
+							Peak:    s.Peak,
+							Normal:  s.Normal,
+							Caution: s.Caution,
+							Alert:   s.Alert,
+						}
+						break
+					}
+				}
+
+				_, ok := metrics[m.Name]
+				if !ok {
+					metrics[m.Name] = &schema.Metric{
+						Name:    m.Name,
+						Unit:    m.Unit,
+						Peak:    m.Peak,
+						Normal:  m.Normal,
+						Caution: m.Caution,
+						Alert:   m.Alert,
+					}
+				}
+			}
+
+			break
+		}
+	}
+
+	return metrics
+}
+
 func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
 	for _, c := range Clusters {
 		if c.Name == cluster {
@@ -182,7 +223,7 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
 
 // AssignSubCluster sets the `job.subcluster` property of the job based
 // on its cluster and resources.
-func AssignSubCluster(job *schema.BaseJob) error {
+func AssignSubCluster(job *schema.Job) error {
 	cluster := GetCluster(job.Cluster)
 	if cluster == nil {
 		return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
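A short example of calling the new helper, not part of this commit: the cluster and subcluster names are placeholders, and the cluster configurations are assumed to have been loaded during startup.

package main

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
)

func main() {
	// Resolve the effective metric limits for one subcluster; subcluster
	// overrides win, the cluster-wide values fill in the rest.
	limits := archive.GetMetricConfigSubCluster("fritz", "main")
	for name, m := range limits {
		fmt.Printf("%s: peak=%.1f normal=%.1f caution=%.1f alert=%.1f\n",
			name, m.Peak, m.Normal, m.Caution, m.Alert)
	}
}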
@@ -53,28 +53,27 @@ func getDirectory(
 		rootPath,
 		job.Cluster,
 		lvl1, lvl2,
-		strconv.FormatInt(job.StartTime.Unix(), 10))
+		strconv.FormatInt(job.StartTime, 10))
 }
 
 func getPath(
 	job *schema.Job,
 	rootPath string,
-	file string) string {
+	file string,
+) string {
 	return filepath.Join(
 		getDirectory(job, rootPath), file)
 }
 
-func loadJobMeta(filename string) (*schema.JobMeta, error) {
+func loadJobMeta(filename string) (*schema.Job, error) {
 
 	b, err := os.ReadFile(filename)
 	if err != nil {
 		log.Errorf("loadJobMeta() > open file error: %v", err)
-		return &schema.JobMeta{}, err
+		return nil, err
 	}
 	if config.Keys.Validate {
 		if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
-			return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err)
+			return nil, fmt.Errorf("validate job meta: %v", err)
 		}
 	}
 
@@ -83,7 +82,6 @@ func loadJobMeta(filename string) (*schema.JobMeta, error) {
 
 func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
 	f, err := os.Open(filename)
-
 	if err != nil {
 		log.Errorf("fsBackend LoadJobData()- %v", err)
 		return nil, err
@@ -117,7 +115,6 @@ func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
 
 func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, error) {
 	f, err := os.Open(filename)
-
 	if err != nil {
 		log.Errorf("fsBackend LoadJobStats()- %v", err)
 		return nil, err
@@ -150,7 +147,6 @@ func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, er
 }
 
 func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) {
-
 	var config FsArchiveConfig
 	if err := json.Unmarshal(rawConfig, &config); err != nil {
 		log.Warnf("Init() > Unmarshal error: %#v", err)
@@ -276,7 +272,6 @@ func (fsa *FsArchive) Exists(job *schema.Job) bool {
 }
 
 func (fsa *FsArchive) Clean(before int64, after int64) {
-
 	if after == 0 {
 		after = math.MaxInt64
 	}
@@ -392,7 +387,6 @@ func (fsa *FsArchive) Compress(jobs []*schema.Job) {
 }
 
 func (fsa *FsArchive) CompressLast(starttime int64) int64 {
-
 	filename := filepath.Join(fsa.path, "compress.txt")
 	b, err := os.ReadFile(filename)
 	if err != nil {
@@ -435,13 +429,12 @@ func (fsa *FsArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, erro
 	return loadJobStats(filename, isCompressed)
 }
 
-func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {
+func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
 	filename := getPath(job, fsa.path, "meta.json")
 	return loadJobMeta(filename)
 }
 
 func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
-
 	b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
 	if err != nil {
 		log.Errorf("LoadClusterCfg() > open file error: %v", err)
@@ -456,7 +449,6 @@ func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
 }
 
 func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
-
 	ch := make(chan JobContainer)
 	go func() {
 		clustersDir, err := os.ReadDir(fsa.path)
@@ -526,19 +518,13 @@ func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
 	return ch
 }
 
-func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
-
-	job := schema.Job{
-		BaseJob:       jobMeta.BaseJob,
-		StartTime:     time.Unix(jobMeta.StartTime, 0),
-		StartTimeUnix: jobMeta.StartTime,
-	}
-	f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
+func (fsa *FsArchive) StoreJobMeta(job *schema.Job) error {
+	f, err := os.Create(getPath(job, fsa.path, "meta.json"))
 	if err != nil {
 		log.Error("Error while creating filepath for meta.json")
 		return err
 	}
-	if err := EncodeJobMeta(f, jobMeta); err != nil {
+	if err := EncodeJobMeta(f, job); err != nil {
 		log.Error("Error while encoding job metadata to meta.json file")
 		return err
 	}
@@ -555,15 +541,10 @@ func (fsa *FsArchive) GetClusters() []string {
 }
 
 func (fsa *FsArchive) ImportJob(
-	jobMeta *schema.JobMeta,
-	jobData *schema.JobData) error {
-
-	job := schema.Job{
-		BaseJob:       jobMeta.BaseJob,
-		StartTime:     time.Unix(jobMeta.StartTime, 0),
-		StartTimeUnix: jobMeta.StartTime,
-	}
-
-	dir := getPath(&job, fsa.path, "")
+	jobMeta *schema.Job,
+	jobData *schema.JobData,
+) error {
+	dir := getPath(jobMeta, fsa.path, "")
 	if err := os.MkdirAll(dir, 0777); err != nil {
 		log.Error("Error while creating job archive path")
 		return err
@@ -583,28 +564,6 @@ func (fsa *FsArchive) ImportJob(
 		return err
 	}
 
-	// var isCompressed bool = true
-	// // TODO Use shortJob Config for check
-	// if jobMeta.Duration < 300 {
-	// 	isCompressed = false
-	// 	f, err = os.Create(path.Join(dir, "data.json"))
-	// } else {
-	// 	f, err = os.Create(path.Join(dir, "data.json.gz"))
-	// }
-	// if err != nil {
-	// 	return err
-	// }
-	//
-	// if isCompressed {
-	// 	if err := EncodeJobData(gzip.NewWriter(f), jobData); err != nil {
-	// 		return err
-	// 	}
-	// } else {
-	// 	if err := EncodeJobData(f, jobData); err != nil {
-	// 		return err
-	// 	}
-	// }
-
 	f, err = os.Create(path.Join(dir, "data.json"))
 	if err != nil {
 		log.Error("Error while creating filepath for data.json")
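A hedged sketch of importing a job through the simplified API, not part of this commit: ImportJob now takes the unified *schema.Job directly, so callers no longer build the intermediate struct removed above. The field values are placeholders, the job data is left empty for brevity, and the archive backend is assumed to be initialized.

package main

import (
	"log"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
	job := &schema.Job{
		JobID:     1403244,
		Cluster:   "emmy",
		StartTime: 1608923076, // plain Unix timestamp now, no time.Time wrapper
	}
	data := schema.JobData{} // normally filled from the metric data repository

	ar := archive.GetHandle() // assumes the archive backend was initialized at startup
	if err := ar.ImportJob(job, &data); err != nil {
		log.Fatalf("import failed: %v", err)
	}
}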
@@ -9,7 +9,6 @@ import (
 	"fmt"
 	"path/filepath"
 	"testing"
-	"time"
 
 	"github.com/ClusterCockpit/cc-backend/internal/util"
 	"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -86,8 +85,11 @@ func TestLoadJobMeta(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	jobIn := schema.Job{BaseJob: schema.JobDefaults}
-	jobIn.StartTime = time.Unix(1608923076, 0)
+	jobIn := schema.Job{
+		Exclusive:        1,
+		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+	}
+	jobIn.StartTime = 1608923076
 	jobIn.JobID = 1403244
 	jobIn.Cluster = "emmy"
 
@@ -114,8 +116,11 @@ func TestLoadJobData(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	jobIn := schema.Job{BaseJob: schema.JobDefaults}
-	jobIn.StartTime = time.Unix(1608923076, 0)
+	jobIn := schema.Job{
+		Exclusive:        1,
+		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+	}
+	jobIn.StartTime = 1608923076
 	jobIn.JobID = 1403244
 	jobIn.Cluster = "emmy"
 
@@ -142,8 +147,11 @@ func BenchmarkLoadJobData(b *testing.B) {
 	var fsa FsArchive
 	fsa.Init(json.RawMessage(archiveCfg))
 
-	jobIn := schema.Job{BaseJob: schema.JobDefaults}
-	jobIn.StartTime = time.Unix(1608923076, 0)
+	jobIn := schema.Job{
+		Exclusive:        1,
+		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+	}
+	jobIn.StartTime = 1608923076
 	jobIn.JobID = 1403244
 	jobIn.Cluster = "emmy"
 
@@ -165,8 +173,11 @@ func BenchmarkLoadJobDataCompressed(b *testing.B) {
 	var fsa FsArchive
 	fsa.Init(json.RawMessage(archiveCfg))
 
-	jobIn := schema.Job{BaseJob: schema.JobDefaults}
-	jobIn.StartTime = time.Unix(1608923076, 0)
+	jobIn := schema.Job{
+		Exclusive:        1,
+		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
+	}
+	jobIn.StartTime = 1608923076
 	jobIn.JobID = 1403244
 	jobIn.Cluster = "emmy"
 
@@ -69,8 +69,8 @@ func DecodeJobStats(r io.Reader, k string) (schema.ScopedJobStats, error) {
 	return nil, err
 }
 
-func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
-	var d schema.JobMeta
+func DecodeJobMeta(r io.Reader) (*schema.Job, error) {
+	var d schema.Job
 	if err := json.NewDecoder(r).Decode(&d); err != nil {
 		log.Warn("Error while decoding raw job meta json")
 		return &d, err
@@ -103,7 +103,7 @@ func EncodeJobData(w io.Writer, d *schema.JobData) error {
 	return nil
 }
 
-func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
+func EncodeJobMeta(w io.Writer, d *schema.Job) error {
 	// Sanitize parameters
 	if err := json.NewEncoder(w).Encode(d); err != nil {
 		log.Warn("Error while encoding new job meta json")
@@ -61,7 +61,7 @@ func (nl *NodeList) PrintList() []string {
 }
 
 func (nl *NodeList) NodeCount() int {
-	var out int = 0
+	out := 0
 	for _, term := range *nl {
 		if len(term) == 1 { // If only String-Part in Term: Single Node Name -> add one
 			out += 1
@@ -160,7 +160,7 @@ func (nle NLExprIntRange) limits() []map[string]int {
 	m["start"] = int(nle.start)
 	m["end"] = int(nle.end)
 	m["digits"] = int(nle.digits)
-	if nle.zeroPadded == true {
+	if nle.zeroPadded {
 		m["zeroPadded"] = 1
 	} else {
 		m["zeroPadded"] = 0
@@ -183,14 +183,15 @@ func ParseNodeList(raw string) (NodeList, error) {
 	rawterms := []string{}
 	prevterm := 0
 	for i := 0; i < len(raw); i++ {
-		if raw[i] == '[' {
+		switch raw[i] {
+		case '[':
 			for i < len(raw) && raw[i] != ']' {
 				i++
 			}
 			if i == len(raw) {
 				return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
 			}
-		} else if raw[i] == ',' {
+		case ',':
 			rawterms = append(rawterms, raw[prevterm:i])
 			prevterm = i + 1
 		}
@@ -45,31 +45,31 @@ type SubCluster struct {
 	ThreadsPerCore int `json:"threadsPerCore"`
 }
 
+type Metric struct {
+	Name    string  `json:"name"`
+	Unit    Unit    `json:"unit"`
+	Peak    float64 `json:"peak"`
+	Normal  float64 `json:"normal"`
+	Caution float64 `json:"caution"`
+	Alert   float64 `json:"alert"`
+}
+
 type SubClusterConfig struct {
-	Name          string  `json:"name"`
+	Metric
 	Footprint     string `json:"footprint,omitempty"`
 	Energy        string `json:"energy"`
-	Peak          float64 `json:"peak"`
-	Normal        float64 `json:"normal"`
-	Caution       float64 `json:"caution"`
-	Alert         float64 `json:"alert"`
 	Remove        bool   `json:"remove"`
 	LowerIsBetter bool   `json:"lowerIsBetter"`
 }
 
 type MetricConfig struct {
-	Unit          Unit                `json:"unit"`
+	Metric
 	Energy        string              `json:"energy"`
-	Name          string              `json:"name"`
 	Scope         MetricScope         `json:"scope"`
 	Aggregation   string              `json:"aggregation"`
 	Footprint     string              `json:"footprint,omitempty"`
 	SubClusters   []*SubClusterConfig `json:"subClusters,omitempty"`
-	Peak          float64             `json:"peak"`
-	Caution       float64             `json:"caution"`
-	Alert         float64             `json:"alert"`
 	Timestep      int                 `json:"timestep"`
-	Normal        float64             `json:"normal"`
 	LowerIsBetter bool                `json:"lowerIsBetter"`
 }
 
@@ -127,7 +127,7 @@ func (topo *Topology) GetSocketsFromHWThreads(
 // those in the argument list are assigned to one of the sockets in the first
 // return value, return true as the second value. TODO: Optimize this, there
 // must be a more efficient way/algorithm.
-func (topo *Topology) GetSocketsFromCores (
+func (topo *Topology) GetSocketsFromCores(
 	cores []int,
 ) (sockets []int, exclusive bool) {
 	socketsMap := map[int]int{}
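Because Metric is now embedded in SubClusterConfig and MetricConfig (see the struct changes above), its fields are promoted and existing accessors keep reading naturally. A small sketch, not part of this commit; the metric name, unit, and peak value are made up:

package main

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
	mc := schema.MetricConfig{
		Metric: schema.Metric{
			Name: "flops_any",
			Unit: schema.Unit{Base: "Flops/s"},
			Peak: 9216,
		},
	}
	// Embedding promotes the fields, so call sites written against the old
	// flat layout still compile:
	fmt.Println(mc.Name, mc.Peak)
}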
@@ -89,6 +89,8 @@ type ResampleConfig struct {
 }
 
 type CronFrequency struct {
+	// Duration Update Worker [Defaults to '2m']
+	CommitJobWorker string `json:"commit-job-worker"`
 	// Duration Update Worker [Defaults to '5m']
 	DurationWorker string `json:"duration-worker"`
 	// Metric-Footprint Update Worker [Defaults to '10m']
@@ -129,6 +131,8 @@ type ProgramConfig struct {
 	// do not write to the job-archive.
 	DisableArchive bool `json:"disable-archive"`
 
+	EnableJobTaggers bool `json:"enable-job-taggers"`
+
 	// Validate json input against schema
 	Validate bool `json:"validate"`
 
@@ -150,7 +154,7 @@ type ProgramConfig struct {
 
 	// If overwritten, at least all the options in the defaults below must
 	// be provided! Most options here can be overwritten by the user.
-	UiDefaults map[string]interface{} `json:"ui-defaults"`
+	UiDefaults map[string]any `json:"ui-defaults"`
 
 	// If exists, will enable dynamic zoom in frontend metric plots using the configured values
 	EnableResampling *ResampleConfig `json:"enable-resampling"`
@@ -8,43 +8,8 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"time"
 )
 
-// BaseJob is the common part of the job metadata structs
-//
-// Common subset of Job and JobMeta. Use one of those, not this type directly.
-
-type BaseJob struct {
-	Cluster            string             `json:"cluster" db:"cluster" example:"fritz"`
-	SubCluster         string             `json:"subCluster" db:"subcluster" example:"main"`
-	Partition          string             `json:"partition,omitempty" db:"cluster_partition" example:"main"`
-	Project            string             `json:"project" db:"project" example:"abcd200"`
-	User               string             `json:"user" db:"hpc_user" example:"abcd100h"`
-	State              JobState           `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
-	Tags               []*Tag             `json:"tags,omitempty"`
-	RawEnergyFootprint []byte             `json:"-" db:"energy_footprint"`
-	RawFootprint       []byte             `json:"-" db:"footprint"`
-	RawMetaData        []byte             `json:"-" db:"meta_data"`
-	RawResources       []byte             `json:"-" db:"resources"`
-	Resources          []*Resource        `json:"resources"`
-	EnergyFootprint    map[string]float64 `json:"energyFootprint"`
-	Footprint          map[string]float64 `json:"footprint"`
-	MetaData           map[string]string  `json:"metaData"`
-	ConcurrentJobs     JobLinkResultList  `json:"concurrentJobs"`
-	Energy             float64            `json:"energy" db:"energy"`
-	ArrayJobId         int64              `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
-	Walltime           int64              `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
-	JobID              int64              `json:"jobId" db:"job_id" example:"123000"`
-	Duration           int32              `json:"duration" db:"duration" example:"43200" minimum:"1"`
-	SMT                int32              `json:"smt,omitempty" db:"smt" example:"4"`
-	MonitoringStatus   int32              `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
-	Exclusive          int32              `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
-	NumAcc             int32              `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
-	NumHWThreads       int32              `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
-	NumNodes           int32              `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
-}
-
 // Job struct type
 //
 // This type is used as the GraphQL interface and using sqlx as a table row.
@@ -52,10 +17,37 @@ type BaseJob struct {
 // Job model
 // @Description Information of a HPC job.
 type Job struct {
-	StartTime     time.Time `json:"startTime"`
-	BaseJob
-	ID            int64 `json:"id" db:"id"`
-	StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
+	Cluster            string             `json:"cluster" db:"cluster" example:"fritz"`
+	SubCluster         string             `json:"subCluster" db:"subcluster" example:"main"`
+	Partition          string             `json:"partition,omitempty" db:"cluster_partition" example:"main"`
+	Project            string             `json:"project" db:"project" example:"abcd200"`
+	User               string             `json:"user" db:"hpc_user" example:"abcd100h"`
+	State              JobState           `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
+	Tags               []*Tag             `json:"tags,omitempty"`
+	RawEnergyFootprint []byte             `json:"-" db:"energy_footprint"`
+	RawFootprint       []byte             `json:"-" db:"footprint"`
+	RawMetaData        []byte             `json:"-" db:"meta_data"`
+	RawResources       []byte             `json:"-" db:"resources"`
+	Resources          []*Resource        `json:"resources"`
+	EnergyFootprint    map[string]float64 `json:"energyFootprint"`
+	Footprint          map[string]float64 `json:"footprint"`
+	MetaData           map[string]string  `json:"metaData"`
+	ConcurrentJobs     JobLinkResultList  `json:"concurrentJobs"`
+	Energy             float64            `json:"energy" db:"energy"`
+	ArrayJobId         int64              `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
+	Walltime           int64              `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
+	RequestedMemory    int64              `json:"requestedMemory,omitempty" db:"requested_memory" example:"128000" minimum:"1"` // in MB
+	JobID              int64              `json:"jobId" db:"job_id" example:"123000"`
+	Duration           int32              `json:"duration" db:"duration" example:"43200" minimum:"1"`
+	SMT                int32              `json:"smt,omitempty" db:"smt" example:"4"`
+	MonitoringStatus   int32              `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
+	Exclusive          int32              `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
+	NumAcc             int32              `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
+	NumHWThreads       int32              `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
+	NumNodes           int32              `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
+	Statistics         map[string]JobStatistics `json:"statistics"`
+	ID                 *int64             `json:"id,omitempty" db:"id"`
+	StartTime          int64              `json:"startTime" db:"start_time" example:"1649723812"`
 }
 
 // JobMeta struct type
@@ -68,6 +60,14 @@ type Job struct {
 // *int64 `json:"id,omitempty"` >> never used in the job-archive, only
 // available via REST-API
 //
+// JobMeta model
+// @Description Meta data information of a HPC job.
+// type JobMeta struct {
+// 	ID         *int64                   `json:"id,omitempty"`
+// 	BaseJob
+// 	Statistics map[string]JobStatistics `json:"statistics"`
+// 	StartTime  int64                    `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
+// }
 
 type JobLink struct {
 	ID int64 `json:"id"`
@@ -79,15 +79,6 @@ type JobLinkResultList struct {
 	Count int `json:"count"`
 }
 
-// JobMeta model
-// @Description Meta data information of a HPC job.
-type JobMeta struct {
-	ID         *int64                   `json:"id,omitempty"`
-	Statistics map[string]JobStatistics `json:"statistics"`
-	BaseJob
-	StartTime  int64                    `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
-}
-
 const (
 	MonitoringStatusDisabled           int32 = 0
 	MonitoringStatusRunningOrArchiving int32 = 1
@@ -95,10 +86,10 @@ const (
 	MonitoringStatusArchivingSuccessful int32 = 3
 )
 
-var JobDefaults BaseJob = BaseJob{
-	Exclusive:        1,
-	MonitoringStatus: MonitoringStatusRunningOrArchiving,
-}
+// var JobDefaults Job = Job{
+// 	Exclusive:        1,
+// 	MonitoringStatus: MonitoringStatusRunningOrArchiving,
+// }
 
 type Unit struct {
 	Base string `json:"base"`
@@ -145,7 +136,12 @@ const (
 	JobStateOutOfMemory JobState = "out_of_memory"
 )
 
-func (e *JobState) UnmarshalGQL(v interface{}) error {
+func (j Job) GoString() string {
+	return fmt.Sprintf("Job{ID:%d, StartTime:%d, JobID:%v, BaseJob:%v}",
+		j.ID, j.StartTime, j.JobID, j)
+}
+
+func (e *JobState) UnmarshalGQL(v any) error {
 	str, ok := v.(string)
 	if !ok {
 		return fmt.Errorf("SCHEMA/JOB > enums must be strings")
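With the consolidation above, code that used to combine BaseJob, JobMeta, and the StartTime/StartTimeUnix pair now fills a single struct. A minimal sketch, not part of this commit, mirroring the sample job used in the updated tests in this diff:

package main

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
	job := schema.Job{
		Exclusive:        1,
		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
	}
	job.StartTime = 1608923076 // plain Unix seconds now, no time.Time plus StartTimeUnix
	job.JobID = 1403244
	job.Cluster = "emmy"

	fmt.Printf("job %d on %s started at %d\n", job.JobID, job.Cluster, job.StartTime)
}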
pkg/schema/node.go (new file, 35 lines)
@@ -0,0 +1,35 @@
+// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
+// All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+package schema
+
+type NodeState string
+
+const (
+	NodeStateAllocated NodeState = "allocated"
+	NodeStateReserved  NodeState = "reserved"
+	NodeStateIdle      NodeState = "idle"
+	NodeStateMixed     NodeState = "mixed"
+	NodeStateDown      NodeState = "down"
+	NodeStateUnknown   NodeState = "unknown"
+)
+
+type MonitoringState string
+
+const (
+	MonitoringStateFull    MonitoringState = "full"
+	MonitoringStatePartial MonitoringState = "partial"
+	MonitoringStateFailed  MonitoringState = "failed"
+)
+
+type Node struct {
+	ID          int64             `json:"id" db:"id"`
+	Hostname    string            `json:"hostname" db:"hostname" example:"fritz"`
+	Cluster     string            `json:"cluster" db:"cluster" example:"fritz"`
+	SubCluster  string            `json:"subCluster" db:"subcluster" example:"main"`
+	NodeState   NodeState         `json:"nodeState" db:"node_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
+	HealthState MonitoringState   `json:"healthState" db:"health_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
+	RawMetaData []byte            `json:"-" db:"meta_data"`
+	MetaData    map[string]string `json:"metaData"`
+}
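A hedged example of how the new Node type might be filled, not part of this commit: the hostname and cluster values are placeholders, and the JSON round-trip only illustrates the struct tags defined above.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

func main() {
	n := schema.Node{
		Hostname:    "f0101",
		Cluster:     "fritz",
		SubCluster:  "main",
		NodeState:   schema.NodeStateAllocated,
		HealthState: schema.MonitoringStateFull,
	}
	b, _ := json.Marshal(n)
	fmt.Println(string(b)) // {"id":0,"hostname":"f0101",...}
}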
@@ -6,6 +6,7 @@ package schema
 
 import (
 	"fmt"
+	"slices"
 	"strings"
 )
 
@@ -50,12 +51,7 @@ type User struct {
 }
 
 func (u *User) HasProject(project string) bool {
-	for _, p := range u.Projects {
-		if p == project {
-			return true
-		}
-	}
-	return false
+	return slices.Contains(u.Projects, project)
 }
 
 func GetRoleString(roleInt Role) string {
@@ -28,12 +28,13 @@ const (
 //go:embed schemas/*
 var schemaFiles embed.FS
 
-func Validate(k Kind, r io.Reader) (err error) {
+func Validate(k Kind, r io.Reader) error {
 	jsonschema.Loaders["embedfs"] = func(s string) (io.ReadCloser, error) {
 		f := filepath.Join("schemas", strings.Split(s, "//")[1])
 		return schemaFiles.Open(f)
 	}
 	var s *jsonschema.Schema
+	var err error
 
 	switch k {
 	case Meta:
@@ -54,7 +55,7 @@ func Validate(k Kind, r io.Reader) (err error) {
 	}
 
 	var v interface{}
-	if err := json.NewDecoder(r).Decode(&v); err != nil {
+	if err = json.NewDecoder(r).Decode(&v); err != nil {
 		log.Warnf("Error while decoding raw json schema: %#v", err)
 		return err
 	}