mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-01 11:13:50 +02:00
Merge branch 'dev' into port-to-cclib
This commit is contained in:
commit
544fb35121
@ -4,61 +4,78 @@ scalar Any
|
|||||||
scalar NullableFloat
|
scalar NullableFloat
|
||||||
scalar MetricScope
|
scalar MetricScope
|
||||||
scalar JobState
|
scalar JobState
|
||||||
|
scalar NodeState
|
||||||
|
scalar MonitoringState
|
||||||
|
|
||||||
|
type Node {
|
||||||
|
id: ID!
|
||||||
|
hostname: String!
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
nodeState: NodeState!
|
||||||
|
HealthState: MonitoringState!
|
||||||
|
metaData: Any
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStats {
|
||||||
|
state: String!
|
||||||
|
count: Int!
|
||||||
|
}
|
||||||
|
|
||||||
type Job {
|
type Job {
|
||||||
id: ID!
|
id: ID!
|
||||||
jobId: Int!
|
jobId: Int!
|
||||||
user: String!
|
user: String!
|
||||||
project: String!
|
project: String!
|
||||||
cluster: String!
|
cluster: String!
|
||||||
subCluster: String!
|
subCluster: String!
|
||||||
startTime: Time!
|
startTime: Time!
|
||||||
duration: Int!
|
duration: Int!
|
||||||
walltime: Int!
|
walltime: Int!
|
||||||
numNodes: Int!
|
numNodes: Int!
|
||||||
numHWThreads: Int!
|
numHWThreads: Int!
|
||||||
numAcc: Int!
|
numAcc: Int!
|
||||||
energy: Float!
|
energy: Float!
|
||||||
SMT: Int!
|
SMT: Int!
|
||||||
exclusive: Int!
|
exclusive: Int!
|
||||||
partition: String!
|
partition: String!
|
||||||
arrayJobId: Int!
|
arrayJobId: Int!
|
||||||
monitoringStatus: Int!
|
monitoringStatus: Int!
|
||||||
state: JobState!
|
state: JobState!
|
||||||
tags: [Tag!]!
|
tags: [Tag!]!
|
||||||
resources: [Resource!]!
|
resources: [Resource!]!
|
||||||
concurrentJobs: JobLinkResultList
|
concurrentJobs: JobLinkResultList
|
||||||
footprint: [FootprintValue]
|
footprint: [FootprintValue]
|
||||||
energyFootprint: [EnergyFootprintValue]
|
energyFootprint: [EnergyFootprintValue]
|
||||||
metaData: Any
|
metaData: Any
|
||||||
userData: User
|
userData: User
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobLink {
|
type JobLink {
|
||||||
id: ID!
|
id: ID!
|
||||||
jobId: Int!
|
jobId: Int!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Cluster {
|
type Cluster {
|
||||||
name: String!
|
name: String!
|
||||||
partitions: [String!]! # Slurm partitions
|
partitions: [String!]! # Slurm partitions
|
||||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||||
}
|
}
|
||||||
|
|
||||||
type SubCluster {
|
type SubCluster {
|
||||||
name: String!
|
name: String!
|
||||||
nodes: String!
|
nodes: String!
|
||||||
numberOfNodes: Int!
|
numberOfNodes: Int!
|
||||||
processorType: String!
|
processorType: String!
|
||||||
socketsPerNode: Int!
|
socketsPerNode: Int!
|
||||||
coresPerSocket: Int!
|
coresPerSocket: Int!
|
||||||
threadsPerCore: Int!
|
threadsPerCore: Int!
|
||||||
flopRateScalar: MetricValue!
|
flopRateScalar: MetricValue!
|
||||||
flopRateSimd: MetricValue!
|
flopRateSimd: MetricValue!
|
||||||
memoryBandwidth: MetricValue!
|
memoryBandwidth: MetricValue!
|
||||||
topology: Topology!
|
topology: Topology!
|
||||||
metricConfig: [MetricConfig!]!
|
metricConfig: [MetricConfig!]!
|
||||||
footprint: [String!]!
|
footprint: [String!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type FootprintValue {
|
type FootprintValue {
|
||||||
@ -80,99 +97,112 @@ type MetricValue {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Topology {
|
type Topology {
|
||||||
node: [Int!]
|
node: [Int!]
|
||||||
socket: [[Int!]!]
|
socket: [[Int!]!]
|
||||||
memoryDomain: [[Int!]!]
|
memoryDomain: [[Int!]!]
|
||||||
die: [[Int!]!]
|
die: [[Int!]!]
|
||||||
core: [[Int!]!]
|
core: [[Int!]!]
|
||||||
accelerators: [Accelerator!]
|
accelerators: [Accelerator!]
|
||||||
}
|
}
|
||||||
|
|
||||||
type Accelerator {
|
type Accelerator {
|
||||||
id: String!
|
id: String!
|
||||||
type: String!
|
type: String!
|
||||||
model: String!
|
model: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type SubClusterConfig {
|
type SubClusterConfig {
|
||||||
name: String!
|
name: String!
|
||||||
peak: Float
|
peak: Float
|
||||||
normal: Float
|
normal: Float
|
||||||
caution: Float
|
caution: Float
|
||||||
alert: Float
|
alert: Float
|
||||||
remove: Boolean
|
remove: Boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricConfig {
|
type MetricConfig {
|
||||||
name: String!
|
name: String!
|
||||||
unit: Unit!
|
unit: Unit!
|
||||||
scope: MetricScope!
|
scope: MetricScope!
|
||||||
aggregation: String!
|
aggregation: String!
|
||||||
timestep: Int!
|
timestep: Int!
|
||||||
peak: Float!
|
peak: Float!
|
||||||
normal: Float
|
normal: Float
|
||||||
caution: Float!
|
caution: Float!
|
||||||
alert: Float!
|
alert: Float!
|
||||||
lowerIsBetter: Boolean
|
lowerIsBetter: Boolean
|
||||||
subClusters: [SubClusterConfig!]!
|
subClusters: [SubClusterConfig!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Tag {
|
type Tag {
|
||||||
id: ID!
|
id: ID!
|
||||||
type: String!
|
type: String!
|
||||||
name: String!
|
name: String!
|
||||||
scope: String!
|
scope: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Resource {
|
type Resource {
|
||||||
hostname: String!
|
hostname: String!
|
||||||
hwthreads: [Int!]
|
hwthreads: [Int!]
|
||||||
accelerators: [String!]
|
accelerators: [String!]
|
||||||
configuration: String
|
configuration: String
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetricWithName {
|
type JobMetricWithName {
|
||||||
name: String!
|
name: String!
|
||||||
scope: MetricScope!
|
scope: MetricScope!
|
||||||
metric: JobMetric!
|
metric: JobMetric!
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetric {
|
type JobMetric {
|
||||||
unit: Unit
|
unit: Unit
|
||||||
timestep: Int!
|
timestep: Int!
|
||||||
series: [Series!]
|
series: [Series!]
|
||||||
statisticsSeries: StatsSeries
|
statisticsSeries: StatsSeries
|
||||||
}
|
}
|
||||||
|
|
||||||
type Series {
|
type Series {
|
||||||
hostname: String!
|
hostname: String!
|
||||||
id: String
|
id: String
|
||||||
statistics: MetricStatistics
|
statistics: MetricStatistics
|
||||||
data: [NullableFloat!]!
|
data: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type StatsSeries {
|
type StatsSeries {
|
||||||
mean: [NullableFloat!]!
|
mean: [NullableFloat!]!
|
||||||
median: [NullableFloat!]!
|
median: [NullableFloat!]!
|
||||||
min: [NullableFloat!]!
|
min: [NullableFloat!]!
|
||||||
max: [NullableFloat!]!
|
max: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobStatsWithScope {
|
type NamedStatsWithScope {
|
||||||
name: String!
|
name: String!
|
||||||
scope: MetricScope!
|
scope: MetricScope!
|
||||||
stats: [ScopedStats!]!
|
stats: [ScopedStats!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type ScopedStats {
|
type ScopedStats {
|
||||||
hostname: String!
|
hostname: String!
|
||||||
id: String
|
id: String
|
||||||
data: MetricStatistics!
|
data: MetricStatistics!
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobStats {
|
type JobStats {
|
||||||
name: String!
|
id: Int!
|
||||||
stats: MetricStatistics!
|
jobId: String!
|
||||||
|
startTime: Int!
|
||||||
|
duration: Int!
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
numNodes: Int!
|
||||||
|
numHWThreads: Int
|
||||||
|
numAccelerators: Int
|
||||||
|
stats: [NamedStats!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NamedStats {
|
||||||
|
name: String!
|
||||||
|
data: MetricStatistics!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Unit {
|
type Unit {
|
||||||
@ -188,12 +218,12 @@ type MetricStatistics {
|
|||||||
|
|
||||||
type MetricFootprints {
|
type MetricFootprints {
|
||||||
metric: String!
|
metric: String!
|
||||||
data: [NullableFloat!]!
|
data: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Footprints {
|
type Footprints {
|
||||||
timeWeights: TimeWeights!
|
timeWeights: TimeWeights!
|
||||||
metrics: [MetricFootprints!]!
|
metrics: [MetricFootprints!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type TimeWeights {
|
type TimeWeights {
|
||||||
@ -202,20 +232,33 @@ type TimeWeights {
|
|||||||
coreHours: [NullableFloat!]!
|
coreHours: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
enum Aggregate {
|
||||||
enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS }
|
USER
|
||||||
|
PROJECT
|
||||||
|
CLUSTER
|
||||||
|
}
|
||||||
|
enum SortByAggregate {
|
||||||
|
TOTALWALLTIME
|
||||||
|
TOTALJOBS
|
||||||
|
TOTALNODES
|
||||||
|
TOTALNODEHOURS
|
||||||
|
TOTALCORES
|
||||||
|
TOTALCOREHOURS
|
||||||
|
TOTALACCS
|
||||||
|
TOTALACCHOURS
|
||||||
|
}
|
||||||
|
|
||||||
type NodeMetrics {
|
type NodeMetrics {
|
||||||
host: String!
|
host: String!
|
||||||
subCluster: String!
|
subCluster: String!
|
||||||
metrics: [JobMetricWithName!]!
|
metrics: [JobMetricWithName!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type NodesResultList {
|
type NodesResultList {
|
||||||
items: [NodeMetrics!]!
|
items: [NodeMetrics!]!
|
||||||
offset: Int
|
offset: Int
|
||||||
limit: Int
|
limit: Int
|
||||||
count: Int
|
count: Int
|
||||||
totalNodes: Int
|
totalNodes: Int
|
||||||
hasNextPage: Boolean
|
hasNextPage: Boolean
|
||||||
}
|
}
|
||||||
@ -234,14 +277,14 @@ type GlobalMetricListItem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Count {
|
type Count {
|
||||||
name: String!
|
name: String!
|
||||||
count: Int!
|
count: Int!
|
||||||
}
|
}
|
||||||
|
|
||||||
type User {
|
type User {
|
||||||
username: String!
|
username: String!
|
||||||
name: String!
|
name: String!
|
||||||
email: String!
|
email: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
input MetricStatItem {
|
input MetricStatItem {
|
||||||
@ -250,26 +293,81 @@ input MetricStatItem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Query {
|
type Query {
|
||||||
clusters: [Cluster!]! # List of all clusters
|
clusters: [Cluster!]! # List of all clusters
|
||||||
tags: [Tag!]! # List of all tags
|
tags: [Tag!]! # List of all tags
|
||||||
globalMetrics: [GlobalMetricListItem!]!
|
globalMetrics: [GlobalMetricListItem!]!
|
||||||
|
|
||||||
user(username: String!): User
|
user(username: String!): User
|
||||||
allocatedNodes(cluster: String!): [Count!]!
|
allocatedNodes(cluster: String!): [Count!]!
|
||||||
|
|
||||||
|
node(id: ID!): Node
|
||||||
|
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
|
||||||
|
nodeStats(filter: [NodeFilter!]): [NodeStats!]!
|
||||||
|
|
||||||
job(id: ID!): Job
|
job(id: ID!): Job
|
||||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
|
jobMetrics(
|
||||||
jobStats(id: ID!, metrics: [String!]): [JobStats!]!
|
id: ID!
|
||||||
scopedJobStats(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobStatsWithScope!]!
|
metrics: [String!]
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
resolution: Int
|
||||||
|
): [JobMetricWithName!]!
|
||||||
|
|
||||||
|
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||||
|
|
||||||
|
scopedJobStats(
|
||||||
|
id: ID!
|
||||||
|
metrics: [String!]
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
): [NamedStatsWithScope!]!
|
||||||
|
|
||||||
|
jobs(
|
||||||
|
filter: [JobFilter!]
|
||||||
|
page: PageRequest
|
||||||
|
order: OrderByInput
|
||||||
|
): JobResultList!
|
||||||
|
|
||||||
|
jobsStatistics(
|
||||||
|
filter: [JobFilter!]
|
||||||
|
metrics: [String!]
|
||||||
|
page: PageRequest
|
||||||
|
sortBy: SortByAggregate
|
||||||
|
groupBy: Aggregate
|
||||||
|
numDurationBins: String
|
||||||
|
numMetricBins: Int
|
||||||
|
): [JobsStatistics!]!
|
||||||
|
|
||||||
|
jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]!
|
||||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||||
|
|
||||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
rooflineHeatmap(
|
||||||
jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]!
|
filter: [JobFilter!]!
|
||||||
|
rows: Int!
|
||||||
|
cols: Int!
|
||||||
|
minX: Float!
|
||||||
|
minY: Float!
|
||||||
|
maxX: Float!
|
||||||
|
maxY: Float!
|
||||||
|
): [[Float!]!]!
|
||||||
|
|
||||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
nodeMetrics(
|
||||||
|
cluster: String!
|
||||||
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
nodes: [String!]
|
||||||
nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList!
|
scopes: [MetricScope!]
|
||||||
|
metrics: [String!]
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
): [NodeMetrics!]!
|
||||||
|
nodeMetricsList(
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
nodeFilter: String!
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
metrics: [String!]
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
page: PageRequest
|
||||||
|
resolution: Int
|
||||||
|
): NodesResultList!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Mutation {
|
type Mutation {
|
||||||
@ -282,37 +380,52 @@ type Mutation {
|
|||||||
updateConfiguration(name: String!, value: String!): String
|
updateConfiguration(name: String!, value: String!): String
|
||||||
}
|
}
|
||||||
|
|
||||||
type IntRangeOutput { from: Int!, to: Int! }
|
type IntRangeOutput {
|
||||||
type TimeRangeOutput { range: String, from: Time!, to: Time! }
|
from: Int!
|
||||||
|
to: Int!
|
||||||
|
}
|
||||||
|
type TimeRangeOutput {
|
||||||
|
range: String
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
}
|
||||||
|
|
||||||
|
input NodeFilter {
|
||||||
|
hostname: StringInput
|
||||||
|
cluster: StringInput
|
||||||
|
nodeState: NodeState
|
||||||
|
healthState: MonitoringState
|
||||||
|
}
|
||||||
|
|
||||||
input JobFilter {
|
input JobFilter {
|
||||||
tags: [ID!]
|
tags: [ID!]
|
||||||
jobId: StringInput
|
dbId: [ID!]
|
||||||
arrayJobId: Int
|
jobId: StringInput
|
||||||
user: StringInput
|
arrayJobId: Int
|
||||||
project: StringInput
|
user: StringInput
|
||||||
jobName: StringInput
|
project: StringInput
|
||||||
cluster: StringInput
|
jobName: StringInput
|
||||||
partition: StringInput
|
cluster: StringInput
|
||||||
duration: IntRange
|
partition: StringInput
|
||||||
energy: FloatRange
|
duration: IntRange
|
||||||
|
energy: FloatRange
|
||||||
|
|
||||||
minRunningFor: Int
|
minRunningFor: Int
|
||||||
|
|
||||||
numNodes: IntRange
|
numNodes: IntRange
|
||||||
numAccelerators: IntRange
|
numAccelerators: IntRange
|
||||||
numHWThreads: IntRange
|
numHWThreads: IntRange
|
||||||
|
|
||||||
startTime: TimeRange
|
startTime: TimeRange
|
||||||
state: [JobState!]
|
state: [JobState!]
|
||||||
metricStats: [MetricStatItem!]
|
metricStats: [MetricStatItem!]
|
||||||
exclusive: Int
|
exclusive: Int
|
||||||
node: StringInput
|
node: StringInput
|
||||||
}
|
}
|
||||||
|
|
||||||
input OrderByInput {
|
input OrderByInput {
|
||||||
field: String!
|
field: String!
|
||||||
type: String!,
|
type: String!
|
||||||
order: SortDirectionEnum! = ASC
|
order: SortDirectionEnum! = ASC
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -322,34 +435,46 @@ enum SortDirectionEnum {
|
|||||||
}
|
}
|
||||||
|
|
||||||
input StringInput {
|
input StringInput {
|
||||||
eq: String
|
eq: String
|
||||||
neq: String
|
neq: String
|
||||||
contains: String
|
contains: String
|
||||||
startsWith: String
|
startsWith: String
|
||||||
endsWith: String
|
endsWith: String
|
||||||
in: [String!]
|
in: [String!]
|
||||||
}
|
}
|
||||||
|
|
||||||
input IntRange { from: Int!, to: Int! }
|
input IntRange {
|
||||||
input TimeRange { range: String, from: Time, to: Time }
|
from: Int!
|
||||||
|
to: Int!
|
||||||
|
}
|
||||||
|
input TimeRange {
|
||||||
|
range: String
|
||||||
|
from: Time
|
||||||
|
to: Time
|
||||||
|
}
|
||||||
|
|
||||||
input FloatRange {
|
input FloatRange {
|
||||||
from: Float!
|
from: Float!
|
||||||
to: Float!
|
to: Float!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeStateResultList {
|
||||||
|
items: [Node!]!
|
||||||
|
count: Int
|
||||||
|
}
|
||||||
|
|
||||||
type JobResultList {
|
type JobResultList {
|
||||||
items: [Job!]!
|
items: [Job!]!
|
||||||
offset: Int
|
offset: Int
|
||||||
limit: Int
|
limit: Int
|
||||||
count: Int
|
count: Int
|
||||||
hasNextPage: Boolean
|
hasNextPage: Boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobLinkResultList {
|
type JobLinkResultList {
|
||||||
listQuery: String
|
listQuery: String
|
||||||
items: [JobLink!]!
|
items: [JobLink!]!
|
||||||
count: Int
|
count: Int
|
||||||
}
|
}
|
||||||
|
|
||||||
type HistoPoint {
|
type HistoPoint {
|
||||||
@ -371,27 +496,27 @@ type MetricHistoPoint {
|
|||||||
max: Int
|
max: Int
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobsStatistics {
|
type JobsStatistics {
|
||||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||||
totalJobs: Int! # Number of jobs
|
totalJobs: Int! # Number of jobs
|
||||||
runningJobs: Int! # Number of running jobs
|
runningJobs: Int! # Number of running jobs
|
||||||
shortJobs: Int! # Number of jobs with a duration below the short-job duration threshold
|
shortJobs: Int! # Number of jobs with a duration below the short-job duration threshold
|
||||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||||
totalNodes: Int! # Sum of the nodes of all matched jobs
|
totalNodes: Int! # Sum of the nodes of all matched jobs
|
||||||
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
||||||
totalCores: Int! # Sum of the cores of all matched jobs
|
totalCores: Int! # Sum of the cores of all matched jobs
|
||||||
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||||
totalAccs: Int! # Sum of the accs of all matched jobs
|
totalAccs: Int! # Sum of the accs of all matched jobs
|
||||||
totalAccHours: Int! # Sum of the accelerator hours of all matched jobs
|
totalAccHours: Int! # Sum of the accelerator hours of all matched jobs
|
||||||
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
||||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||||
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
||||||
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
||||||
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
||||||
}
|
}
|
||||||
|
|
||||||
input PageRequest {
|
input PageRequest {
|
||||||
itemsPerPage: Int!
|
itemsPerPage: Int!
|
||||||
page: Int!
|
page: Int!
|
||||||
}
|
}
|
||||||
|
273
api/swagger.json
273
api/swagger.json
@ -201,7 +201,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -271,7 +271,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -341,7 +341,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -460,7 +460,7 @@
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.",
|
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.",
|
||||||
"consumes": [
|
"consumes": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -478,7 +478,7 @@
|
|||||||
"in": "body",
|
"in": "body",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
"$ref": "#/definitions/schema.Job"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -486,7 +486,7 @@
|
|||||||
"201": {
|
"201": {
|
||||||
"description": "Job added successfully",
|
"description": "Job added successfully",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -529,7 +529,7 @@
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'JobMeta' scheme.",
|
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'Job' scheme.",
|
||||||
"produces": [
|
"produces": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -552,7 +552,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
"$ref": "#/definitions/schema.Job"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -674,7 +674,7 @@
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
|
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
|
||||||
"produces": [
|
"produces": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -748,7 +748,7 @@
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
|
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
|
||||||
"consumes": [
|
"consumes": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -826,6 +826,66 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/api/nodestats/": {
|
||||||
|
"post": {
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"ApiKeyAuth": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Delivers updated Slurm node states for a cluster.\nThe request body names the cluster and lists its nodes together with their current states.",
|
||||||
|
"produces": [
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"Nodestates"
|
||||||
|
],
|
||||||
|
"summary": "Deliver updated Slurm node states",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "Request body containing nodes and their states",
|
||||||
|
"name": "request",
|
||||||
|
"in": "body",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.UpdateNodeStatesRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Success message",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Internal Server Error",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/api/users/": {
|
"/api/users/": {
|
||||||
"get": {
|
"get": {
|
||||||
"security": [
|
"security": [
|
||||||
@ -1074,7 +1134,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"api.DefaultJobApiResponse": {
|
"api.DefaultApiResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"msg": {
|
"msg": {
|
||||||
@ -1168,7 +1228,7 @@
|
|||||||
"description": "Array of jobs",
|
"description": "Array of jobs",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
"$ref": "#/definitions/schema.Job"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"page": {
|
"page": {
|
||||||
@ -1191,6 +1251,20 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"api.Node": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"hostname": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"states": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"api.StopJobApiRequest": {
|
"api.StopJobApiRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
@ -1224,6 +1298,21 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"api.UpdateNodeStatesRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"cluster": {
|
||||||
|
"type": "string",
|
||||||
|
"example": "fritz"
|
||||||
|
},
|
||||||
|
"nodes": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/api.Node"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.Accelerator": {
|
"schema.Accelerator": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -1259,7 +1348,6 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"schema.Job": {
|
"schema.Job": {
|
||||||
"description": "Information of a HPC job.",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"arrayJobId": {
|
"arrayJobId": {
|
||||||
@ -1357,6 +1445,12 @@
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd200"
|
"example": "abcd200"
|
||||||
},
|
},
|
||||||
|
"requestedMemory": {
|
||||||
|
"description": "in MB",
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"example": 128000
|
||||||
|
},
|
||||||
"resources": {
|
"resources": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
@ -1368,7 +1462,14 @@
|
|||||||
"example": 4
|
"example": 4
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"type": "string"
|
"type": "integer",
|
||||||
|
"example": 1649723812
|
||||||
|
},
|
||||||
|
"statistics": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"$ref": "#/definitions/schema.JobStatistics"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"subCluster": {
|
"subCluster": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@ -1416,147 +1517,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"schema.JobMeta": {
|
|
||||||
"description": "Meta data information of a HPC job.",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"arrayJobId": {
|
|
||||||
"type": "integer",
|
|
||||||
"example": 123000
|
|
||||||
},
|
|
||||||
"cluster": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "fritz"
|
|
||||||
},
|
|
||||||
"concurrentJobs": {
|
|
||||||
"$ref": "#/definitions/schema.JobLinkResultList"
|
|
||||||
},
|
|
||||||
"duration": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 43200
|
|
||||||
},
|
|
||||||
"energy": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"energyFootprint": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"exclusive": {
|
|
||||||
"type": "integer",
|
|
||||||
"maximum": 2,
|
|
||||||
"minimum": 0,
|
|
||||||
"example": 1
|
|
||||||
},
|
|
||||||
"footprint": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"id": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"jobId": {
|
|
||||||
"type": "integer",
|
|
||||||
"example": 123000
|
|
||||||
},
|
|
||||||
"jobState": {
|
|
||||||
"enum": [
|
|
||||||
"completed",
|
|
||||||
"failed",
|
|
||||||
"cancelled",
|
|
||||||
"stopped",
|
|
||||||
"timeout",
|
|
||||||
"out_of_memory"
|
|
||||||
],
|
|
||||||
"allOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/definitions/schema.JobState"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"example": "completed"
|
|
||||||
},
|
|
||||||
"metaData": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"monitoringStatus": {
|
|
||||||
"type": "integer",
|
|
||||||
"maximum": 3,
|
|
||||||
"minimum": 0,
|
|
||||||
"example": 1
|
|
||||||
},
|
|
||||||
"numAcc": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 2
|
|
||||||
},
|
|
||||||
"numHwthreads": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 20
|
|
||||||
},
|
|
||||||
"numNodes": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 2
|
|
||||||
},
|
|
||||||
"partition": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "main"
|
|
||||||
},
|
|
||||||
"project": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "abcd200"
|
|
||||||
},
|
|
||||||
"resources": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"$ref": "#/definitions/schema.Resource"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"smt": {
|
|
||||||
"type": "integer",
|
|
||||||
"example": 4
|
|
||||||
},
|
|
||||||
"startTime": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 1649723812
|
|
||||||
},
|
|
||||||
"statistics": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"$ref": "#/definitions/schema.JobStatistics"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"subCluster": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "main"
|
|
||||||
},
|
|
||||||
"tags": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"$ref": "#/definitions/schema.Tag"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"user": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "abcd100h"
|
|
||||||
},
|
|
||||||
"walltime": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 86400
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"schema.JobMetric": {
|
"schema.JobMetric": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -1884,6 +1844,9 @@
|
|||||||
},
|
},
|
||||||
"remove": {
|
"remove": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"$ref": "#/definitions/schema.Unit"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
201
api/swagger.yaml
201
api/swagger.yaml
@ -31,7 +31,7 @@ definitions:
|
|||||||
example: Debug
|
example: Debug
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
api.DefaultJobApiResponse:
|
api.DefaultApiResponse:
|
||||||
properties:
|
properties:
|
||||||
msg:
|
msg:
|
||||||
type: string
|
type: string
|
||||||
@ -96,7 +96,7 @@ definitions:
|
|||||||
jobs:
|
jobs:
|
||||||
description: Array of jobs
|
description: Array of jobs
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.JobMeta'
|
$ref: '#/definitions/schema.Job'
|
||||||
type: array
|
type: array
|
||||||
page:
|
page:
|
||||||
description: Page id returned
|
description: Page id returned
|
||||||
@ -111,6 +111,15 @@ definitions:
|
|||||||
scope:
|
scope:
|
||||||
$ref: '#/definitions/schema.MetricScope'
|
$ref: '#/definitions/schema.MetricScope'
|
||||||
type: object
|
type: object
|
||||||
|
api.Node:
|
||||||
|
properties:
|
||||||
|
hostname:
|
||||||
|
type: string
|
||||||
|
states:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
type: object
|
||||||
api.StopJobApiRequest:
|
api.StopJobApiRequest:
|
||||||
properties:
|
properties:
|
||||||
cluster:
|
cluster:
|
||||||
@ -133,6 +142,16 @@ definitions:
|
|||||||
- jobState
|
- jobState
|
||||||
- stopTime
|
- stopTime
|
||||||
type: object
|
type: object
|
||||||
|
api.UpdateNodeStatesRequest:
|
||||||
|
properties:
|
||||||
|
cluster:
|
||||||
|
example: fritz
|
||||||
|
type: string
|
||||||
|
nodes:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/api.Node'
|
||||||
|
type: array
|
||||||
|
type: object
|
||||||
schema.Accelerator:
|
schema.Accelerator:
|
||||||
properties:
|
properties:
|
||||||
id:
|
id:
|
||||||
@ -156,7 +175,6 @@ definitions:
|
|||||||
type: array
|
type: array
|
||||||
type: object
|
type: object
|
||||||
schema.Job:
|
schema.Job:
|
||||||
description: Information of a HPC job.
|
|
||||||
properties:
|
properties:
|
||||||
arrayJobId:
|
arrayJobId:
|
||||||
example: 123000
|
example: 123000
|
||||||
@ -228,6 +246,11 @@ definitions:
|
|||||||
project:
|
project:
|
||||||
example: abcd200
|
example: abcd200
|
||||||
type: string
|
type: string
|
||||||
|
requestedMemory:
|
||||||
|
description: in MB
|
||||||
|
example: 128000
|
||||||
|
minimum: 1
|
||||||
|
type: integer
|
||||||
resources:
|
resources:
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.Resource'
|
$ref: '#/definitions/schema.Resource'
|
||||||
@ -236,7 +259,12 @@ definitions:
|
|||||||
example: 4
|
example: 4
|
||||||
type: integer
|
type: integer
|
||||||
startTime:
|
startTime:
|
||||||
type: string
|
example: 1649723812
|
||||||
|
type: integer
|
||||||
|
statistics:
|
||||||
|
additionalProperties:
|
||||||
|
$ref: '#/definitions/schema.JobStatistics'
|
||||||
|
type: object
|
||||||
subCluster:
|
subCluster:
|
||||||
example: main
|
example: main
|
||||||
type: string
|
type: string
|
||||||
@ -268,109 +296,6 @@ definitions:
|
|||||||
$ref: '#/definitions/schema.JobLink'
|
$ref: '#/definitions/schema.JobLink'
|
||||||
type: array
|
type: array
|
||||||
type: object
|
type: object
|
||||||
schema.JobMeta:
|
|
||||||
description: Meta data information of a HPC job.
|
|
||||||
properties:
|
|
||||||
arrayJobId:
|
|
||||||
example: 123000
|
|
||||||
type: integer
|
|
||||||
cluster:
|
|
||||||
example: fritz
|
|
||||||
type: string
|
|
||||||
concurrentJobs:
|
|
||||||
$ref: '#/definitions/schema.JobLinkResultList'
|
|
||||||
duration:
|
|
||||||
example: 43200
|
|
||||||
minimum: 1
|
|
||||||
type: integer
|
|
||||||
energy:
|
|
||||||
type: number
|
|
||||||
energyFootprint:
|
|
||||||
additionalProperties:
|
|
||||||
type: number
|
|
||||||
type: object
|
|
||||||
exclusive:
|
|
||||||
example: 1
|
|
||||||
maximum: 2
|
|
||||||
minimum: 0
|
|
||||||
type: integer
|
|
||||||
footprint:
|
|
||||||
additionalProperties:
|
|
||||||
type: number
|
|
||||||
type: object
|
|
||||||
id:
|
|
||||||
type: integer
|
|
||||||
jobId:
|
|
||||||
example: 123000
|
|
||||||
type: integer
|
|
||||||
jobState:
|
|
||||||
allOf:
|
|
||||||
- $ref: '#/definitions/schema.JobState'
|
|
||||||
enum:
|
|
||||||
- completed
|
|
||||||
- failed
|
|
||||||
- cancelled
|
|
||||||
- stopped
|
|
||||||
- timeout
|
|
||||||
- out_of_memory
|
|
||||||
example: completed
|
|
||||||
metaData:
|
|
||||||
additionalProperties:
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
monitoringStatus:
|
|
||||||
example: 1
|
|
||||||
maximum: 3
|
|
||||||
minimum: 0
|
|
||||||
type: integer
|
|
||||||
numAcc:
|
|
||||||
example: 2
|
|
||||||
minimum: 1
|
|
||||||
type: integer
|
|
||||||
numHwthreads:
|
|
||||||
example: 20
|
|
||||||
minimum: 1
|
|
||||||
type: integer
|
|
||||||
numNodes:
|
|
||||||
example: 2
|
|
||||||
minimum: 1
|
|
||||||
type: integer
|
|
||||||
partition:
|
|
||||||
example: main
|
|
||||||
type: string
|
|
||||||
project:
|
|
||||||
example: abcd200
|
|
||||||
type: string
|
|
||||||
resources:
|
|
||||||
items:
|
|
||||||
$ref: '#/definitions/schema.Resource'
|
|
||||||
type: array
|
|
||||||
smt:
|
|
||||||
example: 4
|
|
||||||
type: integer
|
|
||||||
startTime:
|
|
||||||
example: 1649723812
|
|
||||||
minimum: 1
|
|
||||||
type: integer
|
|
||||||
statistics:
|
|
||||||
additionalProperties:
|
|
||||||
$ref: '#/definitions/schema.JobStatistics'
|
|
||||||
type: object
|
|
||||||
subCluster:
|
|
||||||
example: main
|
|
||||||
type: string
|
|
||||||
tags:
|
|
||||||
items:
|
|
||||||
$ref: '#/definitions/schema.Tag'
|
|
||||||
type: array
|
|
||||||
user:
|
|
||||||
example: abcd100h
|
|
||||||
type: string
|
|
||||||
walltime:
|
|
||||||
example: 86400
|
|
||||||
minimum: 1
|
|
||||||
type: integer
|
|
||||||
type: object
|
|
||||||
schema.JobMetric:
|
schema.JobMetric:
|
||||||
properties:
|
properties:
|
||||||
series:
|
series:
|
||||||
@ -599,6 +524,8 @@ definitions:
|
|||||||
type: number
|
type: number
|
||||||
remove:
|
remove:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
unit:
|
||||||
|
$ref: '#/definitions/schema.Unit'
|
||||||
type: object
|
type: object
|
||||||
schema.Tag:
|
schema.Tag:
|
||||||
description: Defines a tag using name and type.
|
description: Defines a tag using name and type.
|
||||||
@ -776,7 +703,7 @@ paths:
|
|||||||
get:
|
get:
|
||||||
description: |-
|
description: |-
|
||||||
Job to get is specified by database ID
|
Job to get is specified by database ID
|
||||||
Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
Returns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.
|
||||||
parameters:
|
parameters:
|
||||||
- description: Database ID of Job
|
- description: Database ID of Job
|
||||||
in: path
|
in: path
|
||||||
@ -829,7 +756,7 @@ paths:
|
|||||||
- application/json
|
- application/json
|
||||||
description: |-
|
description: |-
|
||||||
Job to get is specified by database ID
|
Job to get is specified by database ID
|
||||||
Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
Returns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.
|
||||||
parameters:
|
parameters:
|
||||||
- description: Database ID of Job
|
- description: Database ID of Job
|
||||||
in: path
|
in: path
|
||||||
@ -900,7 +827,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DefaultJobApiResponse'
|
$ref: '#/definitions/api.DefaultApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@ -947,7 +874,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DefaultJobApiResponse'
|
$ref: '#/definitions/api.DefaultApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@ -994,7 +921,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DefaultJobApiResponse'
|
$ref: '#/definitions/api.DefaultApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@ -1078,21 +1005,21 @@ paths:
|
|||||||
- application/json
|
- application/json
|
||||||
description: |-
|
description: |-
|
||||||
Job specified in request body will be saved to database as "running" with new DB ID.
|
Job specified in request body will be saved to database as "running" with new DB ID.
|
||||||
Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
|
Job specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.
|
||||||
parameters:
|
parameters:
|
||||||
- description: Job to add
|
- description: Job to add
|
||||||
in: body
|
in: body
|
||||||
name: request
|
name: request
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/schema.JobMeta'
|
$ref: '#/definitions/schema.Job'
|
||||||
produces:
|
produces:
|
||||||
- application/json
|
- application/json
|
||||||
responses:
|
responses:
|
||||||
"201":
|
"201":
|
||||||
description: Job added successfully
|
description: Job added successfully
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DefaultJobApiResponse'
|
$ref: '#/definitions/api.DefaultApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@ -1123,7 +1050,7 @@ paths:
|
|||||||
post:
|
post:
|
||||||
description: |-
|
description: |-
|
||||||
Job to stop is specified by request body. All fields are required in this case.
|
Job to stop is specified by request body. All fields are required in this case.
|
||||||
Returns full job resource information according to 'JobMeta' scheme.
|
Returns full job resource information according to 'Job' scheme.
|
||||||
parameters:
|
parameters:
|
||||||
- description: All fields required
|
- description: All fields required
|
||||||
in: body
|
in: body
|
||||||
@ -1137,7 +1064,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/schema.JobMeta'
|
$ref: '#/definitions/schema.Job'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@ -1217,6 +1144,46 @@ paths:
|
|||||||
summary: Adds one or more tags to a job
|
summary: Adds one or more tags to a job
|
||||||
tags:
|
tags:
|
||||||
- Job add and modify
|
- Job add and modify
|
||||||
|
/api/nodestats/:
|
||||||
|
post:
|
||||||
|
description: |-
|
||||||
|
Delivers updated Slurm node states for a cluster.
|
||||||
|
The request body names the cluster and lists each node's hostname together with its states.
|
||||||
|
parameters:
|
||||||
|
- description: Request body containing nodes and their states
|
||||||
|
in: body
|
||||||
|
name: request
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/api.UpdateNodeStatesRequest'
|
||||||
|
produces:
|
||||||
|
- application/json
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Success message
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/api.DefaultApiResponse'
|
||||||
|
"400":
|
||||||
|
description: Bad Request
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/api.ErrorResponse'
|
||||||
|
"401":
|
||||||
|
description: Unauthorized
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/api.ErrorResponse'
|
||||||
|
"403":
|
||||||
|
description: Forbidden
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/api.ErrorResponse'
|
||||||
|
"500":
|
||||||
|
description: Internal Server Error
|
||||||
|
schema:
|
||||||
|
$ref: '#/definitions/api.ErrorResponse'
|
||||||
|
security:
|
||||||
|
- ApiKeyAuth: []
|
||||||
|
summary: Deliver updated Slurm node states
|
||||||
|
tags:
|
||||||
|
- Nodestates
|
||||||
/api/users/:
|
/api/users/:
|
||||||
get:
|
get:
|
||||||
description: |-
|
description: |-
|
||||||
|
@ -7,8 +7,9 @@ package main
|
|||||||
import "flag"
|
import "flag"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
|
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
|
||||||
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags bool
|
||||||
|
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||||
)
|
)
|
||||||
|
|
||||||
func cliInit() {
|
func cliInit() {
|
||||||
@ -21,6 +22,7 @@ func cliInit() {
|
|||||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||||
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||||
|
flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
|
||||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||||
|
@ -19,12 +19,15 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/tagger"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
|
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
"github.com/google/gops/agent"
|
"github.com/google/gops/agent"
|
||||||
|
"github.com/joho/godotenv"
|
||||||
|
|
||||||
_ "github.com/go-sql-driver/mysql"
|
_ "github.com/go-sql-driver/mysql"
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
@ -76,7 +79,8 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := runtimeEnv.LoadEnv("./.env"); err != nil && !os.IsNotExist(err) {
|
err := godotenv.Load()
|
||||||
|
if err != nil {
|
||||||
log.Abortf("Could not parse existing .env file at location './.env'. Application startup failed, exited.\nError: %s\n", err.Error())
|
log.Abortf("Could not parse existing .env file at location './.env'. Application startup failed, exited.\nError: %s\n", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,11 +213,22 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.Keys.EnableJobTaggers {
|
||||||
|
tagger.Init()
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagApplyTags {
|
||||||
|
if err := tagger.RunTaggers(); err != nil {
|
||||||
|
log.Abortf("Running job taggers.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if !flagServer {
|
if !flagServer {
|
||||||
log.Exit("No errors, server flag not set. Exiting cc-backend.")
|
log.Exit("No errors, server flag not set. Exiting cc-backend.")
|
||||||
}
|
}
|
||||||
|
|
||||||
archiver.Start(repository.GetJobRepository())
|
archiver.Start(repository.GetJobRepository())
|
||||||
|
|
||||||
taskManager.Start()
|
taskManager.Start()
|
||||||
serverInit()
|
serverInit()
|
||||||
|
|
||||||
@ -235,6 +250,8 @@ func main() {
|
|||||||
|
|
||||||
serverShutdown()
|
serverShutdown()
|
||||||
|
|
||||||
|
util.FsWatcherShutdown()
|
||||||
|
|
||||||
taskManager.Shutdown()
|
taskManager.Shutdown()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
7
go.mod
7
go.mod
@ -9,6 +9,8 @@ require (
|
|||||||
github.com/ClusterCockpit/cc-units v0.4.0
|
github.com/ClusterCockpit/cc-units v0.4.0
|
||||||
github.com/Masterminds/squirrel v1.5.4
|
github.com/Masterminds/squirrel v1.5.4
|
||||||
github.com/coreos/go-oidc/v3 v3.12.0
|
github.com/coreos/go-oidc/v3 v3.12.0
|
||||||
|
github.com/expr-lang/expr v1.17.3
|
||||||
|
github.com/fsnotify/fsnotify v1.9.0
|
||||||
github.com/go-co-op/gocron/v2 v2.16.0
|
github.com/go-co-op/gocron/v2 v2.16.0
|
||||||
github.com/go-ldap/ldap/v3 v3.4.10
|
github.com/go-ldap/ldap/v3 v3.4.10
|
||||||
github.com/go-sql-driver/mysql v1.9.0
|
github.com/go-sql-driver/mysql v1.9.0
|
||||||
@ -18,8 +20,8 @@ require (
|
|||||||
github.com/gorilla/handlers v1.5.2
|
github.com/gorilla/handlers v1.5.2
|
||||||
github.com/gorilla/mux v1.8.1
|
github.com/gorilla/mux v1.8.1
|
||||||
github.com/gorilla/sessions v1.4.0
|
github.com/gorilla/sessions v1.4.0
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0
|
|
||||||
github.com/jmoiron/sqlx v1.4.0
|
github.com/jmoiron/sqlx v1.4.0
|
||||||
|
github.com/joho/godotenv v1.5.1
|
||||||
github.com/mattn/go-sqlite3 v1.14.24
|
github.com/mattn/go-sqlite3 v1.14.24
|
||||||
github.com/prometheus/client_golang v1.21.0
|
github.com/prometheus/client_golang v1.21.0
|
||||||
github.com/prometheus/common v0.62.0
|
github.com/prometheus/common v0.62.0
|
||||||
@ -39,7 +41,6 @@ require (
|
|||||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
|
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
|
||||||
@ -57,7 +58,6 @@ require (
|
|||||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
|
||||||
github.com/jonboulle/clockwork v0.5.0 // indirect
|
github.com/jonboulle/clockwork v0.5.0 // indirect
|
||||||
github.com/josharian/intern v1.0.0 // indirect
|
github.com/josharian/intern v1.0.0 // indirect
|
||||||
github.com/jpillora/backoff v1.0.0 // indirect
|
github.com/jpillora/backoff v1.0.0 // indirect
|
||||||
@ -69,7 +69,6 @@ require (
|
|||||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
|
||||||
github.com/prometheus/client_model v0.6.1 // indirect
|
github.com/prometheus/client_model v0.6.1 // indirect
|
||||||
github.com/prometheus/procfs v0.15.1 // indirect
|
github.com/prometheus/procfs v0.15.1 // indirect
|
||||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
|
18
go.sum
18
go.sum
@ -16,7 +16,6 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
|
|||||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||||
github.com/PuerkitoBio/goquery v1.9.3 h1:mpJr/ikUA9/GNJB/DBZcGeFDXUtosHRyRrwh7KGdTG0=
|
github.com/PuerkitoBio/goquery v1.9.3 h1:mpJr/ikUA9/GNJB/DBZcGeFDXUtosHRyRrwh7KGdTG0=
|
||||||
github.com/PuerkitoBio/goquery v1.9.3/go.mod h1:1ndLHPdTz+DyQPICCWYlYQMPl0oXZj0G6D4LCYA6u4U=
|
github.com/PuerkitoBio/goquery v1.9.3/go.mod h1:1ndLHPdTz+DyQPICCWYlYQMPl0oXZj0G6D4LCYA6u4U=
|
||||||
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
|
||||||
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
|
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
|
||||||
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
|
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
|
||||||
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
|
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
|
||||||
@ -25,13 +24,10 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNg
|
|||||||
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
|
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
|
||||||
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
|
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
|
||||||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
|
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
|
||||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
|
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
|
||||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
||||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
|
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo=
|
github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo=
|
||||||
@ -53,8 +49,12 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj
|
|||||||
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
|
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
|
||||||
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
||||||
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||||
|
github.com/expr-lang/expr v1.17.3 h1:myeTTuDFz7k6eFe/JPlep/UsiIjVhG61FMHFu63U7j0=
|
||||||
|
github.com/expr-lang/expr v1.17.3/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
||||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||||
|
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||||
|
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||||
github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
|
github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
|
||||||
github.com/go-asn1-ber/asn1-ber v1.5.7/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
|
github.com/go-asn1-ber/asn1-ber v1.5.7/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
|
||||||
github.com/go-co-op/gocron/v2 v2.16.0 h1:uqUF6WFZ4enRU45pWFNcn1xpDLc+jBOTKhPQI16Z1xs=
|
github.com/go-co-op/gocron/v2 v2.16.0 h1:uqUF6WFZ4enRU45pWFNcn1xpDLc+jBOTKhPQI16Z1xs=
|
||||||
@ -119,10 +119,6 @@ github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/C
|
|||||||
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjwJdUHnwvfjMF71M1iI4=
|
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
|
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
|
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
|
|
||||||
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
|
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
|
||||||
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
|
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
|
||||||
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
|
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
|
||||||
@ -137,6 +133,8 @@ github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZ
|
|||||||
github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc=
|
github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc=
|
||||||
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
|
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
|
||||||
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
|
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
|
||||||
|
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||||
|
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||||
github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I=
|
github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I=
|
||||||
github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60=
|
github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60=
|
||||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||||
@ -145,7 +143,6 @@ github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2E
|
|||||||
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
|
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
|
||||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
|
||||||
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
|
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
|
||||||
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
|
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
|
||||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||||
@ -180,8 +177,6 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
|
|||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
|
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||||
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
|
|
||||||
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
|
||||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||||
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
||||||
@ -213,7 +208,6 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
|
|||||||
github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
|
github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
|
||||||
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
|
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
|
||||||
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
|
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
|
||||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
|
@ -62,6 +62,11 @@ models:
|
|||||||
fields:
|
fields:
|
||||||
partitions:
|
partitions:
|
||||||
resolver: true
|
resolver: true
|
||||||
|
Node:
|
||||||
|
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Node"
|
||||||
|
fields:
|
||||||
|
metaData:
|
||||||
|
resolver: true
|
||||||
NullableFloat:
|
NullableFloat:
|
||||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||||
MetricScope:
|
MetricScope:
|
||||||
@ -81,6 +86,10 @@ models:
|
|||||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||||
JobState:
|
JobState:
|
||||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||||
|
MonitoringState:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.NodeState" }
|
||||||
|
HealthState:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MonitoringState" }
|
||||||
TimeRange:
|
TimeRange:
|
||||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||||
IntRange:
|
IntRange:
|
||||||
|
@ -123,7 +123,7 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 2), 0666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,11 +204,11 @@ func TestRestApi(t *testing.T) {
|
|||||||
restapi.MountApiRoutes(r)
|
restapi.MountApiRoutes(r)
|
||||||
|
|
||||||
var TestJobId int64 = 123
|
var TestJobId int64 = 123
|
||||||
var TestClusterName string = "testcluster"
|
TestClusterName := "testcluster"
|
||||||
var TestStartTime int64 = 123456789
|
var TestStartTime int64 = 123456789
|
||||||
|
|
||||||
const startJobBody string = `{
|
const startJobBody string = `{
|
||||||
"jobId": 123,
|
"jobId": 123,
|
||||||
"user": "testuser",
|
"user": "testuser",
|
||||||
"project": "testproj",
|
"project": "testproj",
|
||||||
"cluster": "testcluster",
|
"cluster": "testcluster",
|
||||||
@ -221,7 +221,6 @@ func TestRestApi(t *testing.T) {
|
|||||||
"exclusive": 1,
|
"exclusive": 1,
|
||||||
"monitoringStatus": 1,
|
"monitoringStatus": 1,
|
||||||
"smt": 1,
|
"smt": 1,
|
||||||
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
|
|
||||||
"resources": [
|
"resources": [
|
||||||
{
|
{
|
||||||
"hostname": "host123",
|
"hostname": "host123",
|
||||||
@ -252,16 +251,17 @@ func TestRestApi(t *testing.T) {
|
|||||||
if response.StatusCode != http.StatusCreated {
|
if response.StatusCode != http.StatusCreated {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
resolver := graph.GetResolverInstance()
|
// resolver := graph.GetResolverInstance()
|
||||||
|
restapi.JobRepository.SyncJobs()
|
||||||
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Tags, err = resolver.Job().Tags(ctx, job)
|
// job.Tags, err = resolver.Job().Tags(ctx, job)
|
||||||
if err != nil {
|
// if err != nil {
|
||||||
t.Fatal(err)
|
// t.Fatal(err)
|
||||||
}
|
// }
|
||||||
|
|
||||||
if job.JobID != 123 ||
|
if job.JobID != 123 ||
|
||||||
job.User != "testuser" ||
|
job.User != "testuser" ||
|
||||||
@ -278,13 +278,13 @@ func TestRestApi(t *testing.T) {
|
|||||||
job.MonitoringStatus != 1 ||
|
job.MonitoringStatus != 1 ||
|
||||||
job.SMT != 1 ||
|
job.SMT != 1 ||
|
||||||
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
|
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
|
||||||
job.StartTime.Unix() != 123456789 {
|
job.StartTime != 123456789 {
|
||||||
t.Fatalf("unexpected job properties: %#v", job)
|
t.Fatalf("unexpected job properties: %#v", job)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
|
// if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
|
||||||
t.Fatalf("unexpected tags: %#v", job.Tags)
|
// t.Fatalf("unexpected tags: %#v", job.Tags)
|
||||||
}
|
// }
|
||||||
}); !ok {
|
}); !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -352,7 +352,7 @@ func TestRestApi(t *testing.T) {
|
|||||||
|
|
||||||
t.Run("CheckDoubleStart", func(t *testing.T) {
|
t.Run("CheckDoubleStart", func(t *testing.T) {
|
||||||
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far apart!
|
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far apart!
|
||||||
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
body := strings.ReplaceAll(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
@ -402,6 +402,7 @@ func TestRestApi(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(1 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
|
restapi.JobRepository.SyncJobs()
|
||||||
|
|
||||||
const stopJobBodyFailed string = `{
|
const stopJobBodyFailed string = `{
|
||||||
"jobId": 12345,
|
"jobId": 12345,
|
||||||
|
70
internal/api/cluster.go
Normal file
70
internal/api/cluster.go
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetClustersApiResponse model
|
||||||
|
type GetClustersApiResponse struct {
|
||||||
|
Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
|
||||||
|
}
|
||||||
|
|
||||||
|
// getClusters godoc
|
||||||
|
// @summary Lists all cluster configs
|
||||||
|
// @tags Cluster query
|
||||||
|
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
|
||||||
|
// @produce json
|
||||||
|
// @param cluster query string false "Job Cluster"
|
||||||
|
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/clusters/ [get]
|
||||||
|
func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||||
|
!user.HasRole(schema.RoleApi) {
|
||||||
|
|
||||||
|
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
var clusters []*schema.Cluster
|
||||||
|
|
||||||
|
if r.URL.Query().Has("cluster") {
|
||||||
|
name := r.URL.Query().Get("cluster")
|
||||||
|
cluster := archive.GetCluster(name)
|
||||||
|
if cluster == nil {
|
||||||
|
handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
clusters = append(clusters, cluster)
|
||||||
|
} else {
|
||||||
|
clusters = archive.Clusters
|
||||||
|
}
|
||||||
|
|
||||||
|
payload := GetClustersApiResponse{
|
||||||
|
Clusters: clusters,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
@ -208,7 +208,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -278,7 +278,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -348,7 +348,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -467,7 +467,7 @@ const docTemplate = `{
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.",
|
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.",
|
||||||
"consumes": [
|
"consumes": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -485,7 +485,7 @@ const docTemplate = `{
|
|||||||
"in": "body",
|
"in": "body",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
"$ref": "#/definitions/schema.Job"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -493,7 +493,7 @@ const docTemplate = `{
|
|||||||
"201": {
|
"201": {
|
||||||
"description": "Job added successfully",
|
"description": "Job added successfully",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -536,7 +536,7 @@ const docTemplate = `{
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'JobMeta' scheme.",
|
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'Job' scheme.",
|
||||||
"produces": [
|
"produces": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -559,7 +559,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
"$ref": "#/definitions/schema.Job"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@ -681,7 +681,7 @@ const docTemplate = `{
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
|
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
|
||||||
"produces": [
|
"produces": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -755,7 +755,7 @@ const docTemplate = `{
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
|
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
|
||||||
"consumes": [
|
"consumes": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@ -833,6 +833,66 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/api/nodestats/": {
|
||||||
|
"post": {
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"ApiKeyAuth": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.",
|
||||||
|
"produces": [
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"Nodestates"
|
||||||
|
],
|
||||||
|
"summary": "Deliver updated Slurm node states",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "Request body containing nodes and their states",
|
||||||
|
"name": "request",
|
||||||
|
"in": "body",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.UpdateNodeStatesRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Success message",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.DefaultApiResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Internal Server Error",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/api/users/": {
|
"/api/users/": {
|
||||||
"get": {
|
"get": {
|
||||||
"security": [
|
"security": [
|
||||||
@ -1081,7 +1141,7 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"api.DefaultJobApiResponse": {
|
"api.DefaultApiResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"msg": {
|
"msg": {
|
||||||
@ -1175,7 +1235,7 @@ const docTemplate = `{
|
|||||||
"description": "Array of jobs",
|
"description": "Array of jobs",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
"$ref": "#/definitions/schema.Job"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"page": {
|
"page": {
|
||||||
@ -1198,6 +1258,20 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"api.Node": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"hostname": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"states": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"api.StopJobApiRequest": {
|
"api.StopJobApiRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
@ -1231,6 +1305,21 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"api.UpdateNodeStatesRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"cluster": {
|
||||||
|
"type": "string",
|
||||||
|
"example": "fritz"
|
||||||
|
},
|
||||||
|
"nodes": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/api.Node"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"schema.Accelerator": {
|
"schema.Accelerator": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -1266,7 +1355,6 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"schema.Job": {
|
"schema.Job": {
|
||||||
"description": "Information of a HPC job.",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"arrayJobId": {
|
"arrayJobId": {
|
||||||
@ -1364,6 +1452,12 @@ const docTemplate = `{
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd200"
|
"example": "abcd200"
|
||||||
},
|
},
|
||||||
|
"requestedMemory": {
|
||||||
|
"description": "in MB",
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"example": 128000
|
||||||
|
},
|
||||||
"resources": {
|
"resources": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
@ -1375,7 +1469,14 @@ const docTemplate = `{
|
|||||||
"example": 4
|
"example": 4
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"type": "string"
|
"type": "integer",
|
||||||
|
"example": 1649723812
|
||||||
|
},
|
||||||
|
"statistics": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"$ref": "#/definitions/schema.JobStatistics"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"subCluster": {
|
"subCluster": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@ -1423,147 +1524,6 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"schema.JobMeta": {
|
|
||||||
"description": "Meta data information of a HPC job.",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"arrayJobId": {
|
|
||||||
"type": "integer",
|
|
||||||
"example": 123000
|
|
||||||
},
|
|
||||||
"cluster": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "fritz"
|
|
||||||
},
|
|
||||||
"concurrentJobs": {
|
|
||||||
"$ref": "#/definitions/schema.JobLinkResultList"
|
|
||||||
},
|
|
||||||
"duration": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 43200
|
|
||||||
},
|
|
||||||
"energy": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"energyFootprint": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"exclusive": {
|
|
||||||
"type": "integer",
|
|
||||||
"maximum": 2,
|
|
||||||
"minimum": 0,
|
|
||||||
"example": 1
|
|
||||||
},
|
|
||||||
"footprint": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"id": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"jobId": {
|
|
||||||
"type": "integer",
|
|
||||||
"example": 123000
|
|
||||||
},
|
|
||||||
"jobState": {
|
|
||||||
"enum": [
|
|
||||||
"completed",
|
|
||||||
"failed",
|
|
||||||
"cancelled",
|
|
||||||
"stopped",
|
|
||||||
"timeout",
|
|
||||||
"out_of_memory"
|
|
||||||
],
|
|
||||||
"allOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/definitions/schema.JobState"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"example": "completed"
|
|
||||||
},
|
|
||||||
"metaData": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"monitoringStatus": {
|
|
||||||
"type": "integer",
|
|
||||||
"maximum": 3,
|
|
||||||
"minimum": 0,
|
|
||||||
"example": 1
|
|
||||||
},
|
|
||||||
"numAcc": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 2
|
|
||||||
},
|
|
||||||
"numHwthreads": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 20
|
|
||||||
},
|
|
||||||
"numNodes": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 2
|
|
||||||
},
|
|
||||||
"partition": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "main"
|
|
||||||
},
|
|
||||||
"project": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "abcd200"
|
|
||||||
},
|
|
||||||
"resources": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"$ref": "#/definitions/schema.Resource"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"smt": {
|
|
||||||
"type": "integer",
|
|
||||||
"example": 4
|
|
||||||
},
|
|
||||||
"startTime": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 1649723812
|
|
||||||
},
|
|
||||||
"statistics": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"$ref": "#/definitions/schema.JobStatistics"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"subCluster": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "main"
|
|
||||||
},
|
|
||||||
"tags": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"$ref": "#/definitions/schema.Tag"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"user": {
|
|
||||||
"type": "string",
|
|
||||||
"example": "abcd100h"
|
|
||||||
},
|
|
||||||
"walltime": {
|
|
||||||
"type": "integer",
|
|
||||||
"minimum": 1,
|
|
||||||
"example": 86400
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"schema.JobMetric": {
|
"schema.JobMetric": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -1891,6 +1851,9 @@ const docTemplate = `{
|
|||||||
},
|
},
|
||||||
"remove": {
|
"remove": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"$ref": "#/definitions/schema.Unit"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
982
internal/api/job.go
Normal file
982
internal/api/job.go
Normal file
@ -0,0 +1,982 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
)
|
||||||
|
|
||||||
|
// StopJobApiRequest model
|
||||||
|
type StopJobApiRequest struct {
|
||||||
|
JobId *int64 `json:"jobId" example:"123000"`
|
||||||
|
Cluster *string `json:"cluster" example:"fritz"`
|
||||||
|
StartTime *int64 `json:"startTime" example:"1649723812"`
|
||||||
|
State schema.JobState `json:"jobState" validate:"required" example:"completed"`
|
||||||
|
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteJobApiRequest model
|
||||||
|
type DeleteJobApiRequest struct {
|
||||||
|
JobId *int64 `json:"jobId" validate:"required" example:"123000"` // Cluster Job ID of job
|
||||||
|
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
|
||||||
|
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetJobsApiResponse model
|
||||||
|
type GetJobsApiResponse struct {
|
||||||
|
Jobs []*schema.Job `json:"jobs"` // Array of jobs
|
||||||
|
Items int `json:"items"` // Number of jobs returned
|
||||||
|
Page int `json:"page"` // Page id returned
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApiTag model
|
||||||
|
type ApiTag struct {
|
||||||
|
// Tag Type
|
||||||
|
Type string `json:"type" example:"Debug"`
|
||||||
|
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||||
|
Scope string `json:"scope" example:"global"` // Tag Scope for Frontend Display
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApiMeta model
|
||||||
|
type EditMetaRequest struct {
|
||||||
|
Key string `json:"key" example:"jobScript"`
|
||||||
|
Value string `json:"value" example:"bash script"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TagJobApiRequest []*ApiTag
|
||||||
|
|
||||||
|
type GetJobApiRequest []string
|
||||||
|
|
||||||
|
type GetJobApiResponse struct {
|
||||||
|
Meta *schema.Job
|
||||||
|
Data []*JobMetricWithName
|
||||||
|
}
|
||||||
|
|
||||||
|
type GetCompleteJobApiResponse struct {
|
||||||
|
Meta *schema.Job
|
||||||
|
Data schema.JobData
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobMetricWithName struct {
|
||||||
|
Metric *schema.JobMetric `json:"metric"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Scope schema.MetricScope `json:"scope"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// getJobs godoc
|
||||||
|
// @summary Lists all jobs
|
||||||
|
// @tags Job query
|
||||||
|
// @description Get a list of all jobs. Filters can be applied using query parameters.
|
||||||
|
// @description Number of results can be limited by page. Results are sorted by descending startTime.
|
||||||
|
// @produce json
|
||||||
|
// @param state query string false "Job State" Enums(running, completed, failed, cancelled, stopped, timeout)
|
||||||
|
// @param cluster query string false "Job Cluster"
|
||||||
|
// @param start-time query string false "Syntax: '$from-$to', as unix epoch timestamps in seconds"
|
||||||
|
// @param items-per-page query int false "Items per page (Default: 25)"
|
||||||
|
// @param page query int false "Page Number (Default: 1)"
|
||||||
|
// @param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
|
||||||
|
// @success 200 {object} api.GetJobsApiResponse "Job array and page info"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/ [get]
|
||||||
|
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
withMetadata := false
|
||||||
|
filter := &model.JobFilter{}
|
||||||
|
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
|
||||||
|
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
|
||||||
|
|
||||||
|
for key, vals := range r.URL.Query() {
|
||||||
|
switch key {
|
||||||
|
case "state":
|
||||||
|
for _, s := range vals {
|
||||||
|
state := schema.JobState(s)
|
||||||
|
if !state.Valid() {
|
||||||
|
handleError(fmt.Errorf("invalid query parameter value: state"),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
filter.State = append(filter.State, state)
|
||||||
|
}
|
||||||
|
case "cluster":
|
||||||
|
filter.Cluster = &model.StringInput{Eq: &vals[0]}
|
||||||
|
case "start-time":
|
||||||
|
st := strings.Split(vals[0], "-")
|
||||||
|
if len(st) != 2 {
|
||||||
|
handleError(fmt.Errorf("invalid query parameter value: startTime"),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
from, err := strconv.ParseInt(st[0], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
to, err := strconv.ParseInt(st[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ufrom, uto := time.Unix(from, 0), time.Unix(to, 0)
|
||||||
|
filter.StartTime = &schema.TimeRange{From: &ufrom, To: &uto}
|
||||||
|
case "page":
|
||||||
|
x, err := strconv.Atoi(vals[0])
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
page.Page = x
|
||||||
|
case "items-per-page":
|
||||||
|
x, err := strconv.Atoi(vals[0])
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
page.ItemsPerPage = x
|
||||||
|
case "with-metadata":
|
||||||
|
withMetadata = true
|
||||||
|
default:
|
||||||
|
handleError(fmt.Errorf("invalid query parameter: %s", key),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs, err := api.JobRepository.QueryJobs(r.Context(), []*model.JobFilter{filter}, page, order)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]*schema.Job, 0, len(jobs))
|
||||||
|
for _, job := range jobs {
|
||||||
|
if withMetadata {
|
||||||
|
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||||
|
job.Statistics, err = archive.GetStatistics(job)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
results = append(results, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("/api/jobs: %d jobs returned", len(results))
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
payload := GetJobsApiResponse{
|
||||||
|
Jobs: results,
|
||||||
|
Items: page.ItemsPerPage,
|
||||||
|
Page: page.Page,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCompleteJobById godoc
|
||||||
|
// @summary Get job meta and optionally all metric data
|
||||||
|
// @tags Job query
|
||||||
|
// @description Job to get is specified by database ID
|
||||||
|
// @description Returns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Database ID of Job"
|
||||||
|
// @param all-metrics query bool false "Include all available metrics"
|
||||||
|
// @success 200 {object} api.GetCompleteJobApiResponse "Job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/{id} [get]
|
||||||
|
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Fetch job from db
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
id, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
|
||||||
|
} else {
|
||||||
|
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||||
|
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var scopes []schema.MetricScope
|
||||||
|
|
||||||
|
if job.NumNodes == 1 {
|
||||||
|
scopes = []schema.MetricScope{"core"}
|
||||||
|
} else {
|
||||||
|
scopes = []schema.MetricScope{"node"}
|
||||||
|
}
|
||||||
|
|
||||||
|
var data schema.JobData
|
||||||
|
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
resolution := 0
|
||||||
|
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
resolution = max(resolution, mc.Timestep)
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.URL.Query().Get("all-metrics") == "true" {
|
||||||
|
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("/api/job/%s: get job %d", id, job.JobID)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
payload := GetCompleteJobApiResponse{
|
||||||
|
Meta: job,
|
||||||
|
Data: data,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getJobById godoc
|
||||||
|
// @summary Get job meta and configurable metric data
|
||||||
|
// @tags Job query
|
||||||
|
// @description Job to get is specified by database ID
|
||||||
|
// @description Returns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Database ID of Job"
|
||||||
|
// @param request body api.GetJobApiRequest true "Array of metric names"
|
||||||
|
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/{id} [post]
|
||||||
|
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Fetch job from db
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
id, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err = api.JobRepository.FindById(r.Context(), id)
|
||||||
|
} else {
|
||||||
|
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||||
|
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var metrics GetJobApiRequest
|
||||||
|
if err = decode(r.Body, &metrics); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var scopes []schema.MetricScope
|
||||||
|
|
||||||
|
if job.NumNodes == 1 {
|
||||||
|
scopes = []schema.MetricScope{"core"}
|
||||||
|
} else {
|
||||||
|
scopes = []schema.MetricScope{"node"}
|
||||||
|
}
|
||||||
|
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
resolution := 0
|
||||||
|
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
resolution = max(resolution, mc.Timestep)
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*JobMetricWithName{}
|
||||||
|
for name, md := range data {
|
||||||
|
for scope, metric := range md {
|
||||||
|
res = append(res, &JobMetricWithName{
|
||||||
|
Name: name,
|
||||||
|
Scope: scope,
|
||||||
|
Metric: metric,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("/api/job/%s: get job %d", id, job.JobID)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
payload := GetJobApiResponse{
|
||||||
|
Meta: job,
|
||||||
|
Data: res,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// editMeta godoc
|
||||||
|
// @summary Edit meta-data json
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Edit key value pairs in job metadata json
|
||||||
|
// @description If a key already exists its content will be overwritten
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Job Database ID"
|
||||||
|
// @param request body api.EditMetaRequest true "Key value pair to add"
|
||||||
|
// @success 200 {object} schema.Job "Updated job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/edit_meta/{id} [post]
|
||||||
|
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req EditMetaRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := api.JobRepository.UpdateMetadata(job, req.Key, req.Value); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tagJob godoc
|
||||||
|
// @summary Adds one or more tags to a job
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||||
|
// @description Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||||
|
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Job Database ID"
|
||||||
|
// @param request body api.TagJobApiRequest true "Array of tag-objects to add"
|
||||||
|
// @success 200 {object} schema.Job "Updated job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/tag_job/{id} [post]
|
||||||
|
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tag := range req {
|
||||||
|
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), *job.ID, tag.Type, tag.Name, tag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags = append(job.Tags, &schema.Tag{
|
||||||
|
ID: tagId,
|
||||||
|
Type: tag.Type,
|
||||||
|
Name: tag.Name,
|
||||||
|
Scope: tag.Scope,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeTagJob godoc
|
||||||
|
// @summary Removes one or more tags from a job
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.
|
||||||
|
// @description Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
|
||||||
|
// @description If tagged job is already finished: Tag will be removed from respective archive files.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Job Database ID"
|
||||||
|
// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
|
||||||
|
// @success 200 {object} schema.Job "Updated job resource"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /jobs/tag_job/{id} [delete]
|
||||||
|
func (api *RestApi) removeTagJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), job.ID)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, rtag := range req {
|
||||||
|
// Only Global and Admin Tags
|
||||||
|
if rtag.Scope != "global" && rtag.Scope != "admin" {
|
||||||
|
log.Warnf("Cannot delete private tag for job %d: Skip", job.JobID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
remainingTags, err := api.JobRepository.RemoveJobTagByRequest(repository.GetUserFromContext(r.Context()), *job.ID, rtag.Type, rtag.Name, rtag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Tags = remainingTags
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeTags godoc
|
||||||
|
// @summary Removes all tags and job-relations for type:name tuple
|
||||||
|
// @tags Tag remove
|
||||||
|
// @description Removes tags by type and name. Name and Type of Tag(s) must match.
|
||||||
|
// @description Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
|
||||||
|
// @description Tags will be removed from respective archive files.
|
||||||
|
// @accept json
|
||||||
|
// @produce plain
|
||||||
|
// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
|
||||||
|
// @success 200 {string} string "Success Response"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Job or tag does not exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /tags/ [delete]
|
||||||
|
func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
var req TagJobApiRequest
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
targetCount := len(req)
|
||||||
|
currentCount := 0
|
||||||
|
for _, rtag := range req {
|
||||||
|
// Only Global and Admin Tags
|
||||||
|
if rtag.Scope != "global" && rtag.Scope != "admin" {
|
||||||
|
log.Warn("Cannot delete private tag: Skip")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := api.JobRepository.RemoveTagByRequest(rtag.Type, rtag.Name, rtag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
currentCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
fmt.Fprintf(rw, "Deleted Tags from DB: %d successfull of %d requested\n", currentCount, targetCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
// startJob godoc
|
||||||
|
// @summary Adds a new job as "running"
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Job specified in request body will be saved to database as "running" with new DB ID.
|
||||||
|
// @description Job specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param request body schema.Job true "Job to add"
|
||||||
|
// @success 201 {object} api.DefaultApiResponse "Job added successfully"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: The combination of jobId, clusterId and startTime does already exist"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/start_job/ [post]
|
||||||
|
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
req := schema.Job{
|
||||||
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("REST: %s\n", req.GoString())
|
||||||
|
req.State = schema.JobStateRunning
|
||||||
|
|
||||||
|
if err := importer.SanityChecks(&req); err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// aquire lock to avoid race condition between API calls
|
||||||
|
var unlockOnce sync.Once
|
||||||
|
api.RepositoryMutex.Lock()
|
||||||
|
defer unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||||
|
|
||||||
|
// Check if combination of (job_id, cluster_id, start_time) already exists:
|
||||||
|
jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
|
||||||
|
if err != nil && err != sql.ErrNoRows {
|
||||||
|
handleError(fmt.Errorf("checking for duplicate failed: %w", err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
} else if err == nil {
|
||||||
|
for _, job := range jobs {
|
||||||
|
if (req.StartTime - job.StartTime) < 86400 {
|
||||||
|
handleError(fmt.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d, jobid: %d", job.ID, job.JobID), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
id, err := api.JobRepository.Start(&req)
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// unlock here, adding Tags can be async
|
||||||
|
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||||
|
|
||||||
|
for _, tag := range req.Tags {
|
||||||
|
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusCreated)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||||
|
Message: "success",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// stopJobByRequest godoc
|
||||||
|
// @summary Marks job as completed and triggers archiving
|
||||||
|
// @tags Job add and modify
|
||||||
|
// @description Job to stop is specified by request body. All fields are required in this case.
|
||||||
|
// @description Returns full job resource information according to 'Job' scheme.
|
||||||
|
// @produce json
|
||||||
|
// @param request body api.StopJobApiRequest true "All fields required"
|
||||||
|
// @success 200 {object} schema.Job "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: job has already been stopped"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/stop_job/ [post]
|
||||||
|
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := StopJobApiRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch job (that will be stopped) from db
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if req.JobId == nil {
|
||||||
|
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
|
||||||
|
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
if err != nil {
|
||||||
|
job, err = api.JobRepository.FindCached(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
// FIXME: Previous error is hidden
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
api.checkAndHandleStopJob(rw, job, req)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteJobById godoc
|
||||||
|
// @summary Remove a job from the sql database
|
||||||
|
// @tags Job remove
|
||||||
|
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
|
||||||
|
// @produce json
|
||||||
|
// @param id path int true "Database ID of Job"
|
||||||
|
// @success 200 {object} api.DefaultApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/delete_job/{id} [delete]
|
||||||
|
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Fetch job (that will be stopped) from db
|
||||||
|
id, ok := mux.Vars(r)["id"]
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
id, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = api.JobRepository.DeleteJobById(id)
|
||||||
|
} else {
|
||||||
|
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||||
|
Message: fmt.Sprintf("Successfully deleted job %s", id),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteJobByRequest godoc
|
||||||
|
// @summary Remove a job from the sql database
|
||||||
|
// @tags Job remove
|
||||||
|
// @description Job to delete is specified by request body. All fields are required in this case.
|
||||||
|
// @accept json
|
||||||
|
// @produce json
|
||||||
|
// @param request body api.DeleteJobApiRequest true "All fields required"
|
||||||
|
// @success 200 {object} api.DefaultApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/delete_job/ [delete]
|
||||||
|
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := DeleteJobApiRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch job (that will be deleted) from db
|
||||||
|
var job *schema.Job
|
||||||
|
var err error
|
||||||
|
if req.JobId == nil {
|
||||||
|
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = api.JobRepository.DeleteJobById(*job.ID)
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||||
|
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteJobBefore godoc
|
||||||
|
// @summary Remove jobs from the sql database
|
||||||
|
// @tags Job remove
|
||||||
|
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
|
||||||
|
// @produce json
|
||||||
|
// @param ts path int true "Unix epoch timestamp"
|
||||||
|
// @success 200 {object} api.DefaultApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/jobs/delete_job_before/{ts} [delete]
|
||||||
|
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
var cnt int
|
||||||
|
// Fetch job (that will be stopped) from db
|
||||||
|
id, ok := mux.Vars(r)["ts"]
|
||||||
|
var err error
|
||||||
|
if ok {
|
||||||
|
ts, e := strconv.ParseInt(id, 10, 64)
|
||||||
|
if e != nil {
|
||||||
|
handleError(fmt.Errorf("integer expected in path for ts: %w", e), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cnt, err = api.JobRepository.DeleteJobsBefore(ts)
|
||||||
|
} else {
|
||||||
|
handleError(errors.New("the parameter 'ts' is required"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting jobs failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(DefaultApiResponse{
|
||||||
|
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
|
||||||
|
// Sanity checks
|
||||||
|
if job.State != schema.JobStateRunning {
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if job == nil || job.StartTime > req.StopTime {
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if req.State != "" && !req.State.Valid() {
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : invalid requested job state: %#v", job.JobID, job.ID, job.Cluster, req.State), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
} else if req.State == "" {
|
||||||
|
req.State = schema.JobStateCompleted
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark job as stopped in the database (update state and duration)
|
||||||
|
job.Duration = int32(req.StopTime - job.StartTime)
|
||||||
|
job.State = req.State
|
||||||
|
api.JobRepository.Mutex.Lock()
|
||||||
|
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||||
|
if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||||
|
api.JobRepository.Mutex.Unlock()
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
api.JobRepository.Mutex.Unlock()
|
||||||
|
|
||||||
|
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||||
|
|
||||||
|
// Send a response (with status OK). This means that errors that happen from here on forward
|
||||||
|
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
||||||
|
// writing to the filesystem fails, the client will not know.
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(rw).Encode(job)
|
||||||
|
|
||||||
|
// Monitoring is disabled...
|
||||||
|
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trigger async archiving
|
||||||
|
archiver.TriggerArchiving(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
id := mux.Vars(r)["id"]
|
||||||
|
metrics := r.URL.Query()["metric"]
|
||||||
|
var scopes []schema.MetricScope
|
||||||
|
for _, scope := range r.URL.Query()["scope"] {
|
||||||
|
var s schema.MetricScope
|
||||||
|
if err := s.UnmarshalGQL(scope); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
scopes = append(scopes, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
|
||||||
|
type Respone struct {
|
||||||
|
Data *struct {
|
||||||
|
JobMetrics []*model.JobMetricWithName `json:"jobMetrics"`
|
||||||
|
} `json:"data"`
|
||||||
|
Error *struct {
|
||||||
|
Message string `json:"message"`
|
||||||
|
} `json:"error"`
|
||||||
|
}
|
||||||
|
|
||||||
|
resolver := graph.GetResolverInstance()
|
||||||
|
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||||
|
if err != nil {
|
||||||
|
json.NewEncoder(rw).Encode(Respone{
|
||||||
|
Error: &struct {
|
||||||
|
Message string "json:\"message\""
|
||||||
|
}{Message: err.Error()},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(rw).Encode(Respone{
|
||||||
|
Data: &struct {
|
||||||
|
JobMetrics []*model.JobMetricWithName "json:\"jobMetrics\""
|
||||||
|
}{JobMetrics: data},
|
||||||
|
})
|
||||||
|
}
|
80
internal/api/node.go
Normal file
80
internal/api/node.go
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Node struct {
|
||||||
|
Name string `json:"hostname"`
|
||||||
|
States []string `json:"states"`
|
||||||
|
CpusAllocated int `json:"cpusAllocated"`
|
||||||
|
CpusTotal int `json:"cpusTotal"`
|
||||||
|
MemoryAllocated int `json:"memoryAllocated"`
|
||||||
|
MemoryTotal int `json:"memoryTotal"`
|
||||||
|
GpusAllocated int `json:"gpusAllocated"`
|
||||||
|
GpusTotal int `json:"gpusTotal"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateNodeStatesRequest model
|
||||||
|
type UpdateNodeStatesRequest struct {
|
||||||
|
Nodes []Node `json:"nodes"`
|
||||||
|
Cluster string `json:"cluster" example:"fritz"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// this routine assumes that only one of them exists per node
|
||||||
|
func determineState(states []string) schema.NodeState {
|
||||||
|
for _, state := range states {
|
||||||
|
switch strings.ToLower(state) {
|
||||||
|
case "allocated":
|
||||||
|
return schema.NodeStateAllocated
|
||||||
|
case "reserved":
|
||||||
|
return schema.NodeStateReserved
|
||||||
|
case "idle":
|
||||||
|
return schema.NodeStateIdle
|
||||||
|
case "down":
|
||||||
|
return schema.NodeStateDown
|
||||||
|
case "mixed":
|
||||||
|
return schema.NodeStateMixed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return schema.NodeStateUnknown
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateNodeStates godoc
|
||||||
|
// @summary Deliver updated Slurm node states
|
||||||
|
// @tags Nodestates
|
||||||
|
// @description Returns a JSON-encoded list of users.
|
||||||
|
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||||
|
// @produce json
|
||||||
|
// @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
|
||||||
|
// @success 200 {object} api.DefaultApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/nodestats/ [post]
|
||||||
|
func (api *RestApi) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := UpdateNodeStatesRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
for _, node := range req.Nodes {
|
||||||
|
state := determineState(node.States)
|
||||||
|
repo.UpdateNodeState(node.Name, req.Cluster, &state)
|
||||||
|
}
|
||||||
|
}
|
1166
internal/api/rest.go
1166
internal/api/rest.go
File diff suppressed because it is too large
Load Diff
159
internal/api/user.go
Normal file
159
internal/api/user.go
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ApiReturnedUser struct {
|
||||||
|
Username string `json:"username"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Roles []string `json:"roles"`
|
||||||
|
Email string `json:"email"`
|
||||||
|
Projects []string `json:"projects"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// getUsers godoc
|
||||||
|
// @summary Returns a list of users
|
||||||
|
// @tags User
|
||||||
|
// @description Returns a JSON-encoded list of users.
|
||||||
|
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||||
|
// @produce json
|
||||||
|
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
|
||||||
|
// @success 200 {array} api.ApiReturnedUser "List of users returned successfully"
|
||||||
|
// @failure 400 {string} string "Bad Request"
|
||||||
|
// @failure 401 {string} string "Unauthorized"
|
||||||
|
// @failure 403 {string} string "Forbidden"
|
||||||
|
// @failure 500 {string} string "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/users/ [get]
|
||||||
|
func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to fetch a list of users", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
json.NewEncoder(rw).Encode(users)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to update a user", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Values
|
||||||
|
newrole := r.FormValue("add-role")
|
||||||
|
delrole := r.FormValue("remove-role")
|
||||||
|
newproj := r.FormValue("add-project")
|
||||||
|
delproj := r.FormValue("remove-project")
|
||||||
|
|
||||||
|
// TODO: Handle anything but roles...
|
||||||
|
if newrole != "" {
|
||||||
|
if err := repository.GetUserRepository().AddRole(r.Context(), mux.Vars(r)["id"], newrole); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Add Role Success"))
|
||||||
|
} else if delrole != "" {
|
||||||
|
if err := repository.GetUserRepository().RemoveRole(r.Context(), mux.Vars(r)["id"], delrole); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Remove Role Success"))
|
||||||
|
} else if newproj != "" {
|
||||||
|
if err := repository.GetUserRepository().AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Add Project Success"))
|
||||||
|
} else if delproj != "" {
|
||||||
|
if err := repository.GetUserRepository().RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.Write([]byte("Remove Project Success"))
|
||||||
|
} else {
|
||||||
|
http.Error(rw, "Not Add or Del [role|project]?", http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
rw.Header().Set("Content-Type", "text/plain")
|
||||||
|
me := repository.GetUserFromContext(r.Context())
|
||||||
|
if !me.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to create new users", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
username, password, role, name, email, project := r.FormValue("username"),
|
||||||
|
r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
|
||||||
|
r.FormValue("email"), r.FormValue("project")
|
||||||
|
|
||||||
|
if len(password) == 0 && role != schema.GetRoleString(schema.RoleApi) {
|
||||||
|
http.Error(rw, "Only API users are allowed to have a blank password (login will be impossible)", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
|
||||||
|
http.Error(rw, "only managers require a project (can be changed later)",
|
||||||
|
http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
|
||||||
|
http.Error(rw, "managers require a project to manage (can be changed later)",
|
||||||
|
http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := repository.GetUserRepository().AddUser(&schema.User{
|
||||||
|
Username: username,
|
||||||
|
Name: name,
|
||||||
|
Password: password,
|
||||||
|
Email: email,
|
||||||
|
Projects: []string{project},
|
||||||
|
Roles: []string{role},
|
||||||
|
}); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(rw, "User %v successfully created!\n", username)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to delete a user", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
username := r.FormValue("username")
|
||||||
|
if err := repository.GetUserRepository().DelUser(username); err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
}
|
@ -41,7 +41,7 @@ func archivingWorker() {
|
|||||||
// will fail if job meta not in repository
|
// will fail if job meta not in repository
|
||||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,7 +50,7 @@ func archivingWorker() {
|
|||||||
jobMeta, err := ArchiveJob(job, context.Background())
|
jobMeta, err := ArchiveJob(job, context.Background())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,7 +72,11 @@ func archivingWorker() {
|
|||||||
}
|
}
|
||||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||||
|
|
||||||
|
repository.CallJobStopHooks(job)
|
||||||
archivePending.Done()
|
archivePending.Done()
|
||||||
|
default:
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Writes a running job to the job-archive
|
// Writes a running job to the job-archive
|
||||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||||
allMetrics := make([]string, 0)
|
allMetrics := make([]string, 0)
|
||||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
for _, mc := range metricConfigs {
|
for _, mc := range metricConfigs {
|
||||||
@ -40,11 +40,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
jobMeta := &schema.JobMeta{
|
job.Statistics = make(map[string]schema.JobStatistics)
|
||||||
BaseJob: job.BaseJob,
|
|
||||||
StartTime: job.StartTime.Unix(),
|
|
||||||
Statistics: make(map[string]schema.JobStatistics),
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, data := range jobData {
|
for metric, data := range jobData {
|
||||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||||
@ -61,7 +57,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Round AVG Result to 2 Digits
|
// Round AVG Result to 2 Digits
|
||||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
job.Statistics[metric] = schema.JobStatistics{
|
||||||
Unit: schema.Unit{
|
Unit: schema.Unit{
|
||||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||||
@ -76,8 +72,8 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|||||||
// only return the JobMeta structure as the
|
// only return the JobMeta structure as the
|
||||||
// statistics in there are needed.
|
// statistics in there are needed.
|
||||||
if config.Keys.DisableArchive {
|
if config.Keys.DisableArchive {
|
||||||
return jobMeta, nil
|
return job, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
return job, archive.GetHandle().ImportJob(job, &jobData)
|
||||||
}
|
}
|
||||||
|
@ -237,7 +237,7 @@ func (auth *Authentication) Login(
|
|||||||
limiter := getIPUserLimiter(ip, username)
|
limiter := getIPUserLimiter(ip, username)
|
||||||
if !limiter.Allow() {
|
if !limiter.Allow() {
|
||||||
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
||||||
onfailure(rw, r, errors.New("Too many login attempts, try again in a few minutes."))
|
onfailure(rw, r, errors.New("too many login attempts, try again in a few minutes"))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -50,6 +50,7 @@ type IntRangeOutput struct {
|
|||||||
|
|
||||||
type JobFilter struct {
|
type JobFilter struct {
|
||||||
Tags []string `json:"tags,omitempty"`
|
Tags []string `json:"tags,omitempty"`
|
||||||
|
DbID []string `json:"dbId,omitempty"`
|
||||||
JobID *StringInput `json:"jobId,omitempty"`
|
JobID *StringInput `json:"jobId,omitempty"`
|
||||||
ArrayJobID *int `json:"arrayJobId,omitempty"`
|
ArrayJobID *int `json:"arrayJobId,omitempty"`
|
||||||
User *StringInput `json:"user,omitempty"`
|
User *StringInput `json:"user,omitempty"`
|
||||||
@ -96,14 +97,16 @@ type JobResultList struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type JobStats struct {
|
type JobStats struct {
|
||||||
Name string `json:"name"`
|
ID int `json:"id"`
|
||||||
Stats *schema.MetricStatistics `json:"stats"`
|
JobID string `json:"jobId"`
|
||||||
}
|
StartTime int `json:"startTime"`
|
||||||
|
Duration int `json:"duration"`
|
||||||
type JobStatsWithScope struct {
|
Cluster string `json:"cluster"`
|
||||||
Name string `json:"name"`
|
SubCluster string `json:"subCluster"`
|
||||||
Scope schema.MetricScope `json:"scope"`
|
NumNodes int `json:"numNodes"`
|
||||||
Stats []*ScopedStats `json:"stats"`
|
NumHWThreads *int `json:"numHWThreads,omitempty"`
|
||||||
|
NumAccelerators *int `json:"numAccelerators,omitempty"`
|
||||||
|
Stats []*NamedStats `json:"stats"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobsStatistics struct {
|
type JobsStatistics struct {
|
||||||
@ -153,12 +156,41 @@ type MetricStatItem struct {
|
|||||||
type Mutation struct {
|
type Mutation struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NamedStats struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Data *schema.MetricStatistics `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NamedStatsWithScope struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Scope schema.MetricScope `json:"scope"`
|
||||||
|
Stats []*ScopedStats `json:"stats"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeFilter struct {
|
||||||
|
Hostname *StringInput `json:"hostname,omitempty"`
|
||||||
|
Cluster *StringInput `json:"cluster,omitempty"`
|
||||||
|
SubCluster *StringInput `json:"subCluster,omitempty"`
|
||||||
|
NodeState *string `json:"nodeState,omitempty"`
|
||||||
|
HealthState *schema.NodeState `json:"healthState,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type NodeMetrics struct {
|
type NodeMetrics struct {
|
||||||
Host string `json:"host"`
|
Host string `json:"host"`
|
||||||
SubCluster string `json:"subCluster"`
|
SubCluster string `json:"subCluster"`
|
||||||
Metrics []*JobMetricWithName `json:"metrics"`
|
Metrics []*JobMetricWithName `json:"metrics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeStateResultList struct {
|
||||||
|
Items []*schema.Node `json:"items"`
|
||||||
|
Count *int `json:"count,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStats struct {
|
||||||
|
State string `json:"state"`
|
||||||
|
Count int `json:"count"`
|
||||||
|
}
|
||||||
|
|
||||||
type NodesResultList struct {
|
type NodesResultList struct {
|
||||||
Items []*NodeMetrics `json:"items"`
|
Items []*NodeMetrics `json:"items"`
|
||||||
Offset *int `json:"offset,omitempty"`
|
Offset *int `json:"offset,omitempty"`
|
||||||
|
@ -29,9 +29,15 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
|||||||
return r.Repo.Partitions(obj.Name)
|
return r.Repo.Partitions(obj.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StartTime is the resolver for the startTime field.
|
||||||
|
func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) {
|
||||||
|
timestamp := time.Unix(obj.StartTime, 0)
|
||||||
|
return ×tamp, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Tags is the resolver for the tags field.
|
// Tags is the resolver for the tags field.
|
||||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
|
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||||
@ -143,7 +149,7 @@ func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name s
|
|||||||
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to create tag with scope: %s", scope)
|
log.Warnf("Not authorized to create tag with scope: %s", scope)
|
||||||
return nil, fmt.Errorf("Not authorized to create tag with scope: %s", scope)
|
return nil, fmt.Errorf("not authorized to create tag with scope: %s", scope)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,7 +185,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (ID): %d", tid)
|
log.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
@ -193,7 +199,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to add tag: %d", tid)
|
log.Warnf("Not authorized to add tag: %d", tid)
|
||||||
return nil, fmt.Errorf("Not authorized to add tag: %d", tid)
|
return nil, fmt.Errorf("not authorized to add tag: %d", tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -226,7 +232,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
|||||||
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (ID): %d", tid)
|
log.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
@ -240,7 +246,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to remove tag: %d", tid)
|
log.Warnf("Not authorized to remove tag: %d", tid)
|
||||||
return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
|
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -269,7 +275,7 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
|
|||||||
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (ID): %d", tid)
|
log.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
return nil, fmt.Errorf("Tag does not exist (ID): %d", tid)
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test Access: Admins && Admin Tag OR Everyone && Private Tag
|
// Test Access: Admins && Admin Tag OR Everyone && Private Tag
|
||||||
@ -283,7 +289,7 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Not authorized to remove tag: %d", tid)
|
log.Warnf("Not authorized to remove tag: %d", tid)
|
||||||
return nil, fmt.Errorf("Not authorized to remove tag: %d", tid)
|
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tags, nil
|
return tags, nil
|
||||||
@ -299,6 +305,21 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string,
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NodeState is the resolver for the nodeState field.
|
||||||
|
func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: NodeState - nodeState"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// HealthState is the resolver for the HealthState field.
|
||||||
|
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: HealthState - HealthState"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MetaData is the resolver for the metaData field.
|
||||||
|
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: MetaData - metaData"))
|
||||||
|
}
|
||||||
|
|
||||||
// Clusters is the resolver for the clusters field.
|
// Clusters is the resolver for the clusters field.
|
||||||
func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
|
func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
|
||||||
return archive.Clusters, nil
|
return archive.Clusters, nil
|
||||||
@ -338,6 +359,30 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
|
|||||||
return counts, nil
|
return counts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Node is the resolver for the node field.
|
||||||
|
func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) {
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
numericId, err := strconv.ParseInt(id, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while parsing job id")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return repo.GetNode(numericId, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nodes is the resolver for the nodes field.
|
||||||
|
func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
nodes, err := repo.QueryNodes(ctx, filter, order)
|
||||||
|
count := len(nodes)
|
||||||
|
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// NodeStats is the resolver for the nodeStats field.
|
||||||
|
func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: NodeStats - nodeStats"))
|
||||||
|
}
|
||||||
|
|
||||||
// Job is the resolver for the job field.
|
// Job is the resolver for the job field.
|
||||||
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
||||||
numericId, err := strconv.ParseInt(id, 10, 64)
|
numericId, err := strconv.ParseInt(id, 10, 64)
|
||||||
@ -400,7 +445,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
|
|||||||
}
|
}
|
||||||
|
|
||||||
// JobStats is the resolver for the jobStats field.
|
// JobStats is the resolver for the jobStats field.
|
||||||
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.JobStats, error) {
|
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) {
|
||||||
job, err := r.Query().Job(ctx, id)
|
job, err := r.Query().Job(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("Error while querying job %s for metadata", id)
|
log.Warnf("Error while querying job %s for metadata", id)
|
||||||
@ -413,11 +458,11 @@ func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []strin
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
res := []*model.JobStats{}
|
res := []*model.NamedStats{}
|
||||||
for name, md := range data {
|
for name, md := range data {
|
||||||
res = append(res, &model.JobStats{
|
res = append(res, &model.NamedStats{
|
||||||
Name: name,
|
Name: name,
|
||||||
Stats: &md,
|
Data: &md,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -425,7 +470,7 @@ func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []strin
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ScopedJobStats is the resolver for the scopedJobStats field.
|
// ScopedJobStats is the resolver for the scopedJobStats field.
|
||||||
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobStatsWithScope, error) {
|
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.NamedStatsWithScope, error) {
|
||||||
job, err := r.Query().Job(ctx, id)
|
job, err := r.Query().Job(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("Error while querying job %s for metadata", id)
|
log.Warnf("Error while querying job %s for metadata", id)
|
||||||
@ -438,7 +483,7 @@ func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics [
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
res := make([]*model.JobStatsWithScope, 0)
|
res := make([]*model.NamedStatsWithScope, 0)
|
||||||
for name, scoped := range data {
|
for name, scoped := range data {
|
||||||
for scope, stats := range scoped {
|
for scope, stats := range scoped {
|
||||||
|
|
||||||
@ -451,7 +496,7 @@ func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics [
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
res = append(res, &model.JobStatsWithScope{
|
res = append(res, &model.NamedStatsWithScope{
|
||||||
Name: name,
|
Name: name,
|
||||||
Scope: scope,
|
Scope: scope,
|
||||||
Stats: mdlStats,
|
Stats: mdlStats,
|
||||||
@ -462,12 +507,6 @@ func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics [
|
|||||||
return res, nil
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// JobsFootprints is the resolver for the jobsFootprints field.
|
|
||||||
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
|
||||||
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
|
|
||||||
return r.jobsFootprints(ctx, filter, metrics)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Jobs is the resolver for the jobs field.
|
// Jobs is the resolver for the jobs field.
|
||||||
func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error) {
|
func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error) {
|
||||||
if page == nil {
|
if page == nil {
|
||||||
@ -505,10 +544,7 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
hasNextPage := false
|
hasNextPage := len(nextJobs) == 1
|
||||||
if len(nextJobs) == 1 {
|
|
||||||
hasNextPage = true
|
|
||||||
}
|
|
||||||
|
|
||||||
return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
|
return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
|
||||||
}
|
}
|
||||||
@ -519,8 +555,8 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
var stats []*model.JobsStatistics
|
var stats []*model.JobsStatistics
|
||||||
|
|
||||||
// Top Level Defaults
|
// Top Level Defaults
|
||||||
var defaultDurationBins string = "1h"
|
defaultDurationBins := "1h"
|
||||||
var defaultMetricBins int = 10
|
defaultMetricBins := 10
|
||||||
|
|
||||||
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
||||||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
||||||
@ -589,6 +625,62 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JobsMetricStats is the resolver for the jobsMetricStats field.
|
||||||
|
func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.JobStats, error) {
|
||||||
|
// No Paging, Fixed Order by StartTime ASC
|
||||||
|
order := &model.OrderByInput{
|
||||||
|
Field: "startTime",
|
||||||
|
Type: "col",
|
||||||
|
Order: "ASC",
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs, err := r.Repo.QueryJobs(ctx, filter, nil, order)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while querying jobs for comparison")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.JobStats{}
|
||||||
|
for _, job := range jobs {
|
||||||
|
data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while loading comparison jobStats data for job id %d", job.JobID)
|
||||||
|
continue
|
||||||
|
// return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
sres := []*model.NamedStats{}
|
||||||
|
for name, md := range data {
|
||||||
|
sres = append(sres, &model.NamedStats{
|
||||||
|
Name: name,
|
||||||
|
Data: &md,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
numThreadsInt := int(job.NumHWThreads)
|
||||||
|
numAccsInt := int(job.NumAcc)
|
||||||
|
res = append(res, &model.JobStats{
|
||||||
|
ID: int(*job.ID),
|
||||||
|
JobID: strconv.Itoa(int(job.JobID)),
|
||||||
|
StartTime: int(job.StartTime),
|
||||||
|
Duration: int(job.Duration),
|
||||||
|
Cluster: job.Cluster,
|
||||||
|
SubCluster: job.SubCluster,
|
||||||
|
NumNodes: int(job.NumNodes),
|
||||||
|
NumHWThreads: &numThreadsInt,
|
||||||
|
NumAccelerators: &numAccsInt,
|
||||||
|
Stats: sres,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// JobsFootprints is the resolver for the jobsFootprints field.
|
||||||
|
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||||
|
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
|
||||||
|
return r.jobsFootprints(ctx, filter, metrics)
|
||||||
|
}
|
||||||
|
|
||||||
// RooflineHeatmap is the resolver for the rooflineHeatmap field.
|
// RooflineHeatmap is the resolver for the rooflineHeatmap field.
|
||||||
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
||||||
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
||||||
@ -723,15 +815,21 @@ func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricV
|
|||||||
// Mutation returns generated.MutationResolver implementation.
|
// Mutation returns generated.MutationResolver implementation.
|
||||||
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
||||||
|
|
||||||
|
// Node returns generated.NodeResolver implementation.
|
||||||
|
func (r *Resolver) Node() generated.NodeResolver { return &nodeResolver{r} }
|
||||||
|
|
||||||
// Query returns generated.QueryResolver implementation.
|
// Query returns generated.QueryResolver implementation.
|
||||||
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
|
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
|
||||||
|
|
||||||
// SubCluster returns generated.SubClusterResolver implementation.
|
// SubCluster returns generated.SubClusterResolver implementation.
|
||||||
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
|
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
|
||||||
|
|
||||||
type clusterResolver struct{ *Resolver }
|
type (
|
||||||
type jobResolver struct{ *Resolver }
|
clusterResolver struct{ *Resolver }
|
||||||
type metricValueResolver struct{ *Resolver }
|
jobResolver struct{ *Resolver }
|
||||||
type mutationResolver struct{ *Resolver }
|
metricValueResolver struct{ *Resolver }
|
||||||
type queryResolver struct{ *Resolver }
|
mutationResolver struct{ *Resolver }
|
||||||
type subClusterResolver struct{ *Resolver }
|
nodeResolver struct{ *Resolver }
|
||||||
|
queryResolver struct{ *Resolver }
|
||||||
|
subClusterResolver struct{ *Resolver }
|
||||||
|
)
|
||||||
|
@ -42,7 +42,10 @@ func HandleImportFlag(flag string) error {
|
|||||||
}
|
}
|
||||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||||
dec.DisallowUnknownFields()
|
dec.DisallowUnknownFields()
|
||||||
job := schema.JobMeta{BaseJob: schema.JobDefaults}
|
job := schema.Job{
|
||||||
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
if err = dec.Decode(&job); err != nil {
|
if err = dec.Decode(&job); err != nil {
|
||||||
log.Warn("Error while decoding raw json metadata for import")
|
log.Warn("Error while decoding raw json metadata for import")
|
||||||
return err
|
return err
|
||||||
@ -141,7 +144,7 @@ func HandleImportFlag(flag string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = SanityChecks(&job.BaseJob); err != nil {
|
if err = SanityChecks(&job); err != nil {
|
||||||
log.Warn("BaseJob SanityChecks failed")
|
log.Warn("BaseJob SanityChecks failed")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -166,7 +166,7 @@ func TestHandleImportFlag(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result := readResult(t, testname)
|
result := readResult(t, testname)
|
||||||
job, err := r.Find(&result.JobId, &result.Cluster, &result.StartTime)
|
job, err := r.FindCached(&result.JobId, &result.Cluster, &result.StartTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -60,11 +60,6 @@ func InitDB() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||||
job := schema.Job{
|
|
||||||
BaseJob: jobMeta.BaseJob,
|
|
||||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
|
||||||
StartTimeUnix: jobMeta.StartTime,
|
|
||||||
}
|
|
||||||
|
|
||||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -72,7 +67,7 @@ func InitDB() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Footprint = make(map[string]float64)
|
jobMeta.Footprint = make(map[string]float64)
|
||||||
|
|
||||||
for _, fp := range sc.Footprint {
|
for _, fp := range sc.Footprint {
|
||||||
statType := "avg"
|
statType := "avg"
|
||||||
@ -83,16 +78,16 @@ func InitDB() error {
|
|||||||
|
|
||||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||||
|
|
||||||
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
jobMeta.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||||
}
|
}
|
||||||
|
|
||||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
jobMeta.RawFootprint, err = json.Marshal(jobMeta.Footprint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while marshaling job footprint")
|
log.Warn("Error while marshaling job footprint")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
job.EnergyFootprint = make(map[string]float64)
|
jobMeta.EnergyFootprint = make(map[string]float64)
|
||||||
|
|
||||||
// Total Job Energy Outside Loop
|
// Total Job Energy Outside Loop
|
||||||
totalEnergy := 0.0
|
totalEnergy := 0.0
|
||||||
@ -117,45 +112,45 @@ func InitDB() error {
|
|||||||
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
job.EnergyFootprint[fp] = metricEnergy
|
jobMeta.EnergyFootprint[fp] = metricEnergy
|
||||||
totalEnergy += metricEnergy
|
totalEnergy += metricEnergy
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
jobMeta.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
if jobMeta.RawEnergyFootprint, err = json.Marshal(jobMeta.EnergyFootprint); err != nil {
|
||||||
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
job.RawResources, err = json.Marshal(job.Resources)
|
jobMeta.RawResources, err = json.Marshal(jobMeta.Resources)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
log.Errorf("repository initDB(): %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
jobMeta.RawMetaData, err = json.Marshal(jobMeta.MetaData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
log.Errorf("repository initDB(): %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
if err := SanityChecks(jobMeta); err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
log.Errorf("repository initDB(): %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
id, err := r.TransactionAddNamed(t,
|
id, err := r.TransactionAddNamed(t,
|
||||||
repository.NamedJobInsert, job)
|
repository.NamedJobInsert, jobMeta)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
log.Errorf("repository initDB(): %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tag := range job.Tags {
|
for _, tag := range jobMeta.Tags {
|
||||||
tagstr := tag.Name + ":" + tag.Type
|
tagstr := tag.Name + ":" + tag.Type
|
||||||
tagId, ok := tags[tagstr]
|
tagId, ok := tags[tagstr]
|
||||||
if !ok {
|
if !ok {
|
||||||
@ -190,7 +185,7 @@ func InitDB() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This function also sets the subcluster if necessary!
|
// This function also sets the subcluster if necessary!
|
||||||
func SanityChecks(job *schema.BaseJob) error {
|
func SanityChecks(job *schema.Job) error {
|
||||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||||
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
||||||
}
|
}
|
||||||
|
@ -183,8 +183,8 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
|
|
||||||
req := ApiQueryRequest{
|
req := ApiQueryRequest{
|
||||||
Cluster: job.Cluster,
|
Cluster: job.Cluster,
|
||||||
From: job.StartTime.Unix(),
|
From: job.StartTime,
|
||||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
To: job.StartTime + int64(job.Duration),
|
||||||
Queries: queries,
|
Queries: queries,
|
||||||
WithStats: true,
|
WithStats: true,
|
||||||
WithData: true,
|
WithData: true,
|
||||||
@ -570,7 +570,6 @@ func (ccms *CCMetricStore) LoadStats(
|
|||||||
metrics []string,
|
metrics []string,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||||
|
|
||||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
|
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error())
|
log.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error())
|
||||||
@ -579,8 +578,8 @@ func (ccms *CCMetricStore) LoadStats(
|
|||||||
|
|
||||||
req := ApiQueryRequest{
|
req := ApiQueryRequest{
|
||||||
Cluster: job.Cluster,
|
Cluster: job.Cluster,
|
||||||
From: job.StartTime.Unix(),
|
From: job.StartTime,
|
||||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
To: job.StartTime + int64(job.Duration),
|
||||||
Queries: queries,
|
Queries: queries,
|
||||||
WithStats: true,
|
WithStats: true,
|
||||||
WithData: false,
|
WithData: false,
|
||||||
@ -638,8 +637,8 @@ func (ccms *CCMetricStore) LoadScopedStats(
|
|||||||
|
|
||||||
req := ApiQueryRequest{
|
req := ApiQueryRequest{
|
||||||
Cluster: job.Cluster,
|
Cluster: job.Cluster,
|
||||||
From: job.StartTime.Unix(),
|
From: job.StartTime,
|
||||||
To: job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
|
To: job.StartTime + int64(job.Duration),
|
||||||
Queries: queries,
|
Queries: queries,
|
||||||
WithStats: true,
|
WithStats: true,
|
||||||
WithData: false,
|
WithData: false,
|
||||||
@ -816,7 +815,6 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
|||||||
page *model.PageRequest,
|
page *model.PageRequest,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
) (map[string]schema.JobData, int, bool, error) {
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
|
||||||
// 0) Init additional vars
|
// 0) Init additional vars
|
||||||
var totalNodes int = 0
|
var totalNodes int = 0
|
||||||
var hasNextPage bool = false
|
var hasNextPage bool = false
|
||||||
@ -852,7 +850,7 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
|||||||
if len(nodes) > page.ItemsPerPage {
|
if len(nodes) > page.ItemsPerPage {
|
||||||
start := (page.Page - 1) * page.ItemsPerPage
|
start := (page.Page - 1) * page.ItemsPerPage
|
||||||
end := start + page.ItemsPerPage
|
end := start + page.ItemsPerPage
|
||||||
if end > len(nodes) {
|
if end >= len(nodes) {
|
||||||
end = len(nodes)
|
end = len(nodes)
|
||||||
hasNextPage = false
|
hasNextPage = false
|
||||||
} else {
|
} else {
|
||||||
@ -975,7 +973,6 @@ func (ccms *CCMetricStore) buildNodeQueries(
|
|||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
resolution int,
|
resolution int,
|
||||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||||
|
|
||||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||||
assignedScope := []schema.MetricScope{}
|
assignedScope := []schema.MetricScope{}
|
||||||
|
|
||||||
|
@ -1,575 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package metricdata
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"crypto/tls"
|
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
|
||||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
|
||||||
)
|
|
||||||
|
|
||||||
type InfluxDBv2DataRepositoryConfig struct {
|
|
||||||
Url string `json:"url"`
|
|
||||||
Token string `json:"token"`
|
|
||||||
Bucket string `json:"bucket"`
|
|
||||||
Org string `json:"org"`
|
|
||||||
SkipTls bool `json:"skiptls"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type InfluxDBv2DataRepository struct {
|
|
||||||
client influxdb2.Client
|
|
||||||
queryClient influxdb2Api.QueryAPI
|
|
||||||
bucket, measurement string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
|
|
||||||
var config InfluxDBv2DataRepositoryConfig
|
|
||||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
|
||||||
log.Warn("Error while unmarshaling raw json config")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
idb.client = influxdb2.NewClientWithOptions(config.Url, config.Token, influxdb2.DefaultOptions().SetTLSConfig(&tls.Config{InsecureSkipVerify: config.SkipTls}))
|
|
||||||
idb.queryClient = idb.client.QueryAPI(config.Org)
|
|
||||||
idb.bucket = config.Bucket
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) formatTime(t time.Time) string {
|
|
||||||
return t.Format(time.RFC3339) // Like “2006-01-02T15:04:05Z07:00”
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) epochToTime(epoch int64) time.Time {
|
|
||||||
return time.Unix(epoch, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadData(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
ctx context.Context,
|
|
||||||
resolution int) (schema.JobData, error) {
|
|
||||||
|
|
||||||
log.Infof("InfluxDB 2 Backend: Resolution Scaling not Implemented, will return default timestep. Requested Resolution %d", resolution)
|
|
||||||
|
|
||||||
measurementsConds := make([]string, 0, len(metrics))
|
|
||||||
for _, m := range metrics {
|
|
||||||
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
|
|
||||||
}
|
|
||||||
measurementsCond := strings.Join(measurementsConds, " or ")
|
|
||||||
|
|
||||||
hostsConds := make([]string, 0, len(job.Resources))
|
|
||||||
for _, h := range job.Resources {
|
|
||||||
if h.HWThreads != nil || h.Accelerators != nil {
|
|
||||||
// TODO
|
|
||||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
|
||||||
}
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
|
||||||
}
|
|
||||||
hostsCond := strings.Join(hostsConds, " or ")
|
|
||||||
|
|
||||||
jobData := make(schema.JobData) // Empty Schema: map[<string>FIELD]map[<MetricScope>SCOPE]<*JobMetric>METRIC
|
|
||||||
// Requested Scopes
|
|
||||||
for _, scope := range scopes {
|
|
||||||
query := ""
|
|
||||||
switch scope {
|
|
||||||
case "node":
|
|
||||||
// Get Finest Granularity, Groupy By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows <-- Resolution could be added here?
|
|
||||||
// log.Info("Scope 'node' requested. ")
|
|
||||||
query = fmt.Sprintf(`
|
|
||||||
from(bucket: "%s")
|
|
||||||
|> range(start: %s, stop: %s)
|
|
||||||
|> filter(fn: (r) => (%s) and (%s) )
|
|
||||||
|> drop(columns: ["_start", "_stop"])
|
|
||||||
|> group(columns: ["hostname", "_measurement"])
|
|
||||||
|> aggregateWindow(every: 60s, fn: mean)
|
|
||||||
|> drop(columns: ["_time"])`,
|
|
||||||
idb.bucket,
|
|
||||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
|
||||||
measurementsCond, hostsCond)
|
|
||||||
case "socket":
|
|
||||||
log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
case "core":
|
|
||||||
log.Info(" Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
// Get Finest Granularity only, Set NULL to 0.0
|
|
||||||
// query = fmt.Sprintf(`
|
|
||||||
// from(bucket: "%s")
|
|
||||||
// |> range(start: %s, stop: %s)
|
|
||||||
// |> filter(fn: (r) => %s )
|
|
||||||
// |> filter(fn: (r) => %s )
|
|
||||||
// |> drop(columns: ["_start", "_stop", "cluster"])
|
|
||||||
// |> map(fn: (r) => (if exists r._value then {r with _value: r._value} else {r with _value: 0.0}))`,
|
|
||||||
// idb.bucket,
|
|
||||||
// idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
|
|
||||||
// measurementsCond, hostsCond)
|
|
||||||
case "hwthread":
|
|
||||||
log.Info(" Scope 'hwthread' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
case "accelerator":
|
|
||||||
log.Info(" Scope 'accelerator' requested, but not yet supported: Will return 'node' scope only. ")
|
|
||||||
continue
|
|
||||||
default:
|
|
||||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
|
||||||
continue
|
|
||||||
// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
|
|
||||||
}
|
|
||||||
|
|
||||||
rows, err := idb.queryClient.Query(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while performing query")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Init Metrics: Only Node level now -> TODO: Matching /check on scope level ...
|
|
||||||
for _, metric := range metrics {
|
|
||||||
jobMetric, ok := jobData[metric]
|
|
||||||
if !ok {
|
|
||||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
|
||||||
jobMetric = map[schema.MetricScope]*schema.JobMetric{
|
|
||||||
scope: { // uses scope var from above!
|
|
||||||
Unit: mc.Unit,
|
|
||||||
Timestep: mc.Timestep,
|
|
||||||
Series: make([]schema.Series, 0, len(job.Resources)),
|
|
||||||
StatisticsSeries: nil, // Should be: &schema.StatsSeries{},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jobData[metric] = jobMetric
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process Result: Time-Data
|
|
||||||
field, host, hostSeries := "", "", schema.Series{}
|
|
||||||
// typeId := 0
|
|
||||||
switch scope {
|
|
||||||
case "node":
|
|
||||||
for rows.Next() {
|
|
||||||
row := rows.Record()
|
|
||||||
if host == "" || host != row.ValueByKey("hostname").(string) || rows.TableChanged() {
|
|
||||||
if host != "" {
|
|
||||||
// Append Series before reset
|
|
||||||
jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
|
||||||
}
|
|
||||||
field, host = row.Measurement(), row.ValueByKey("hostname").(string)
|
|
||||||
hostSeries = schema.Series{
|
|
||||||
Hostname: host,
|
|
||||||
Statistics: schema.MetricStatistics{}, //TODO Add Statistics
|
|
||||||
Data: make([]schema.Float, 0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
val, ok := row.Value().(float64)
|
|
||||||
if ok {
|
|
||||||
hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
|
||||||
} else {
|
|
||||||
hostSeries.Data = append(hostSeries.Data, schema.Float(0))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "socket":
|
|
||||||
continue
|
|
||||||
case "accelerator":
|
|
||||||
continue
|
|
||||||
case "hwthread":
|
|
||||||
// See below @ core
|
|
||||||
continue
|
|
||||||
case "core":
|
|
||||||
continue
|
|
||||||
// Include Series.Id in hostSeries
|
|
||||||
// for rows.Next() {
|
|
||||||
// row := rows.Record()
|
|
||||||
// if ( host == "" || host != row.ValueByKey("hostname").(string) || typeId != row.ValueByKey("type-id").(int) || rows.TableChanged() ) {
|
|
||||||
// if ( host != "" ) {
|
|
||||||
// // Append Series before reset
|
|
||||||
// jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
|
||||||
// }
|
|
||||||
// field, host, typeId = row.Measurement(), row.ValueByKey("hostname").(string), row.ValueByKey("type-id").(int)
|
|
||||||
// hostSeries = schema.Series{
|
|
||||||
// Hostname: host,
|
|
||||||
// Id: &typeId,
|
|
||||||
// Statistics: nil,
|
|
||||||
// Data: make([]schema.Float, 0),
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// val := row.Value().(float64)
|
|
||||||
// hostSeries.Data = append(hostSeries.Data, schema.Float(val))
|
|
||||||
// }
|
|
||||||
default:
|
|
||||||
log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
|
|
||||||
continue
|
|
||||||
// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
|
|
||||||
}
|
|
||||||
// Append last Series
|
|
||||||
jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get Stats
|
|
||||||
stats, err := idb.LoadStats(job, metrics, ctx)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while loading statistics")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, scope := range scopes {
|
|
||||||
if scope == "node" { // No 'socket/core' support yet
|
|
||||||
for metric, nodes := range stats {
|
|
||||||
for node, stats := range nodes {
|
|
||||||
for index, _ := range jobData[metric][scope].Series {
|
|
||||||
if jobData[metric][scope].Series[index].Hostname == node {
|
|
||||||
jobData[metric][scope].Series[index].Statistics = schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return jobData, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadStats(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
|
||||||
|
|
||||||
stats := map[string]map[string]schema.MetricStatistics{}
|
|
||||||
|
|
||||||
hostsConds := make([]string, 0, len(job.Resources))
|
|
||||||
for _, h := range job.Resources {
|
|
||||||
if h.HWThreads != nil || h.Accelerators != nil {
|
|
||||||
// TODO
|
|
||||||
return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
|
|
||||||
}
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
|
|
||||||
}
|
|
||||||
hostsCond := strings.Join(hostsConds, " or ")
|
|
||||||
|
|
||||||
// lenMet := len(metrics)
|
|
||||||
|
|
||||||
for _, metric := range metrics {
|
|
||||||
// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)
|
|
||||||
|
|
||||||
query := fmt.Sprintf(`
|
|
||||||
data = from(bucket: "%s")
|
|
||||||
|> range(start: %s, stop: %s)
|
|
||||||
|> filter(fn: (r) => r._measurement == "%s" and r._field == "value" and (%s))
|
|
||||||
union(tables: [data |> mean(column: "_value") |> set(key: "_field", value: "avg"),
|
|
||||||
data |> min(column: "_value") |> set(key: "_field", value: "min"),
|
|
||||||
data |> max(column: "_value") |> set(key: "_field", value: "max")])
|
|
||||||
|> pivot(rowKey: ["hostname"], columnKey: ["_field"], valueColumn: "_value")
|
|
||||||
|> group()`,
|
|
||||||
idb.bucket,
|
|
||||||
idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
|
|
||||||
metric, hostsCond)
|
|
||||||
|
|
||||||
rows, err := idb.queryClient.Query(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while performing query")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes := map[string]schema.MetricStatistics{}
|
|
||||||
for rows.Next() {
|
|
||||||
row := rows.Record()
|
|
||||||
host := row.ValueByKey("hostname").(string)
|
|
||||||
|
|
||||||
avg, avgok := row.ValueByKey("avg").(float64)
|
|
||||||
if !avgok {
|
|
||||||
// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
|
|
||||||
avg = 0.0
|
|
||||||
}
|
|
||||||
min, minok := row.ValueByKey("min").(float64)
|
|
||||||
if !minok {
|
|
||||||
// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
|
|
||||||
min = 0.0
|
|
||||||
}
|
|
||||||
max, maxok := row.ValueByKey("max").(float64)
|
|
||||||
if !maxok {
|
|
||||||
// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
|
|
||||||
max = 0.0
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes[host] = schema.MetricStatistics{
|
|
||||||
Avg: avg,
|
|
||||||
Min: min,
|
|
||||||
Max: max,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats[metric] = nodes
|
|
||||||
}
|
|
||||||
|
|
||||||
return stats, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in Job-View StatsTable
|
|
||||||
// UNTESTED
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadScopedStats(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
ctx context.Context) (schema.ScopedJobStats, error) {
|
|
||||||
|
|
||||||
// Assumption: idb.loadData() only returns series node-scope - use node scope for statsTable
|
|
||||||
scopedJobStats := make(schema.ScopedJobStats)
|
|
||||||
data, err := idb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while loading job for scopedJobStats")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, metricData := range data {
|
|
||||||
for _, scope := range scopes {
|
|
||||||
if scope != schema.MetricScopeNode {
|
|
||||||
logOnce.Do(func() {
|
|
||||||
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := scopedJobStats[metric]; !ok {
|
|
||||||
scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, ok := scopedJobStats[metric][scope]; !ok {
|
|
||||||
scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, series := range metricData[scope].Series {
|
|
||||||
scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
|
|
||||||
Hostname: series.Hostname,
|
|
||||||
Data: &series.Statistics,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return scopedJobStats, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in Systems-View @ Node-Overview
|
|
||||||
// UNTESTED
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
|
||||||
cluster string,
|
|
||||||
metrics, nodes []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
from, to time.Time,
|
|
||||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
|
||||||
|
|
||||||
// Note: scopes[] Array will be ignored, only return node scope
|
|
||||||
|
|
||||||
// CONVERT ARGS TO INFLUX
|
|
||||||
measurementsConds := make([]string, 0)
|
|
||||||
for _, m := range metrics {
|
|
||||||
measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
|
|
||||||
}
|
|
||||||
measurementsCond := strings.Join(measurementsConds, " or ")
|
|
||||||
|
|
||||||
hostsConds := make([]string, 0)
|
|
||||||
if nodes == nil {
|
|
||||||
var allNodes []string
|
|
||||||
subClusterNodeLists := archive.NodeLists[cluster]
|
|
||||||
for _, nodeList := range subClusterNodeLists {
|
|
||||||
allNodes = append(nodes, nodeList.PrintList()...)
|
|
||||||
}
|
|
||||||
for _, node := range allNodes {
|
|
||||||
nodes = append(nodes, node)
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for _, node := range nodes {
|
|
||||||
hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, node))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
hostsCond := strings.Join(hostsConds, " or ")
|
|
||||||
|
|
||||||
// BUILD AND PERFORM QUERY
|
|
||||||
query := fmt.Sprintf(`
|
|
||||||
from(bucket: "%s")
|
|
||||||
|> range(start: %s, stop: %s)
|
|
||||||
|> filter(fn: (r) => (%s) and (%s) )
|
|
||||||
|> drop(columns: ["_start", "_stop"])
|
|
||||||
|> group(columns: ["hostname", "_measurement"])
|
|
||||||
|> aggregateWindow(every: 60s, fn: mean)
|
|
||||||
|> drop(columns: ["_time"])`,
|
|
||||||
idb.bucket,
|
|
||||||
idb.formatTime(from), idb.formatTime(to),
|
|
||||||
measurementsCond, hostsCond)
|
|
||||||
|
|
||||||
rows, err := idb.queryClient.Query(ctx, query)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while performing query")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// HANDLE QUERY RETURN
|
|
||||||
// Collect Float Arrays for Node@Metric -> No Scope Handling!
|
|
||||||
influxData := make(map[string]map[string][]schema.Float)
|
|
||||||
for rows.Next() {
|
|
||||||
row := rows.Record()
|
|
||||||
host, field := row.ValueByKey("hostname").(string), row.Measurement()
|
|
||||||
|
|
||||||
influxHostData, ok := influxData[host]
|
|
||||||
if !ok {
|
|
||||||
influxHostData = make(map[string][]schema.Float)
|
|
||||||
influxData[host] = influxHostData
|
|
||||||
}
|
|
||||||
|
|
||||||
influxFieldData, ok := influxData[host][field]
|
|
||||||
if !ok {
|
|
||||||
influxFieldData = make([]schema.Float, 0)
|
|
||||||
influxData[host][field] = influxFieldData
|
|
||||||
}
|
|
||||||
|
|
||||||
val, ok := row.Value().(float64)
|
|
||||||
if ok {
|
|
||||||
influxData[host][field] = append(influxData[host][field], schema.Float(val))
|
|
||||||
} else {
|
|
||||||
influxData[host][field] = append(influxData[host][field], schema.Float(0))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// BUILD FUNCTION RETURN
|
|
||||||
data := make(map[string]map[string][]*schema.JobMetric)
|
|
||||||
for node, metricData := range influxData {
|
|
||||||
|
|
||||||
nodeData, ok := data[node]
|
|
||||||
if !ok {
|
|
||||||
nodeData = make(map[string][]*schema.JobMetric)
|
|
||||||
data[node] = nodeData
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, floatArray := range metricData {
|
|
||||||
avg, min, max := 0.0, 0.0, 0.0
|
|
||||||
for _, val := range floatArray {
|
|
||||||
avg += float64(val)
|
|
||||||
min = math.Min(min, float64(val))
|
|
||||||
max = math.Max(max, float64(val))
|
|
||||||
}
|
|
||||||
|
|
||||||
stats := schema.MetricStatistics{
|
|
||||||
Avg: (math.Round((avg/float64(len(floatArray)))*100) / 100),
|
|
||||||
Min: (math.Round(min*100) / 100),
|
|
||||||
Max: (math.Round(max*100) / 100),
|
|
||||||
}
|
|
||||||
|
|
||||||
mc := archive.GetMetricConfig(cluster, metric)
|
|
||||||
nodeData[metric] = append(nodeData[metric], &schema.JobMetric{
|
|
||||||
Unit: mc.Unit,
|
|
||||||
Timestep: mc.Timestep,
|
|
||||||
Series: []schema.Series{
|
|
||||||
{
|
|
||||||
Hostname: node,
|
|
||||||
Statistics: stats,
|
|
||||||
Data: floatArray,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in Systems-View @ Node-List
|
|
||||||
// UNTESTED
|
|
||||||
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
|
|
||||||
cluster, subCluster, nodeFilter string,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
resolution int,
|
|
||||||
from, to time.Time,
|
|
||||||
page *model.PageRequest,
|
|
||||||
ctx context.Context,
|
|
||||||
) (map[string]schema.JobData, int, bool, error) {
|
|
||||||
|
|
||||||
// Assumption: idb.loadData() only returns series node-scope - use node scope for NodeList
|
|
||||||
|
|
||||||
// 0) Init additional vars
|
|
||||||
var totalNodes int = 0
|
|
||||||
var hasNextPage bool = false
|
|
||||||
|
|
||||||
// 1) Get list of all nodes
|
|
||||||
var nodes []string
|
|
||||||
if subCluster != "" {
|
|
||||||
scNodes := archive.NodeLists[cluster][subCluster]
|
|
||||||
nodes = scNodes.PrintList()
|
|
||||||
} else {
|
|
||||||
subClusterNodeLists := archive.NodeLists[cluster]
|
|
||||||
for _, nodeList := range subClusterNodeLists {
|
|
||||||
nodes = append(nodes, nodeList.PrintList()...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2) Filter nodes
|
|
||||||
if nodeFilter != "" {
|
|
||||||
filteredNodes := []string{}
|
|
||||||
for _, node := range nodes {
|
|
||||||
if strings.Contains(node, nodeFilter) {
|
|
||||||
filteredNodes = append(filteredNodes, node)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nodes = filteredNodes
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after return ...
|
|
||||||
totalNodes = len(nodes)
|
|
||||||
sort.Strings(nodes)
|
|
||||||
|
|
||||||
// 3) Apply paging
|
|
||||||
if len(nodes) > page.ItemsPerPage {
|
|
||||||
start := (page.Page - 1) * page.ItemsPerPage
|
|
||||||
end := start + page.ItemsPerPage
|
|
||||||
if end > len(nodes) {
|
|
||||||
end = len(nodes)
|
|
||||||
hasNextPage = false
|
|
||||||
} else {
|
|
||||||
hasNextPage = true
|
|
||||||
}
|
|
||||||
nodes = nodes[start:end]
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4) Fetch And Convert Data, use idb.LoadNodeData() for query
|
|
||||||
|
|
||||||
rawNodeData, err := idb.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
|
||||||
if err != nil {
|
|
||||||
log.Error(fmt.Sprintf("Error while loading influx nodeData for nodeListData %#v\n", err))
|
|
||||||
return nil, totalNodes, hasNextPage, err
|
|
||||||
}
|
|
||||||
|
|
||||||
data := make(map[string]schema.JobData)
|
|
||||||
for node, nodeData := range rawNodeData {
|
|
||||||
// Init Nested Map Data Structures If Not Found
|
|
||||||
hostData, ok := data[node]
|
|
||||||
if !ok {
|
|
||||||
hostData = make(schema.JobData)
|
|
||||||
data[node] = hostData
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, nodeMetricData := range nodeData {
|
|
||||||
metricData, ok := hostData[metric]
|
|
||||||
if !ok {
|
|
||||||
metricData = make(map[schema.MetricScope]*schema.JobMetric)
|
|
||||||
data[node][metric] = metricData
|
|
||||||
}
|
|
||||||
|
|
||||||
data[node][metric][schema.MetricScopeNode] = nodeMetricData[0] // Only Node Scope Returned from loadNodeData
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data, totalNodes, hasNextPage, nil
|
|
||||||
}
|
|
@ -54,8 +54,6 @@ func Init() error {
|
|||||||
switch kind.Kind {
|
switch kind.Kind {
|
||||||
case "cc-metric-store":
|
case "cc-metric-store":
|
||||||
mdr = &CCMetricStore{}
|
mdr = &CCMetricStore{}
|
||||||
case "influxdb":
|
|
||||||
mdr = &InfluxDBv2DataRepository{}
|
|
||||||
case "prometheus":
|
case "prometheus":
|
||||||
mdr = &PrometheusDataRepository{}
|
mdr = &PrometheusDataRepository{}
|
||||||
case "test":
|
case "test":
|
||||||
|
@ -279,8 +279,8 @@ func (pdb *PrometheusDataRepository) LoadData(
|
|||||||
for i, resource := range job.Resources {
|
for i, resource := range job.Resources {
|
||||||
nodes[i] = resource.Hostname
|
nodes[i] = resource.Hostname
|
||||||
}
|
}
|
||||||
from := job.StartTime
|
from := time.Unix(job.StartTime, 0)
|
||||||
to := job.StartTime.Add(time.Duration(job.Duration) * time.Second)
|
to := time.Unix(job.StartTime+int64(job.Duration), 0)
|
||||||
|
|
||||||
for _, scope := range scopes {
|
for _, scope := range scopes {
|
||||||
if scope != schema.MetricScopeNode {
|
if scope != schema.MetricScopeNode {
|
||||||
@ -453,8 +453,8 @@ func (pdb *PrometheusDataRepository) LoadScopedStats(
|
|||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
ctx context.Context) (schema.ScopedJobStats, error) {
|
ctx context.Context,
|
||||||
|
) (schema.ScopedJobStats, error) {
|
||||||
// Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
|
// Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
|
||||||
scopedJobStats := make(schema.ScopedJobStats)
|
scopedJobStats := make(schema.ScopedJobStats)
|
||||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
||||||
@ -502,7 +502,6 @@ func (pdb *PrometheusDataRepository) LoadNodeListData(
|
|||||||
page *model.PageRequest,
|
page *model.PageRequest,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
) (map[string]schema.JobData, int, bool, error) {
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
|
||||||
// Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList
|
// Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList
|
||||||
|
|
||||||
// 0) Init additional vars
|
// 0) Init additional vars
|
||||||
@ -540,7 +539,7 @@ func (pdb *PrometheusDataRepository) LoadNodeListData(
|
|||||||
if len(nodes) > page.ItemsPerPage {
|
if len(nodes) > page.ItemsPerPage {
|
||||||
start := (page.Page - 1) * page.ItemsPerPage
|
start := (page.Page - 1) * page.ItemsPerPage
|
||||||
end := start + page.ItemsPerPage
|
end := start + page.ItemsPerPage
|
||||||
if end > len(nodes) {
|
if end >= len(nodes) {
|
||||||
end = len(nodes)
|
end = len(nodes)
|
||||||
hasNextPage = false
|
hasNextPage = false
|
||||||
} else {
|
} else {
|
||||||
|
@ -15,13 +15,13 @@ import (
|
|||||||
type Hooks struct{}
|
type Hooks struct{}
|
||||||
|
|
||||||
// Before hook will print the query with it's args and return the context with the timestamp
|
// Before hook will print the query with it's args and return the context with the timestamp
|
||||||
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
|
func (h *Hooks) Before(ctx context.Context, query string, args ...any) (context.Context, error) {
|
||||||
log.Debugf("SQL query %s %q", query, args)
|
log.Debugf("SQL query %s %q", query, args)
|
||||||
return context.WithValue(ctx, "begin", time.Now()), nil
|
return context.WithValue(ctx, "begin", time.Now()), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// After hook will get the timestamp registered on the Before hook and print the elapsed time
|
// After hook will get the timestamp registered on the Before hook and print the elapsed time
|
||||||
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
|
func (h *Hooks) After(ctx context.Context, query string, args ...any) (context.Context, error) {
|
||||||
begin := ctx.Value("begin").(time.Time)
|
begin := ctx.Value("begin").(time.Time)
|
||||||
log.Debugf("Took: %s\n", time.Since(begin))
|
log.Debugf("Took: %s\n", time.Since(begin))
|
||||||
return ctx, nil
|
return ctx, nil
|
||||||
|
@ -9,12 +9,12 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"maps"
|
||||||
"math"
|
"math"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||||
@ -33,6 +33,7 @@ type JobRepository struct {
|
|||||||
stmtCache *sq.StmtCache
|
stmtCache *sq.StmtCache
|
||||||
cache *lrucache.Cache
|
cache *lrucache.Cache
|
||||||
driver string
|
driver string
|
||||||
|
Mutex sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetJobRepository() *JobRepository {
|
func GetJobRepository() *JobRepository {
|
||||||
@ -51,17 +52,29 @@ func GetJobRepository() *JobRepository {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var jobColumns []string = []string{
|
var jobColumns []string = []string{
|
||||||
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
|
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
|
||||||
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
|
||||||
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
|
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status",
|
||||||
|
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
|
||||||
|
"job.footprint", "job.energy",
|
||||||
}
|
}
|
||||||
|
|
||||||
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
var jobCacheColumns []string = []string{
|
||||||
|
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
|
||||||
|
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
|
||||||
|
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
|
||||||
|
"job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt",
|
||||||
|
"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
|
||||||
|
"job_cache.footprint", "job_cache.energy",
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
|
||||||
job := &schema.Job{}
|
job := &schema.Job{}
|
||||||
|
|
||||||
if err := row.Scan(
|
if err := row.Scan(
|
||||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster,
|
||||||
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
&job.StartTime, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads,
|
||||||
|
&job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||||
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
|
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
|
||||||
log.Warnf("Error while scanning rows (Job): %v", err)
|
log.Warnf("Error while scanning rows (Job): %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -79,10 +92,9 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
|||||||
}
|
}
|
||||||
job.RawFootprint = nil
|
job.RawFootprint = nil
|
||||||
|
|
||||||
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
|
||||||
// Always ensure accurate duration for running jobs
|
// Always ensure accurate duration for running jobs
|
||||||
if job.State == schema.JobStateRunning {
|
if job.State == schema.JobStateRunning {
|
||||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
job.Duration = int32(time.Now().Unix() - job.StartTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
return job, nil
|
return job, nil
|
||||||
@ -138,17 +150,6 @@ func (r *JobRepository) Flush() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func scanJobLink(row interface{ Scan(...interface{}) error }) (*model.JobLink, error) {
|
|
||||||
jobLink := &model.JobLink{}
|
|
||||||
if err := row.Scan(
|
|
||||||
&jobLink.ID, &jobLink.JobID); err != nil {
|
|
||||||
log.Warn("Error while scanning rows (jobLink)")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return jobLink, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
|
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
cachekey := fmt.Sprintf("metadata:%d", job.ID)
|
cachekey := fmt.Sprintf("metadata:%d", job.ID)
|
||||||
@ -189,9 +190,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
|||||||
|
|
||||||
if job.MetaData != nil {
|
if job.MetaData != nil {
|
||||||
cpy := make(map[string]string, len(job.MetaData)+1)
|
cpy := make(map[string]string, len(job.MetaData)+1)
|
||||||
for k, v := range job.MetaData {
|
maps.Copy(cpy, job.MetaData)
|
||||||
cpy[k] = v
|
|
||||||
}
|
|
||||||
cpy[key] = val
|
cpy[key] = val
|
||||||
job.MetaData = cpy
|
job.MetaData = cpy
|
||||||
} else {
|
} else {
|
||||||
@ -389,7 +388,7 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
|
|||||||
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
||||||
var err error
|
var err error
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
|
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
|
||||||
parts := []string{}
|
parts := []string{}
|
||||||
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
||||||
return nil, 0, 1000
|
return nil, 0, 1000
|
||||||
@ -477,6 +476,33 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
|
||||||
|
query := sq.Select("job.id").From("job").
|
||||||
|
Join("jobtag ON jobtag.job_id = job.id").
|
||||||
|
Where(sq.Eq{"jobtag.tag_id": tagId}).Distinct()
|
||||||
|
rows, err := query.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error while running query")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jobIds := make([]int64, 0, 100)
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
var jobId int64
|
||||||
|
|
||||||
|
if err := rows.Scan(&jobId); err != nil {
|
||||||
|
rows.Close()
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobIds = append(jobIds, jobId)
|
||||||
|
}
|
||||||
|
|
||||||
|
return jobIds, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Reconsider filtering short jobs with hardcoded threshold
|
||||||
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
|
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
|
||||||
query := sq.Select(jobColumns...).From("job").
|
query := sq.Select(jobColumns...).From("job").
|
||||||
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
|
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
|
||||||
@ -581,7 +607,7 @@ func (r *JobRepository) MarkArchived(
|
|||||||
|
|
||||||
func (r *JobRepository) UpdateEnergy(
|
func (r *JobRepository) UpdateEnergy(
|
||||||
stmt sq.UpdateBuilder,
|
stmt sq.UpdateBuilder,
|
||||||
jobMeta *schema.JobMeta,
|
jobMeta *schema.Job,
|
||||||
) (sq.UpdateBuilder, error) {
|
) (sq.UpdateBuilder, error) {
|
||||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||||
@ -631,7 +657,7 @@ func (r *JobRepository) UpdateEnergy(
|
|||||||
|
|
||||||
func (r *JobRepository) UpdateFootprint(
|
func (r *JobRepository) UpdateFootprint(
|
||||||
stmt sq.UpdateBuilder,
|
stmt sq.UpdateBuilder,
|
||||||
jobMeta *schema.JobMeta,
|
jobMeta *schema.Job,
|
||||||
) (sq.UpdateBuilder, error) {
|
) (sq.UpdateBuilder, error) {
|
||||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||||
|
@ -13,6 +13,14 @@ import (
|
|||||||
sq "github.com/Masterminds/squirrel"
|
sq "github.com/Masterminds/squirrel"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// NamedJobCacheInsert is the INSERT statement for the job_cache table. Each
// value is bound through a named parameter (":name") spelled like its column.
const NamedJobCacheInsert string = `INSERT INTO job_cache (
|
||||||
|
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
|
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||||
|
) VALUES (
|
||||||
|
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||||
|
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||||
|
);`
|
||||||
|
|
||||||
const NamedJobInsert string = `INSERT INTO job (
|
const NamedJobInsert string = `INSERT INTO job (
|
||||||
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||||
@ -21,8 +29,10 @@ const NamedJobInsert string = `INSERT INTO job (
|
|||||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||||
);`
|
);`
|
||||||
|
|
||||||
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
r.Mutex.Lock()
|
||||||
|
res, err := r.DB.NamedExec(NamedJobCacheInsert, job)
|
||||||
|
r.Mutex.Unlock()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while NamedJobInsert")
|
log.Warn("Error while NamedJobInsert")
|
||||||
return 0, err
|
return 0, err
|
||||||
@ -36,9 +46,48 @@ func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
|||||||
return id, nil
|
return id, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
|
||||||
|
r.Mutex.Lock()
|
||||||
|
defer r.Mutex.Unlock()
|
||||||
|
|
||||||
|
query := sq.Select(jobCacheColumns...).From("job_cache")
|
||||||
|
|
||||||
|
rows, err := query.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while running query %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs := make([]*schema.Job, 0, 50)
|
||||||
|
for rows.Next() {
|
||||||
|
job, err := scanJob(rows)
|
||||||
|
if err != nil {
|
||||||
|
rows.Close()
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jobs = append(jobs, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = r.DB.Exec(
|
||||||
|
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while Job sync: %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = r.DB.Exec("DELETE FROM job_cache")
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while Job cache clean: %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return jobs, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Start inserts a new job in the table, returning the unique job ID.
|
// Start inserts a new job in the table, returning the unique job ID.
|
||||||
// Statistics are not transferred!
|
// Statistics are not transferred!
|
||||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
|
||||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
||||||
@ -73,3 +122,19 @@ func (r *JobRepository) Stop(
|
|||||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) StopCached(
|
||||||
|
jobId int64,
|
||||||
|
duration int32,
|
||||||
|
state schema.JobState,
|
||||||
|
monitoringStatus int32,
|
||||||
|
) (err error) {
|
||||||
|
stmt := sq.Update("job_cache").
|
||||||
|
Set("job_state", state).
|
||||||
|
Set("duration", duration).
|
||||||
|
Set("monitoring_status", monitoringStatus).
|
||||||
|
Where("job.id = ?", jobId)
|
||||||
|
|
||||||
|
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
@ -43,6 +43,26 @@ func (r *JobRepository) Find(
|
|||||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) FindCached(
|
||||||
|
jobId *int64,
|
||||||
|
cluster *string,
|
||||||
|
startTime *int64,
|
||||||
|
) (*schema.Job, error) {
|
||||||
|
q := sq.Select(jobCacheColumns...).From("job_cache").
|
||||||
|
Where("job_cache.job_id = ?", *jobId)
|
||||||
|
|
||||||
|
if cluster != nil {
|
||||||
|
q = q.Where("job_cache.cluster = ?", *cluster)
|
||||||
|
}
|
||||||
|
if startTime != nil {
|
||||||
|
q = q.Where("job_cache.start_time = ?", *startTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match
|
||||||
|
|
||||||
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
}
|
||||||
|
|
||||||
// Find executes a SQL query to find a specific batch job.
|
// Find executes a SQL query to find a specific batch job.
|
||||||
// The job is queried using the batch job id, the cluster name,
|
// The job is queried using the batch job id, the cluster name,
|
||||||
// and the start time of the job in UNIX epoch time seconds.
|
// and the start time of the job in UNIX epoch time seconds.
|
||||||
@ -83,6 +103,35 @@ func (r *JobRepository) FindAll(
|
|||||||
return jobs, nil
|
return jobs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get complete joblist only consisting of db ids.
|
||||||
|
// This is useful to process large job counts and intended to be used
|
||||||
|
// together with FindById to process jobs one by one
|
||||||
|
func (r *JobRepository) GetJobList() ([]int64, error) {
|
||||||
|
query := sq.Select("id").From("job").
|
||||||
|
Where("job.job_state != 'running'")
|
||||||
|
|
||||||
|
rows, err := query.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error while running query")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jl := make([]int64, 0, 1000)
|
||||||
|
for rows.Next() {
|
||||||
|
var id int64
|
||||||
|
err := rows.Scan(&id)
|
||||||
|
if err != nil {
|
||||||
|
rows.Close()
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jl = append(jl, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Return job count %d", len(jl))
|
||||||
|
return jl, nil
|
||||||
|
}
|
||||||
|
|
||||||
// FindById executes a SQL query to find a specific batch job.
|
// FindById executes a SQL query to find a specific batch job.
|
||||||
// The job is queried using the database id.
|
// The job is queried using the database id.
|
||||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
@ -178,7 +227,7 @@ func (r *JobRepository) FindConcurrentJobs(
|
|||||||
var startTime int64
|
var startTime int64
|
||||||
var stopTime int64
|
var stopTime int64
|
||||||
|
|
||||||
startTime = job.StartTimeUnix
|
startTime = job.StartTime
|
||||||
hostname := job.Resources[0].Hostname
|
hostname := job.Resources[0].Hostname
|
||||||
|
|
||||||
if job.State == schema.JobStateRunning {
|
if job.State == schema.JobStateRunning {
|
||||||
|
57
internal/repository/jobHooks.go
Normal file
57
internal/repository/jobHooks.go
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package repository
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JobHook is the callback interface for components that want to be notified
// about job start and stop events. Implementations are added to the package
// level hook list with RegisterJobJook.
type JobHook interface {
|
||||||
|
// JobStartCallback is invoked by CallJobStartHooks, once for every job in
// the slice handed to that function.
JobStartCallback(job *schema.Job)
|
||||||
|
// JobStopCallback is invoked by CallJobStopHooks with the job handed to
// that function.
JobStopCallback(job *schema.Job)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
hooks []JobHook
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterJobJook(hook JobHook) {
|
||||||
|
initOnce.Do(func() {
|
||||||
|
hooks = make([]JobHook, 0)
|
||||||
|
})
|
||||||
|
|
||||||
|
if hook != nil {
|
||||||
|
hooks = append(hooks, hook)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func CallJobStartHooks(jobs []*schema.Job) {
|
||||||
|
if hooks == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, hook := range hooks {
|
||||||
|
if hook != nil {
|
||||||
|
for _, job := range jobs {
|
||||||
|
hook.JobStartCallback(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func CallJobStopHooks(job *schema.Job) {
|
||||||
|
if hooks == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, hook := range hooks {
|
||||||
|
if hook != nil {
|
||||||
|
hook.JobStopCallback(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -146,6 +146,11 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
|||||||
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
||||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
||||||
}
|
}
|
||||||
|
if filter.DbID != nil {
|
||||||
|
dbIDs := make([]string, len(filter.DbID))
|
||||||
|
copy(dbIDs, filter.DbID)
|
||||||
|
query = query.Where(sq.Eq{"job.id": dbIDs})
|
||||||
|
}
|
||||||
if filter.JobID != nil {
|
if filter.JobID != nil {
|
||||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||||
}
|
}
|
||||||
|
@ -24,7 +24,7 @@ func TestFind(t *testing.T) {
|
|||||||
|
|
||||||
// fmt.Printf("%+v", job)
|
// fmt.Printf("%+v", job)
|
||||||
|
|
||||||
if job.ID != 5 {
|
if *job.ID != 5 {
|
||||||
t.Errorf("wrong summary for diagnostic 3\ngot: %d \nwant: 1366", job.JobID)
|
t.Errorf("wrong summary for diagnostic 3\ngot: %d \nwant: 1366", job.JobID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ import (
|
|||||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const Version uint = 8
|
const Version uint = 10
|
||||||
|
|
||||||
//go:embed migrations/*
|
//go:embed migrations/*
|
||||||
var migrationFiles embed.FS
|
var migrationFiles embed.FS
|
||||||
@ -115,8 +115,17 @@ func MigrateDB(backend string, db string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
v, dirty, err := m.Version()
|
v, dirty, err := m.Version()
|
||||||
|
if err != nil {
|
||||||
|
if err == migrate.ErrNilVersion {
|
||||||
|
log.Warn("Legacy database without version or missing database file!")
|
||||||
|
} else {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
|
if v < Version {
|
||||||
|
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
|
||||||
|
}
|
||||||
|
|
||||||
if dirty {
|
if dirty {
|
||||||
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
|
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
DROP TABLE IF EXISTS job_cache;
|
@ -0,0 +1,31 @@
|
|||||||
|
CREATE TABLE "job_cache" (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
job_id BIGINT NOT NULL,
|
||||||
|
cluster VARCHAR(255) NOT NULL,
|
||||||
|
subcluster VARCHAR(255) NOT NULL,
|
||||||
|
start_time BIGINT NOT NULL, -- Unix timestamp
|
||||||
|
hpc_user VARCHAR(255) NOT NULL,
|
||||||
|
project VARCHAR(255) NOT NULL,
|
||||||
|
cluster_partition VARCHAR(255),
|
||||||
|
array_job_id BIGINT,
|
||||||
|
duration INT NOT NULL,
|
||||||
|
walltime INT NOT NULL,
|
||||||
|
job_state VARCHAR(255) NOT NULL
|
||||||
|
CHECK (job_state IN (
|
||||||
|
'running', 'completed', 'failed', 'cancelled',
|
||||||
|
'stopped', 'timeout', 'preempted', 'out_of_memory'
|
||||||
|
)),
|
||||||
|
meta_data TEXT, -- JSON
|
||||||
|
resources TEXT NOT NULL, -- JSON
|
||||||
|
num_nodes INT NOT NULL,
|
||||||
|
num_hwthreads INT,
|
||||||
|
num_acc INT,
|
||||||
|
smt TINYINT NOT NULL DEFAULT 1 CHECK (smt IN (0, 1)),
|
||||||
|
exclusive TINYINT NOT NULL DEFAULT 1 CHECK (exclusive IN (0, 1, 2)),
|
||||||
|
monitoring_status TINYINT NOT NULL DEFAULT 1
|
||||||
|
CHECK (monitoring_status IN (0, 1, 2, 3)),
|
||||||
|
energy REAL NOT NULL DEFAULT 0.0,
|
||||||
|
energy_footprint TEXT DEFAULT NULL,
|
||||||
|
footprint TEXT DEFAULT NULL,
|
||||||
|
UNIQUE (job_id, cluster, start_time)
|
||||||
|
);
|
@ -0,0 +1 @@
|
|||||||
|
DROP TABLE IF EXISTS node;
|
23
internal/repository/migrations/sqlite3/10_node-table.up.sql
Normal file
23
internal/repository/migrations/sqlite3/10_node-table.up.sql
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
CREATE TABLE "node" (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
hostname VARCHAR(255) NOT NULL,
|
||||||
|
cluster VARCHAR(255) NOT NULL,
|
||||||
|
subcluster VARCHAR(255) NOT NULL,
|
||||||
|
cpus_allocated INTEGER NOT NULL,
|
||||||
|
cpus_total INTEGER NOT NULL,
|
||||||
|
memory_allocated INTEGER NOT NULL,
|
||||||
|
memory_total INTEGER NOT NULL,
|
||||||
|
gpus_allocated INTEGER NOT NULL,
|
||||||
|
gpus_total INTEGER NOT NULL,
|
||||||
|
node_state VARCHAR(255) NOT NULL
|
||||||
|
CHECK (node_state IN (
|
||||||
|
'allocated', 'reserved', 'idle', 'mixed',
|
||||||
|
'down', 'unknown'
|
||||||
|
)),
|
||||||
|
health_state VARCHAR(255) NOT NULL
|
||||||
|
CHECK (health_state IN (
|
||||||
|
'full', 'partial', 'failed'
|
||||||
|
)),
|
||||||
|
meta_data TEXT, -- JSON
|
||||||
|
UNIQUE (hostname, cluster)
|
||||||
|
);
|
289
internal/repository/node.go
Normal file
289
internal/repository/node.go
Normal file
@ -0,0 +1,289 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package repository
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"maps"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
nodeRepoOnce sync.Once
|
||||||
|
nodeRepoInstance *NodeRepository
|
||||||
|
)
|
||||||
|
|
||||||
|
// NodeRepository provides access to the rows of the node table.
// The shared instance is created by GetNodeRepository.
type NodeRepository struct {
|
||||||
|
// DB is the database handle, taken from GetConnection().
DB *sqlx.DB
|
||||||
|
// stmtCache is a squirrel statement cache created on top of DB.
stmtCache *sq.StmtCache
|
||||||
|
// cache holds decoded node metadata under "metadata:<node id>" keys.
cache *lrucache.Cache
|
||||||
|
// driver is the driver name reported by the connection.
driver string
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetNodeRepository() *NodeRepository {
|
||||||
|
nodeRepoOnce.Do(func() {
|
||||||
|
db := GetConnection()
|
||||||
|
|
||||||
|
nodeRepoInstance = &NodeRepository{
|
||||||
|
DB: db.DB,
|
||||||
|
driver: db.Driver,
|
||||||
|
|
||||||
|
stmtCache: sq.NewStmtCache(db.DB),
|
||||||
|
cache: lrucache.New(1024 * 1024),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return nodeRepoInstance
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) {
|
||||||
|
start := time.Now()
|
||||||
|
cachekey := fmt.Sprintf("metadata:%d", node.ID)
|
||||||
|
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||||
|
node.MetaData = cached.(map[string]string)
|
||||||
|
return node.MetaData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := sq.Select("node.meta_data").From("node").Where("node.id = ?", node.ID).
|
||||||
|
RunWith(r.stmtCache).QueryRow().Scan(&node.RawMetaData); err != nil {
|
||||||
|
log.Warn("Error while scanning for node metadata")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(node.RawMetaData) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.Unmarshal(node.RawMetaData, &node.MetaData); err != nil {
|
||||||
|
log.Warn("Error while unmarshaling raw metadata json")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
|
||||||
|
log.Debugf("Timer FetchMetadata %s", time.Since(start))
|
||||||
|
return node.MetaData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) UpdateMetadata(node *schema.Node, key, val string) (err error) {
|
||||||
|
cachekey := fmt.Sprintf("metadata:%d", node.ID)
|
||||||
|
r.cache.Del(cachekey)
|
||||||
|
if node.MetaData == nil {
|
||||||
|
if _, err = r.FetchMetadata(node); err != nil {
|
||||||
|
log.Warnf("Error while fetching metadata for node, DB ID '%v'", node.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.MetaData != nil {
|
||||||
|
cpy := make(map[string]string, len(node.MetaData)+1)
|
||||||
|
maps.Copy(cpy, node.MetaData)
|
||||||
|
cpy[key] = val
|
||||||
|
node.MetaData = cpy
|
||||||
|
} else {
|
||||||
|
node.MetaData = map[string]string{key: val}
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.RawMetaData, err = json.Marshal(node.MetaData); err != nil {
|
||||||
|
log.Warnf("Error while marshaling metadata for node, DB ID '%v'", node.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = sq.Update("node").
|
||||||
|
Set("meta_data", node.RawMetaData).
|
||||||
|
Where("node.id = ?", node.ID).
|
||||||
|
RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
|
log.Warnf("Error while updating metadata for node, DB ID '%v'", node.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
r.cache.Put(cachekey, node.MetaData, len(node.RawMetaData), 24*time.Hour)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) GetNode(id int64, withMeta bool) (*schema.Node, error) {
|
||||||
|
node := &schema.Node{}
|
||||||
|
if err := sq.Select("id", "hostname", "cluster", "subcluster", "node_state",
|
||||||
|
"health_state").From("node").
|
||||||
|
Where("node.id = ?", id).RunWith(r.DB).
|
||||||
|
QueryRow().Scan(&node.ID, &node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState,
|
||||||
|
&node.HealthState); err != nil {
|
||||||
|
log.Warnf("Error while querying node '%v' from database", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if withMeta {
|
||||||
|
var err error
|
||||||
|
var meta map[string]string
|
||||||
|
if meta, err = r.FetchMetadata(node); err != nil {
|
||||||
|
log.Warnf("Error while fetching metadata for node '%v'", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
node.MetaData = meta
|
||||||
|
}
|
||||||
|
|
||||||
|
return node, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
const NamedNodeInsert string = `
|
||||||
|
INSERT INTO node (hostname, cluster, subcluster, node_state, health_state)
|
||||||
|
VALUES (:hostname, :cluster, :subcluster, :node_state, :health_state);`
|
||||||
|
|
||||||
|
func (r *NodeRepository) AddNode(node *schema.Node) (int64, error) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
res, err := r.DB.NamedExec(NamedNodeInsert, node)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while adding node '%v' to database", node.Hostname)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
node.ID, err = res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting last insert id for node '%v' from database", node.Hostname)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return node.ID, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeState *schema.NodeState) error {
|
||||||
|
var id int64
|
||||||
|
if err := sq.Select("id").From("node").
|
||||||
|
Where("node.hostname = ?", hostname).Where("node.cluster = ?", cluster).RunWith(r.DB).
|
||||||
|
QueryRow().Scan(&id); err != nil {
|
||||||
|
if err == sql.ErrNoRows {
|
||||||
|
subcluster, err := archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting subcluster for node '%s' in cluster '%s': %v", hostname, cluster, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
node := schema.Node{
|
||||||
|
Hostname: hostname, Cluster: cluster, SubCluster: subcluster, NodeState: *nodeState,
|
||||||
|
HealthState: schema.MonitoringStateFull,
|
||||||
|
}
|
||||||
|
_, err = r.AddNode(&node)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while adding node '%s' to database: %v", hostname, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Added node '%s' to database", hostname)
|
||||||
|
return nil
|
||||||
|
} else {
|
||||||
|
log.Warnf("Error while querying node '%v' from database", id)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := sq.Update("node").Set("node_state", nodeState).Where("node.id = ?", id).RunWith(r.DB).Exec(); err != nil {
|
||||||
|
log.Errorf("error while updating node '%s'", hostname)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Infof("Updated node '%s' in database", hostname)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// func (r *NodeRepository) UpdateHealthState(hostname string, healthState *schema.MonitoringState) error {
|
||||||
|
// if _, err := sq.Update("node").Set("health_state", healthState).Where("node.id = ?", id).RunWith(r.DB).Exec(); err != nil {
|
||||||
|
// log.Errorf("error while updating node '%d'", id)
|
||||||
|
// return err
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return nil
|
||||||
|
// }
|
||||||
|
|
||||||
|
func (r *NodeRepository) DeleteNode(id int64) error {
|
||||||
|
_, err := r.DB.Exec(`DELETE FROM node WHERE node.id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while deleting node '%d' from DB", id)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Infof("deleted node '%d' from DB", id)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Implement order by
|
||||||
|
func (r *NodeRepository) QueryNodes(
|
||||||
|
ctx context.Context,
|
||||||
|
filters []*model.NodeFilter,
|
||||||
|
order *model.OrderByInput,
|
||||||
|
) ([]*schema.Node, error) {
|
||||||
|
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("node"))
|
||||||
|
if qerr != nil {
|
||||||
|
return nil, qerr
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range filters {
|
||||||
|
if f.Hostname != nil {
|
||||||
|
query = buildStringCondition("node.hostname", f.Hostname, query)
|
||||||
|
}
|
||||||
|
if f.Cluster != nil {
|
||||||
|
query = buildStringCondition("node.cluster", f.Cluster, query)
|
||||||
|
}
|
||||||
|
if f.NodeState != nil {
|
||||||
|
query = query.Where("node.node_state = ?", f.NodeState)
|
||||||
|
}
|
||||||
|
if f.HealthState != nil {
|
||||||
|
query = query.Where("node.health_state = ?", f.HealthState)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := query.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
queryString, queryVars, _ := query.ToSql()
|
||||||
|
log.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes := make([]*schema.Node, 0, 50)
|
||||||
|
for rows.Next() {
|
||||||
|
node := schema.Node{}
|
||||||
|
|
||||||
|
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
|
||||||
|
&node.NodeState, &node.HealthState); err != nil {
|
||||||
|
rows.Close()
|
||||||
|
log.Warn("Error while scanning rows (Nodes)")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
nodes = append(nodes, &node)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
|
||||||
|
q := sq.Select("hostname", "cluster", "subcluster", "node_state",
|
||||||
|
"health_state").From("node").Where("node.cluster = ?", cluster).OrderBy("node.hostname ASC")
|
||||||
|
|
||||||
|
rows, err := q.RunWith(r.DB).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while querying user list")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
nodeList := make([]*schema.Node, 0, 100)
|
||||||
|
defer rows.Close()
|
||||||
|
for rows.Next() {
|
||||||
|
node := &schema.Node{}
|
||||||
|
if err := rows.Scan(&node.Hostname, &node.Cluster,
|
||||||
|
&node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
|
||||||
|
log.Warn("Error while scanning node list")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeList = append(nodeList, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodeList, nil
|
||||||
|
}
|
@ -291,7 +291,7 @@ func (r *JobRepository) JobsStats(
|
|||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
|
func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
|
||||||
if stats, ok := job.Statistics[metric]; ok {
|
if stats, ok := job.Statistics[metric]; ok {
|
||||||
switch statType {
|
switch statType {
|
||||||
case "avg":
|
case "avg":
|
||||||
@ -686,7 +686,7 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
|||||||
|
|
||||||
mainQuery := sq.Select(
|
mainQuery := sq.Select(
|
||||||
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
||||||
fmt.Sprintf(`count(*) as count`),
|
`count(*) as count`,
|
||||||
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
|
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
|
||||||
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
|
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
|
||||||
).From("job").Where(
|
).From("job").Where(
|
||||||
@ -759,7 +759,6 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
|||||||
filters []*model.JobFilter,
|
filters []*model.JobFilter,
|
||||||
bins *int,
|
bins *int,
|
||||||
) []*model.MetricHistoPoints {
|
) []*model.MetricHistoPoints {
|
||||||
|
|
||||||
// Get Jobs
|
// Get Jobs
|
||||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -45,7 +45,38 @@ func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*sche
|
|||||||
return tags, archive.UpdateTags(j, archiveTags)
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removes a tag from a job by tag id
|
func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
|
||||||
|
j, err := r.FindByIdDirect(job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while finding job by id")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||||
|
|
||||||
|
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
|
s, _, _ := q.ToSql()
|
||||||
|
log.Errorf("Error adding tag with %s: %v", s, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags, err := r.GetTagsDirect(&job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting tags for job")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
archiveTags, err := r.getArchiveTags(&job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting tags for job")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Removes a tag from a job by tag id.
|
||||||
|
// Used by GraphQL API
|
||||||
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
||||||
j, err := r.FindByIdWithUser(user, job)
|
j, err := r.FindByIdWithUser(user, job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -77,12 +108,13 @@ func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Removes a tag from a job by tag info
|
// Removes a tag from a job by tag info
|
||||||
|
// Used by REST API
|
||||||
func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
|
func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
|
||||||
// Get Tag ID to delete
|
// Get Tag ID to delete
|
||||||
tagID, exists := r.TagId(tagType, tagName, tagScope)
|
tagID, exists := r.TagId(tagType, tagName, tagScope)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||||
return nil, fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
return nil, fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get Job
|
// Get Job
|
||||||
@ -116,38 +148,45 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT
|
|||||||
return tags, archive.UpdateTags(j, archiveTags)
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
|
||||||
|
for _, j := range jobIds {
|
||||||
|
tags, err := r.getArchiveTags(&j)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while getting tags for job %d", j)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
job, err := r.FindByIdDirect(j)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while getting job %d", j)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
archive.UpdateTags(job, tags)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Removes a tag from db by tag info
|
// Removes a tag from db by tag info
|
||||||
|
// Used by REST API. Does not update tagged jobs in Job archive.
|
||||||
func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
|
func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
|
||||||
// Get Tag ID to delete
|
// Get Tag ID to delete
|
||||||
tagID, exists := r.TagId(tagType, tagName, tagScope)
|
tagID, exists := r.TagId(tagType, tagName, tagScope)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
log.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||||
return fmt.Errorf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
return fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle Delete JobTagTable
|
return r.RemoveTagById(tagID)
|
||||||
qJobTag := sq.Delete("jobtag").Where("jobtag.tag_id = ?", tagID)
|
|
||||||
|
|
||||||
if _, err := qJobTag.RunWith(r.stmtCache).Exec(); err != nil {
|
|
||||||
s, _, _ := qJobTag.ToSql()
|
|
||||||
log.Errorf("Error removing tag from table 'jobTag' with %s: %v", s, err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle Delete TagTable
|
|
||||||
qTag := sq.Delete("tag").Where("tag.id = ?", tagID)
|
|
||||||
|
|
||||||
if _, err := qTag.RunWith(r.stmtCache).Exec(); err != nil {
|
|
||||||
s, _, _ := qTag.ToSql()
|
|
||||||
log.Errorf("Error removing tag from table 'tag' with %s: %v", s, err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removes a tag from db by tag id
|
// Removes a tag from db by tag id
|
||||||
|
// Used by GraphQL API.
|
||||||
func (r *JobRepository) RemoveTagById(tagID int64) error {
|
func (r *JobRepository) RemoveTagById(tagID int64) error {
|
||||||
|
jobIds, err := r.FindJobIdsByTag(tagID)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Handle Delete JobTagTable
|
// Handle Delete JobTagTable
|
||||||
qJobTag := sq.Delete("jobtag").Where("jobtag.tag_id = ?", tagID)
|
qJobTag := sq.Delete("jobtag").Where("jobtag.tag_id = ?", tagID)
|
||||||
|
|
||||||
@ -166,6 +205,9 @@ func (r *JobRepository) RemoveTagById(tagID int64) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// asynchronously update archive jobs
|
||||||
|
go r.removeTagFromArchiveJobs(jobIds)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -291,6 +333,38 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
|
|||||||
return tagId, nil
|
return tagId, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// used in auto tagger plugins
|
||||||
|
func (r *JobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
||||||
|
tagScope := "global"
|
||||||
|
|
||||||
|
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||||
|
if !exists {
|
||||||
|
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := r.AddTagDirect(jobId, tagId); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tagId, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
|
||||||
|
var id int64
|
||||||
|
q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
|
||||||
|
Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
|
||||||
|
Where("tag.tag_name = ?", tagName)
|
||||||
|
err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
} else {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TagId returns the database id of the tag with the specified type and name.
|
// TagId returns the database id of the tag with the specified type and name.
|
||||||
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||||
exists = true
|
exists = true
|
||||||
@ -346,6 +420,32 @@ func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, e
|
|||||||
return tags, nil
|
return tags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
|
||||||
|
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||||
|
if job != nil {
|
||||||
|
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := q.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
s, _, _ := q.ToSql()
|
||||||
|
log.Errorf("Error get tags with %s: %v", s, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags := make([]*schema.Tag, 0)
|
||||||
|
for rows.Next() {
|
||||||
|
tag := &schema.Tag{}
|
||||||
|
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tags = append(tags, tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, nil
|
||||||
|
}
|
||||||
|
|
||||||
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
||||||
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||||
|
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
@ -161,7 +161,7 @@ func setupNodeRoute(i InfoType, r *http.Request) InfoType {
|
|||||||
i["hostname"] = vars["hostname"]
|
i["hostname"] = vars["hostname"]
|
||||||
i["id"] = fmt.Sprintf("%s (%s)", vars["cluster"], vars["hostname"])
|
i["id"] = fmt.Sprintf("%s (%s)", vars["cluster"], vars["hostname"])
|
||||||
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
|
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
|
||||||
if from != "" || to != "" {
|
if from != "" && to != "" {
|
||||||
i["from"] = from
|
i["from"] = from
|
||||||
i["to"] = to
|
i["to"] = to
|
||||||
}
|
}
|
||||||
@ -297,6 +297,9 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if len(query["dbId"]) != 0 {
|
||||||
|
filterPresets["dbId"] = query["dbId"]
|
||||||
|
}
|
||||||
if query.Get("jobId") != "" {
|
if query.Get("jobId") != "" {
|
||||||
if len(query["jobId"]) == 1 {
|
if len(query["jobId"]) == 1 {
|
||||||
filterPresets["jobId"] = query.Get("jobId")
|
filterPresets["jobId"] = query.Get("jobId")
|
||||||
|
1
internal/tagger/apps/alf.txt
Normal file
1
internal/tagger/apps/alf.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
alf
|
7
internal/tagger/apps/caracal.txt
Normal file
7
internal/tagger/apps/caracal.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
calc_rate
|
||||||
|
qmdffgen
|
||||||
|
dynamic
|
||||||
|
evbopt
|
||||||
|
explore
|
||||||
|
black_box
|
||||||
|
poly_qmdff
|
3
internal/tagger/apps/chroma.txt
Normal file
3
internal/tagger/apps/chroma.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
chroma
|
||||||
|
qdp
|
||||||
|
qmp
|
1
internal/tagger/apps/cp2k.txt
Normal file
1
internal/tagger/apps/cp2k.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
cp2k
|
1
internal/tagger/apps/cpmd.txt
Normal file
1
internal/tagger/apps/cpmd.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
cpmd
|
1
internal/tagger/apps/flame.txt
Normal file
1
internal/tagger/apps/flame.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
flame
|
3
internal/tagger/apps/gromacs.txt
Normal file
3
internal/tagger/apps/gromacs.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
gromacs
|
||||||
|
gmx
|
||||||
|
mdrun
|
1
internal/tagger/apps/julia.txt
Normal file
1
internal/tagger/apps/julia.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
julia
|
1
internal/tagger/apps/lammps.txt
Normal file
1
internal/tagger/apps/lammps.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
lmp
|
1
internal/tagger/apps/matlab.txt
Normal file
1
internal/tagger/apps/matlab.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
matlab
|
1
internal/tagger/apps/openfoam.txt
Normal file
1
internal/tagger/apps/openfoam.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
openfoam
|
1
internal/tagger/apps/orca.txt
Normal file
1
internal/tagger/apps/orca.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
orca
|
4
internal/tagger/apps/python.txt
Normal file
4
internal/tagger/apps/python.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
python
|
||||||
|
pip
|
||||||
|
anaconda
|
||||||
|
conda
|
2
internal/tagger/apps/starccm.txt
Normal file
2
internal/tagger/apps/starccm.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
starccm+
|
||||||
|
-podkey
|
10
internal/tagger/apps/turbomole.txt
Normal file
10
internal/tagger/apps/turbomole.txt
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
dscf
|
||||||
|
grad
|
||||||
|
ridft
|
||||||
|
rdgrad
|
||||||
|
ricc2
|
||||||
|
statpt
|
||||||
|
aoforce
|
||||||
|
escf
|
||||||
|
egrad
|
||||||
|
odft
|
1
internal/tagger/apps/vasp.txt
Normal file
1
internal/tagger/apps/vasp.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
vasp
|
322
internal/tagger/classifyJob.go
Normal file
322
internal/tagger/classifyJob.go
Normal file
@ -0,0 +1,322 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"embed"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"maps"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"text/template"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/expr-lang/expr"
|
||||||
|
"github.com/expr-lang/expr/vm"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed jobclasses/*
|
||||||
|
var jobclassFiles embed.FS
|
||||||
|
|
||||||
|
// Variable is the JSON representation of a named helper expression inside a
// job class rule file.
type Variable struct {
	// Name is the key under which the evaluated value is made available.
	Name string `json:"name"`
	// Expr is the expression source text.
	Expr string `json:"expr"`
}
|
||||||
|
|
||||||
|
type ruleVariable struct {
|
||||||
|
name string
|
||||||
|
expr *vm.Program
|
||||||
|
}
|
||||||
|
|
||||||
|
type RuleFormat struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Tag string `json:"tag"`
|
||||||
|
Parameters []string `json:"parameters"`
|
||||||
|
Metrics []string `json:"metrics"`
|
||||||
|
Requirements []string `json:"requirements"`
|
||||||
|
Variables []Variable `json:"variables"`
|
||||||
|
Rule string `json:"rule"`
|
||||||
|
Hint string `json:"hint"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ruleInfo struct {
|
||||||
|
env map[string]any
|
||||||
|
metrics []string
|
||||||
|
requirements []*vm.Program
|
||||||
|
variables []ruleVariable
|
||||||
|
rule *vm.Program
|
||||||
|
hint *template.Template
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobClassTagger struct {
|
||||||
|
rules map[string]ruleInfo
|
||||||
|
parameters map[string]any
|
||||||
|
tagType string
|
||||||
|
cfgPath string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) prepareRule(b []byte, fns string) {
|
||||||
|
var rule RuleFormat
|
||||||
|
if err := json.NewDecoder(bytes.NewReader(b)).Decode(&rule); err != nil {
|
||||||
|
log.Warn("Error while decoding raw job meta json")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ri := ruleInfo{}
|
||||||
|
ri.env = make(map[string]any)
|
||||||
|
ri.metrics = make([]string, 0)
|
||||||
|
ri.requirements = make([]*vm.Program, 0)
|
||||||
|
ri.variables = make([]ruleVariable, 0)
|
||||||
|
|
||||||
|
// check if all required parameters are available
|
||||||
|
for _, p := range rule.Parameters {
|
||||||
|
param, ok := t.parameters[p]
|
||||||
|
if !ok {
|
||||||
|
log.Warnf("prepareRule() > missing parameter %s in rule %s", p, fns)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.env[p] = param
|
||||||
|
}
|
||||||
|
|
||||||
|
// set all required metrics
|
||||||
|
ri.metrics = append(ri.metrics, rule.Metrics...)
|
||||||
|
|
||||||
|
// compile requirements
|
||||||
|
for _, r := range rule.Requirements {
|
||||||
|
req, err := expr.Compile(r, expr.AsBool())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error compiling requirement %s: %#v", r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.requirements = append(ri.requirements, req)
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile variables
|
||||||
|
for _, v := range rule.Variables {
|
||||||
|
req, err := expr.Compile(v.Expr, expr.AsFloat64())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error compiling requirement %s: %#v", v.Name, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.variables = append(ri.variables, ruleVariable{name: v.Name, expr: req})
|
||||||
|
}
|
||||||
|
|
||||||
|
// compile rule
|
||||||
|
exp, err := expr.Compile(rule.Rule, expr.AsBool())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error compiling rule %s: %#v", fns, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ri.rule = exp
|
||||||
|
|
||||||
|
// prepare hint template
|
||||||
|
ri.hint, err = template.New(fns).Parse(rule.Hint)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error processing template %s: %#v", fns, err)
|
||||||
|
}
|
||||||
|
log.Infof("prepareRule() > processing %s with %d requirements and %d variables", fns, len(ri.requirements), len(ri.variables))
|
||||||
|
|
||||||
|
t.rules[rule.Tag] = ri
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) EventMatch(s string) bool {
|
||||||
|
return strings.Contains(s, "jobclasses")
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Only process the file that caused the event
|
||||||
|
func (t *JobClassTagger) EventCallback() {
|
||||||
|
files, err := os.ReadDir(t.cfgPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.CheckFileExists(t.cfgPath + "/parameters.json") {
|
||||||
|
log.Info("Merge parameters")
|
||||||
|
b, err := os.ReadFile(t.cfgPath + "/parameters.json")
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var paramTmp map[string]any
|
||||||
|
if err := json.NewDecoder(bytes.NewReader(b)).Decode(¶mTmp); err != nil {
|
||||||
|
log.Warn("Error while decoding parameters.json")
|
||||||
|
}
|
||||||
|
|
||||||
|
maps.Copy(t.parameters, paramTmp)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
if fns != "parameters.json" {
|
||||||
|
log.Debugf("Process: %s", fns)
|
||||||
|
filename := fmt.Sprintf("%s/%s", t.cfgPath, fns)
|
||||||
|
b, err := os.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.prepareRule(b, fns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) initParameters() error {
|
||||||
|
log.Info("Initialize parameters")
|
||||||
|
b, err := jobclassFiles.ReadFile("jobclasses/parameters.json")
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewDecoder(bytes.NewReader(b)).Decode(&t.parameters); err != nil {
|
||||||
|
log.Warn("Error while decoding parameters.json")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) Register() error {
|
||||||
|
t.cfgPath = "./var/tagger/jobclasses"
|
||||||
|
t.tagType = "jobClass"
|
||||||
|
|
||||||
|
err := t.initParameters()
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("error reading parameters.json: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := jobclassFiles.ReadDir("jobclasses")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error reading app folder: %#v", err)
|
||||||
|
}
|
||||||
|
t.rules = make(map[string]ruleInfo, 0)
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
if fns != "parameters.json" {
|
||||||
|
filename := fmt.Sprintf("jobclasses/%s", fns)
|
||||||
|
log.Infof("Process: %s", fns)
|
||||||
|
|
||||||
|
b, err := jobclassFiles.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("prepareRule() > open file error: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
t.prepareRule(b, fns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.CheckFileExists(t.cfgPath) {
|
||||||
|
t.EventCallback()
|
||||||
|
log.Infof("Setup file watch for %s", t.cfgPath)
|
||||||
|
util.AddListener(t.cfgPath, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *JobClassTagger) Match(job *schema.Job) {
|
||||||
|
r := repository.GetJobRepository()
|
||||||
|
jobstats, err := archive.GetStatistics(job)
|
||||||
|
metricsList := archive.GetMetricConfigSubCluster(job.Cluster, job.SubCluster)
|
||||||
|
log.Infof("Enter match rule with %d rules for job %d", len(t.rules), job.JobID)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("job classification failed for job %d: %#v", job.JobID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for tag, ri := range t.rules {
|
||||||
|
env := make(map[string]any)
|
||||||
|
maps.Copy(env, ri.env)
|
||||||
|
log.Infof("Try to match rule %s for job %d", tag, job.JobID)
|
||||||
|
|
||||||
|
// Initialize environment
|
||||||
|
env["job"] = map[string]any{
|
||||||
|
"exclusive": job.Exclusive,
|
||||||
|
"duration": job.Duration,
|
||||||
|
"numCores": job.NumHWThreads,
|
||||||
|
"numNodes": job.NumNodes,
|
||||||
|
"jobState": job.State,
|
||||||
|
"numAcc": job.NumAcc,
|
||||||
|
"smt": job.SMT,
|
||||||
|
}
|
||||||
|
|
||||||
|
// add metrics to env
|
||||||
|
for _, m := range ri.metrics {
|
||||||
|
stats, ok := jobstats[m]
|
||||||
|
if !ok {
|
||||||
|
log.Errorf("job classification failed for job %d: missing metric '%s'", job.JobID, m)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
env[m] = map[string]any{
|
||||||
|
"min": stats.Min,
|
||||||
|
"max": stats.Max,
|
||||||
|
"avg": stats.Avg,
|
||||||
|
"limits": map[string]float64{
|
||||||
|
"peak": metricsList[m].Peak,
|
||||||
|
"normal": metricsList[m].Normal,
|
||||||
|
"caution": metricsList[m].Caution,
|
||||||
|
"alert": metricsList[m].Alert,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check rule requirements apply
|
||||||
|
for _, r := range ri.requirements {
|
||||||
|
ok, err := expr.Run(r, env)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error running requirement for rule %s: %#v", tag, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !ok.(bool) {
|
||||||
|
log.Infof("requirement for rule %s not met", tag)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate rule expression
|
||||||
|
for _, v := range ri.variables {
|
||||||
|
value, err := expr.Run(v.expr, env)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error running rule %s: %#v", tag, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
env[v.name] = value
|
||||||
|
}
|
||||||
|
|
||||||
|
// dump.P(env)
|
||||||
|
|
||||||
|
match, err := expr.Run(ri.rule, env)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error running rule %s: %#v", tag, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if match.(bool) {
|
||||||
|
log.Info("Rule matches!")
|
||||||
|
id := *job.ID
|
||||||
|
if !r.HasTag(id, t.tagType, tag) {
|
||||||
|
r.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
// process hint template
|
||||||
|
var msg bytes.Buffer
|
||||||
|
if err := ri.hint.Execute(&msg, env); err != nil {
|
||||||
|
log.Errorf("Template error: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Handle case where multiple tags apply
|
||||||
|
r.UpdateMetadata(job, "message", msg.String())
|
||||||
|
} else {
|
||||||
|
log.Info("Rule does not match!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
127
internal/tagger/detectApp.go
Normal file
127
internal/tagger/detectApp.go
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"embed"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed apps/*
|
||||||
|
var appFiles embed.FS
|
||||||
|
|
||||||
|
// appInfo holds the detection patterns of one application.
type appInfo struct {
	// tag is the tag name applied when one of the patterns matches.
	tag string
	// strings are the patterns, one per line of the application's file.
	strings []string
}
|
||||||
|
|
||||||
|
type AppTagger struct {
|
||||||
|
apps map[string]appInfo
|
||||||
|
tagType string
|
||||||
|
cfgPath string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) scanApp(f fs.File, fns string) {
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
ai := appInfo{tag: strings.TrimSuffix(fns, filepath.Ext(fns)), strings: make([]string, 0)}
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
ai.strings = append(ai.strings, scanner.Text())
|
||||||
|
}
|
||||||
|
delete(t.apps, ai.tag)
|
||||||
|
t.apps[ai.tag] = ai
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) EventMatch(s string) bool {
|
||||||
|
return strings.Contains(s, "apps")
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Only process the file that caused the event
|
||||||
|
func (t *AppTagger) EventCallback() {
|
||||||
|
files, err := os.ReadDir(t.cfgPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
log.Debugf("Process: %s", fns)
|
||||||
|
f, err := os.Open(fmt.Sprintf("%s/%s", t.cfgPath, fns))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error opening app file %s: %#v", fns, err)
|
||||||
|
}
|
||||||
|
t.scanApp(f, fns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) Register() error {
|
||||||
|
t.cfgPath = "./var/tagger/apps"
|
||||||
|
t.tagType = "app"
|
||||||
|
|
||||||
|
files, err := appFiles.ReadDir("apps")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error reading app folder: %#v", err)
|
||||||
|
}
|
||||||
|
t.apps = make(map[string]appInfo, 0)
|
||||||
|
for _, fn := range files {
|
||||||
|
fns := fn.Name()
|
||||||
|
log.Debugf("Process: %s", fns)
|
||||||
|
f, err := appFiles.Open(fmt.Sprintf("apps/%s", fns))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error opening app file %s: %#v", fns, err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
t.scanApp(f, fns)
|
||||||
|
}
|
||||||
|
|
||||||
|
if util.CheckFileExists(t.cfgPath) {
|
||||||
|
t.EventCallback()
|
||||||
|
log.Infof("Setup file watch for %s", t.cfgPath)
|
||||||
|
util.AddListener(t.cfgPath, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *AppTagger) Match(job *schema.Job) {
|
||||||
|
r := repository.GetJobRepository()
|
||||||
|
metadata, err := r.FetchMetadata(job)
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
jobscript, ok := metadata["jobScript"]
|
||||||
|
if ok {
|
||||||
|
id := *job.ID
|
||||||
|
|
||||||
|
out:
|
||||||
|
for _, a := range t.apps {
|
||||||
|
tag := a.tag
|
||||||
|
for _, s := range a.strings {
|
||||||
|
matched, _ := regexp.MatchString(s, strings.ToLower(jobscript))
|
||||||
|
if matched {
|
||||||
|
if !r.HasTag(id, t.tagType, tag) {
|
||||||
|
r.AddTagOrCreateDirect(id, t.tagType, tag)
|
||||||
|
break out
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster)
|
||||||
|
}
|
||||||
|
}
|
59
internal/tagger/detectApp_test.go
Normal file
59
internal/tagger/detectApp_test.go
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
func setup(tb testing.TB) *repository.JobRepository {
|
||||||
|
tb.Helper()
|
||||||
|
log.Init("warn", true)
|
||||||
|
dbfile := "../repository/testdata/job.db"
|
||||||
|
err := repository.MigrateDB("sqlite3", dbfile)
|
||||||
|
noErr(tb, err)
|
||||||
|
repository.Connect("sqlite3", dbfile)
|
||||||
|
return repository.GetJobRepository()
|
||||||
|
}
|
||||||
|
|
||||||
|
// noErr aborts the running test when err is not nil.
func noErr(tb testing.TB, err error) {
	tb.Helper()

	if err == nil {
		return
	}
	tb.Fatal("Error is not nil:", err)
}
|
||||||
|
|
||||||
|
func TestRegister(t *testing.T) {
|
||||||
|
var tagger AppTagger
|
||||||
|
|
||||||
|
err := tagger.Register()
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
if len(tagger.apps) != 16 {
|
||||||
|
t.Errorf("wrong summary for diagnostic \ngot: %d \nwant: 16", len(tagger.apps))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatch(t *testing.T) {
|
||||||
|
r := setup(t)
|
||||||
|
|
||||||
|
job, err := r.FindByIdDirect(5)
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
var tagger AppTagger
|
||||||
|
|
||||||
|
err = tagger.Register()
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
tagger.Match(job)
|
||||||
|
|
||||||
|
if !r.HasTag(5, "app", "vasp") {
|
||||||
|
t.Errorf("missing tag vasp")
|
||||||
|
}
|
||||||
|
}
|
26
internal/tagger/jobclasses/highload.json
Normal file
26
internal/tagger/jobclasses/highload.json
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "Excessive CPU load",
|
||||||
|
"tag": "excessiveload",
|
||||||
|
"parameters": [
|
||||||
|
"excessivecpuload_threshold_factor",
|
||||||
|
"job_min_duration_seconds",
|
||||||
|
"sampling_interval_seconds"
|
||||||
|
],
|
||||||
|
"metrics": ["cpu_load"],
|
||||||
|
"requirements": [
|
||||||
|
"job.exclusive == 1",
|
||||||
|
"job.duration > job_min_duration_seconds"
|
||||||
|
],
|
||||||
|
"variables": [
|
||||||
|
{
|
||||||
|
"name": "load_threshold",
|
||||||
|
"expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "load_perc",
|
||||||
|
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rule": "cpu_load.avg > load_threshold",
|
||||||
|
"hint": "This job was detected as excessiveload because the average cpu load {{.cpu_load.avg}} falls above the threshold {{.load_threshold}}."
|
||||||
|
}
|
22
internal/tagger/jobclasses/lowUtilization.json
Normal file
22
internal/tagger/jobclasses/lowUtilization.json
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"name": "Low resource utilization",
|
||||||
|
"tag": "lowutilization",
|
||||||
|
"parameters": ["job_min_duration_seconds"],
|
||||||
|
"metrics": ["flops_any", "mem_bw"],
|
||||||
|
"requirements": [
|
||||||
|
"job.exclusive == 1",
|
||||||
|
"job.duration > job_min_duration_seconds"
|
||||||
|
],
|
||||||
|
"variables": [
|
||||||
|
{
|
||||||
|
"name": "mem_bw_perc",
|
||||||
|
"expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "flops_any_perc",
|
||||||
|
"expr": "1.0 - (flops_any.avg / flops_any.limits.peak)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rule": "flops_any.avg < flops_any.limits.alert && mem_bw.avg < mem_bw.limits.alert",
|
||||||
|
"hint": "This job was detected as low utilization because the average flop rate {{.flops_any.avg}} falls below the threshold {{.flops_any.limits.alert}}."
|
||||||
|
}
|
26
internal/tagger/jobclasses/lowload.json
Normal file
26
internal/tagger/jobclasses/lowload.json
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"name": "Low CPU load",
|
||||||
|
"tag": "lowload",
|
||||||
|
"parameters": [
|
||||||
|
"lowcpuload_threshold_factor",
|
||||||
|
"job_min_duration_seconds",
|
||||||
|
"sampling_interval_seconds"
|
||||||
|
],
|
||||||
|
"metrics": ["cpu_load"],
|
||||||
|
"requirements": [
|
||||||
|
"job.exclusive == 1",
|
||||||
|
"job.duration > job_min_duration_seconds"
|
||||||
|
],
|
||||||
|
"variables": [
|
||||||
|
{
|
||||||
|
"name": "load_threshold",
|
||||||
|
"expr": "job.numCores * lowcpuload_threshold_factor"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "load_perc",
|
||||||
|
"expr": "1.0 - (cpu_load.avg / cpu_load.limits.peak)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rule": "cpu_load.avg < cpu_load.limits.caution",
|
||||||
|
"hint": "This job was detected as lowload because the average cpu load {{.cpu_load.avg}} falls below the threshold {{.cpu_load.limits.caution}}."
|
||||||
|
}
|
14
internal/tagger/jobclasses/parameters.json
Normal file
14
internal/tagger/jobclasses/parameters.json
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"lowcpuload_threshold_factor": 0.9,
|
||||||
|
"excessivecpuload_threshold_factor": 1.1,
|
||||||
|
"highmemoryusage_threshold_factor": 0.9,
|
||||||
|
"node_load_imbalance_threshold_factor": 0.1,
|
||||||
|
"core_load_imbalance_threshold_factor": 0.1,
|
||||||
|
"high_memory_load_threshold_factor": 0.9,
|
||||||
|
"lowgpuload_threshold_factor": 0.7,
|
||||||
|
"memory_leak_slope_threshold": 0.1,
|
||||||
|
"job_min_duration_seconds": 600.0,
|
||||||
|
"sampling_interval_seconds": 30.0,
|
||||||
|
"cpu_load_pre_cutoff_samples": 11.0,
|
||||||
|
"cpu_load_core_pre_cutoff_samples": 6.0
|
||||||
|
}
|
88
internal/tagger/tagger.go
Normal file
88
internal/tagger/tagger.go
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Tagger interface {
|
||||||
|
Register() error
|
||||||
|
Match(job *schema.Job)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
jobTagger *JobTagger
|
||||||
|
)
|
||||||
|
|
||||||
|
type JobTagger struct {
|
||||||
|
startTaggers []Tagger
|
||||||
|
stopTaggers []Tagger
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTagger() {
|
||||||
|
jobTagger = &JobTagger{}
|
||||||
|
jobTagger.startTaggers = make([]Tagger, 0)
|
||||||
|
jobTagger.startTaggers = append(jobTagger.startTaggers, &AppTagger{})
|
||||||
|
jobTagger.stopTaggers = make([]Tagger, 0)
|
||||||
|
jobTagger.stopTaggers = append(jobTagger.stopTaggers, &JobClassTagger{})
|
||||||
|
|
||||||
|
for _, tagger := range jobTagger.startTaggers {
|
||||||
|
tagger.Register()
|
||||||
|
}
|
||||||
|
for _, tagger := range jobTagger.stopTaggers {
|
||||||
|
tagger.Register()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Init() {
|
||||||
|
initOnce.Do(func() {
|
||||||
|
newTagger()
|
||||||
|
repository.RegisterJobJook(jobTagger)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (jt *JobTagger) JobStartCallback(job *schema.Job) {
|
||||||
|
for _, tagger := range jt.startTaggers {
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (jt *JobTagger) JobStopCallback(job *schema.Job) {
|
||||||
|
for _, tagger := range jt.stopTaggers {
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func RunTaggers() error {
|
||||||
|
newTagger()
|
||||||
|
r := repository.GetJobRepository()
|
||||||
|
jl, err := r.GetJobList()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting job list %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, id := range jl {
|
||||||
|
job, err := r.FindByIdDirect(id)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while getting job %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, tagger := range jobTagger.startTaggers {
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
for _, tagger := range jobTagger.stopTaggers {
|
||||||
|
log.Infof("Run stop tagger for job %d", job.ID)
|
||||||
|
tagger.Match(job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
31
internal/tagger/tagger_test.go
Normal file
31
internal/tagger/tagger_test.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package tagger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestInit(t *testing.T) {
|
||||||
|
Init()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestJobStartCallback(t *testing.T) {
|
||||||
|
Init()
|
||||||
|
r := setup(t)
|
||||||
|
job, err := r.FindByIdDirect(2)
|
||||||
|
noErr(t, err)
|
||||||
|
|
||||||
|
jobs := make([]*schema.Job, 0, 1)
|
||||||
|
jobs = append(jobs, job)
|
||||||
|
|
||||||
|
repository.CallJobStartHooks(jobs)
|
||||||
|
if !r.HasTag(2, "app", "python") {
|
||||||
|
t.Errorf("missing tag python")
|
||||||
|
}
|
||||||
|
}
|
35
internal/taskManager/commitJobService.go
Normal file
35
internal/taskManager/commitJobService.go
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterCommitJobService() {
|
||||||
|
var frequency string
|
||||||
|
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.CommitJobWorker != "" {
|
||||||
|
frequency = config.Keys.CronFrequency.CommitJobWorker
|
||||||
|
} else {
|
||||||
|
frequency = "2m"
|
||||||
|
}
|
||||||
|
d, _ := time.ParseDuration(frequency)
|
||||||
|
log.Infof("Register commitJob service with %s interval", frequency)
|
||||||
|
|
||||||
|
s.NewJob(gocron.DurationJob(d),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
start := time.Now()
|
||||||
|
log.Printf("Jobcache sync started at %s", start.Format(time.RFC3339))
|
||||||
|
jobs, _ := jobRepo.SyncJobs()
|
||||||
|
repository.CallJobStartHooks(jobs)
|
||||||
|
log.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start))
|
||||||
|
}))
|
||||||
|
}
|
@ -81,6 +81,7 @@ func Start() {
|
|||||||
|
|
||||||
RegisterFootprintWorker()
|
RegisterFootprintWorker()
|
||||||
RegisterUpdateDurationWorker()
|
RegisterUpdateDurationWorker()
|
||||||
|
RegisterCommitJobService()
|
||||||
|
|
||||||
s.Start()
|
s.Start()
|
||||||
}
|
}
|
||||||
|
@ -73,11 +73,7 @@ func RegisterFootprintWorker() {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
jobMeta := &schema.JobMeta{
|
job.Statistics = make(map[string]schema.JobStatistics)
|
||||||
BaseJob: job.BaseJob,
|
|
||||||
StartTime: job.StartTime.Unix(),
|
|
||||||
Statistics: make(map[string]schema.JobStatistics),
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, metric := range allMetrics {
|
for _, metric := range allMetrics {
|
||||||
avg, min, max := 0.0, 0.0, 0.0
|
avg, min, max := 0.0, 0.0, 0.0
|
||||||
@ -95,7 +91,7 @@ func RegisterFootprintWorker() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add values rounded to 2 digits: repo.LoadStats may return unrounded
|
// Add values rounded to 2 digits: repo.LoadStats may return unrounded
|
||||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
job.Statistics[metric] = schema.JobStatistics{
|
||||||
Unit: schema.Unit{
|
Unit: schema.Unit{
|
||||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||||
@ -108,7 +104,7 @@ func RegisterFootprintWorker() {
|
|||||||
|
|
||||||
// Build Statement per Job, Add to Pending Array
|
// Build Statement per Job, Add to Pending Array
|
||||||
stmt := sq.Update("job")
|
stmt := sq.Update("job")
|
||||||
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
|
stmt, err = jobRepo.UpdateFootprint(stmt, job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
|
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
|
||||||
ce++
|
ce++
|
||||||
|
75
internal/util/fswatcher.go
Normal file
75
internal/util/fswatcher.go
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package util
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/fsnotify/fsnotify"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Listener is implemented by components that want to react to file system
// events delivered by the package watcher.
type Listener interface {
	// EventMatch reports whether the listener is interested in the event,
	// which is passed in its string representation.
	EventMatch(event string) bool
	// EventCallback is invoked for every event that EventMatch accepted.
	EventCallback()
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
w *fsnotify.Watcher
|
||||||
|
listeners []Listener
|
||||||
|
)
|
||||||
|
|
||||||
|
func AddListener(path string, l Listener) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
initOnce.Do(func() {
|
||||||
|
var err error
|
||||||
|
w, err = fsnotify.NewWatcher()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("creating a new watcher: %w", err)
|
||||||
|
}
|
||||||
|
listeners = make([]Listener, 0)
|
||||||
|
|
||||||
|
go watchLoop(w)
|
||||||
|
})
|
||||||
|
|
||||||
|
listeners = append(listeners, l)
|
||||||
|
err = w.Add(path)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("%q: %s", path, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func FsWatcherShutdown() {
|
||||||
|
if w != nil {
|
||||||
|
w.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func watchLoop(w *fsnotify.Watcher) {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
// Read from Errors.
|
||||||
|
case err, ok := <-w.Errors:
|
||||||
|
if !ok { // Channel was closed (i.e. Watcher.Close() was called).
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Errorf("watch event loop: %s", err)
|
||||||
|
// Read from Events.
|
||||||
|
case e, ok := <-w.Events:
|
||||||
|
if !ok { // Channel was closed (i.e. Watcher.Close() was called).
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Event %s", e)
|
||||||
|
for _, l := range listeners {
|
||||||
|
if l.EventMatch(e.String()) {
|
||||||
|
l.EventCallback()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -7,6 +7,7 @@ package archive
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"maps"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
@ -23,7 +24,7 @@ type ArchiveBackend interface {
|
|||||||
|
|
||||||
Exists(job *schema.Job) bool
|
Exists(job *schema.Job) bool
|
||||||
|
|
||||||
LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
|
LoadJobMeta(job *schema.Job) (*schema.Job, error)
|
||||||
|
|
||||||
LoadJobData(job *schema.Job) (schema.JobData, error)
|
LoadJobData(job *schema.Job) (schema.JobData, error)
|
||||||
|
|
||||||
@ -31,9 +32,9 @@ type ArchiveBackend interface {
|
|||||||
|
|
||||||
LoadClusterCfg(name string) (*schema.Cluster, error)
|
LoadClusterCfg(name string) (*schema.Cluster, error)
|
||||||
|
|
||||||
StoreJobMeta(jobMeta *schema.JobMeta) error
|
StoreJobMeta(jobMeta *schema.Job) error
|
||||||
|
|
||||||
ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) error
|
ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
|
||||||
|
|
||||||
GetClusters() []string
|
GetClusters() []string
|
||||||
|
|
||||||
@ -51,7 +52,7 @@ type ArchiveBackend interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type JobContainer struct {
|
type JobContainer struct {
|
||||||
Meta *schema.JobMeta
|
Meta *schema.Job
|
||||||
Data *schema.JobData
|
Data *schema.JobData
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,6 +61,7 @@ var (
|
|||||||
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||||
ar ArchiveBackend
|
ar ArchiveBackend
|
||||||
useArchive bool
|
useArchive bool
|
||||||
|
mutex sync.Mutex
|
||||||
)
|
)
|
||||||
|
|
||||||
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||||
@ -162,7 +164,6 @@ func LoadScopedStatsFromArchive(
|
|||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
) (schema.ScopedJobStats, error) {
|
) (schema.ScopedJobStats, error) {
|
||||||
|
|
||||||
data, err := ar.LoadJobStats(job)
|
data, err := ar.LoadJobStats(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error while loading job stats from archiveBackend: %s", err.Error())
|
log.Errorf("Error while loading job stats from archiveBackend: %s", err.Error())
|
||||||
@ -185,6 +186,9 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
|||||||
// If the job is archived, find its `meta.json` file and override the Metadata
|
// If the job is archived, find its `meta.json` file and override the Metadata
|
||||||
// in that JSON file. If the job is not archived, nothing is done.
|
// in that JSON file. If the job is not archived, nothing is done.
|
||||||
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
||||||
|
mutex.Lock()
|
||||||
|
defer mutex.Unlock()
|
||||||
|
|
||||||
if job.State == schema.JobStateRunning || !useArchive {
|
if job.State == schema.JobStateRunning || !useArchive {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -195,9 +199,7 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for k, v := range metadata {
|
maps.Copy(jobMeta.MetaData, metadata)
|
||||||
jobMeta.MetaData[k] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
return ar.StoreJobMeta(jobMeta)
|
return ar.StoreJobMeta(jobMeta)
|
||||||
}
|
}
|
||||||
@ -205,6 +207,9 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
|||||||
// If the job is archived, find its `meta.json` file and override the tags list
|
// If the job is archived, find its `meta.json` file and override the tags list
|
||||||
// in that JSON file. If the job is not archived, nothing is done.
|
// in that JSON file. If the job is not archived, nothing is done.
|
||||||
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||||
|
mutex.Lock()
|
||||||
|
defer mutex.Unlock()
|
||||||
|
|
||||||
if job.State == schema.JobStateRunning || !useArchive {
|
if job.State == schema.JobStateRunning || !useArchive {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
@ -32,12 +31,12 @@ func setup(t *testing.T) archive.ArchiveBackend {
|
|||||||
jobs[0] = &schema.Job{}
|
jobs[0] = &schema.Job{}
|
||||||
jobs[0].JobID = 1403244
|
jobs[0].JobID = 1403244
|
||||||
jobs[0].Cluster = "emmy"
|
jobs[0].Cluster = "emmy"
|
||||||
jobs[0].StartTime = time.Unix(1608923076, 0)
|
jobs[0].StartTime = 1608923076
|
||||||
|
|
||||||
jobs[1] = &schema.Job{}
|
jobs[1] = &schema.Job{}
|
||||||
jobs[0].JobID = 1404397
|
jobs[0].JobID = 1404397
|
||||||
jobs[0].Cluster = "emmy"
|
jobs[0].Cluster = "emmy"
|
||||||
jobs[0].StartTime = time.Unix(1609300556, 0)
|
jobs[0].StartTime = 1609300556
|
||||||
|
|
||||||
return archive.GetHandle()
|
return archive.GetHandle()
|
||||||
}
|
}
|
||||||
|
@ -69,16 +69,18 @@ func initClusterConfig() error {
|
|||||||
|
|
||||||
for _, sc := range cluster.SubClusters {
|
for _, sc := range cluster.SubClusters {
|
||||||
newMetric := &schema.MetricConfig{
|
newMetric := &schema.MetricConfig{
|
||||||
Unit: mc.Unit,
|
Metric: schema.Metric{
|
||||||
|
Name: mc.Name,
|
||||||
|
Unit: mc.Unit,
|
||||||
|
Peak: mc.Peak,
|
||||||
|
Normal: mc.Normal,
|
||||||
|
Caution: mc.Caution,
|
||||||
|
Alert: mc.Alert,
|
||||||
|
},
|
||||||
Energy: mc.Energy,
|
Energy: mc.Energy,
|
||||||
Name: mc.Name,
|
|
||||||
Scope: mc.Scope,
|
Scope: mc.Scope,
|
||||||
Aggregation: mc.Aggregation,
|
Aggregation: mc.Aggregation,
|
||||||
Peak: mc.Peak,
|
|
||||||
Caution: mc.Caution,
|
|
||||||
Alert: mc.Alert,
|
|
||||||
Timestep: mc.Timestep,
|
Timestep: mc.Timestep,
|
||||||
Normal: mc.Normal,
|
|
||||||
LowerIsBetter: mc.LowerIsBetter,
|
LowerIsBetter: mc.LowerIsBetter,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -167,6 +169,45 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
|
|||||||
return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
|
return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetMetricConfigSubCluster(cluster, subcluster string) map[string]*schema.Metric {
|
||||||
|
metrics := make(map[string]*schema.Metric)
|
||||||
|
|
||||||
|
for _, c := range Clusters {
|
||||||
|
if c.Name == cluster {
|
||||||
|
for _, m := range c.MetricConfig {
|
||||||
|
for _, s := range m.SubClusters {
|
||||||
|
if s.Name == subcluster {
|
||||||
|
metrics[m.Name] = &schema.Metric{
|
||||||
|
Name: m.Name,
|
||||||
|
Unit: s.Unit,
|
||||||
|
Peak: s.Peak,
|
||||||
|
Normal: s.Normal,
|
||||||
|
Caution: s.Caution,
|
||||||
|
Alert: s.Alert,
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_, ok := metrics[m.Name]
|
||||||
|
if !ok {
|
||||||
|
metrics[m.Name] = &schema.Metric{
|
||||||
|
Name: m.Name,
|
||||||
|
Unit: m.Unit,
|
||||||
|
Peak: m.Peak,
|
||||||
|
Normal: m.Normal,
|
||||||
|
Caution: m.Caution,
|
||||||
|
Alert: m.Alert,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return metrics
|
||||||
|
}
|
||||||
|
|
||||||
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
||||||
for _, c := range Clusters {
|
for _, c := range Clusters {
|
||||||
if c.Name == cluster {
|
if c.Name == cluster {
|
||||||
@ -182,7 +223,7 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
|||||||
|
|
||||||
// AssignSubCluster sets the `job.subcluster` property of the job based
|
// AssignSubCluster sets the `job.subcluster` property of the job based
|
||||||
// on its cluster and resources.
|
// on its cluster and resources.
|
||||||
func AssignSubCluster(job *schema.BaseJob) error {
|
func AssignSubCluster(job *schema.Job) error {
|
||||||
cluster := GetCluster(job.Cluster)
|
cluster := GetCluster(job.Cluster)
|
||||||
if cluster == nil {
|
if cluster == nil {
|
||||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
|
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
|
||||||
|
@ -53,28 +53,27 @@ func getDirectory(
|
|||||||
rootPath,
|
rootPath,
|
||||||
job.Cluster,
|
job.Cluster,
|
||||||
lvl1, lvl2,
|
lvl1, lvl2,
|
||||||
strconv.FormatInt(job.StartTime.Unix(), 10))
|
strconv.FormatInt(job.StartTime, 10))
|
||||||
}
|
}
|
||||||
|
|
||||||
func getPath(
|
func getPath(
|
||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
rootPath string,
|
rootPath string,
|
||||||
file string) string {
|
file string,
|
||||||
|
) string {
|
||||||
return filepath.Join(
|
return filepath.Join(
|
||||||
getDirectory(job, rootPath), file)
|
getDirectory(job, rootPath), file)
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadJobMeta(filename string) (*schema.JobMeta, error) {
|
func loadJobMeta(filename string) (*schema.Job, error) {
|
||||||
|
|
||||||
b, err := os.ReadFile(filename)
|
b, err := os.ReadFile(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("loadJobMeta() > open file error: %v", err)
|
log.Errorf("loadJobMeta() > open file error: %v", err)
|
||||||
return &schema.JobMeta{}, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if config.Keys.Validate {
|
if config.Keys.Validate {
|
||||||
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
|
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
|
||||||
return &schema.JobMeta{}, fmt.Errorf("validate job meta: %v", err)
|
return nil, fmt.Errorf("validate job meta: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,7 +82,6 @@ func loadJobMeta(filename string) (*schema.JobMeta, error) {
|
|||||||
|
|
||||||
func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
|
func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
|
||||||
f, err := os.Open(filename)
|
f, err := os.Open(filename)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("fsBackend LoadJobData()- %v", err)
|
log.Errorf("fsBackend LoadJobData()- %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -117,7 +115,6 @@ func loadJobData(filename string, isCompressed bool) (schema.JobData, error) {
|
|||||||
|
|
||||||
func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, error) {
|
func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, error) {
|
||||||
f, err := os.Open(filename)
|
f, err := os.Open(filename)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("fsBackend LoadJobStats()- %v", err)
|
log.Errorf("fsBackend LoadJobStats()- %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -150,7 +147,6 @@ func loadJobStats(filename string, isCompressed bool) (schema.ScopedJobStats, er
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) {
|
func (fsa *FsArchive) Init(rawConfig json.RawMessage) (uint64, error) {
|
||||||
|
|
||||||
var config FsArchiveConfig
|
var config FsArchiveConfig
|
||||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||||
log.Warnf("Init() > Unmarshal error: %#v", err)
|
log.Warnf("Init() > Unmarshal error: %#v", err)
|
||||||
@ -276,7 +272,6 @@ func (fsa *FsArchive) Exists(job *schema.Job) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) Clean(before int64, after int64) {
|
func (fsa *FsArchive) Clean(before int64, after int64) {
|
||||||
|
|
||||||
if after == 0 {
|
if after == 0 {
|
||||||
after = math.MaxInt64
|
after = math.MaxInt64
|
||||||
}
|
}
|
||||||
@ -392,7 +387,6 @@ func (fsa *FsArchive) Compress(jobs []*schema.Job) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) CompressLast(starttime int64) int64 {
|
func (fsa *FsArchive) CompressLast(starttime int64) int64 {
|
||||||
|
|
||||||
filename := filepath.Join(fsa.path, "compress.txt")
|
filename := filepath.Join(fsa.path, "compress.txt")
|
||||||
b, err := os.ReadFile(filename)
|
b, err := os.ReadFile(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -435,13 +429,12 @@ func (fsa *FsArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, erro
|
|||||||
return loadJobStats(filename, isCompressed)
|
return loadJobStats(filename, isCompressed)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {
|
func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
|
||||||
filename := getPath(job, fsa.path, "meta.json")
|
filename := getPath(job, fsa.path, "meta.json")
|
||||||
return loadJobMeta(filename)
|
return loadJobMeta(filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
||||||
|
|
||||||
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
|
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("LoadClusterCfg() > open file error: %v", err)
|
log.Errorf("LoadClusterCfg() > open file error: %v", err)
|
||||||
@ -456,7 +449,6 @@ func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
||||||
|
|
||||||
ch := make(chan JobContainer)
|
ch := make(chan JobContainer)
|
||||||
go func() {
|
go func() {
|
||||||
clustersDir, err := os.ReadDir(fsa.path)
|
clustersDir, err := os.ReadDir(fsa.path)
|
||||||
@ -526,19 +518,13 @@ func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
return ch
|
return ch
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
|
func (fsa *FsArchive) StoreJobMeta(job *schema.Job) error {
|
||||||
|
f, err := os.Create(getPath(job, fsa.path, "meta.json"))
|
||||||
job := schema.Job{
|
|
||||||
BaseJob: jobMeta.BaseJob,
|
|
||||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
|
||||||
StartTimeUnix: jobMeta.StartTime,
|
|
||||||
}
|
|
||||||
f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while creating filepath for meta.json")
|
log.Error("Error while creating filepath for meta.json")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := EncodeJobMeta(f, jobMeta); err != nil {
|
if err := EncodeJobMeta(f, job); err != nil {
|
||||||
log.Error("Error while encoding job metadata to meta.json file")
|
log.Error("Error while encoding job metadata to meta.json file")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -555,15 +541,10 @@ func (fsa *FsArchive) GetClusters() []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (fsa *FsArchive) ImportJob(
|
func (fsa *FsArchive) ImportJob(
|
||||||
jobMeta *schema.JobMeta,
|
jobMeta *schema.Job,
|
||||||
jobData *schema.JobData) error {
|
jobData *schema.JobData,
|
||||||
|
) error {
|
||||||
job := schema.Job{
|
dir := getPath(jobMeta, fsa.path, "")
|
||||||
BaseJob: jobMeta.BaseJob,
|
|
||||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
|
||||||
StartTimeUnix: jobMeta.StartTime,
|
|
||||||
}
|
|
||||||
dir := getPath(&job, fsa.path, "")
|
|
||||||
if err := os.MkdirAll(dir, 0777); err != nil {
|
if err := os.MkdirAll(dir, 0777); err != nil {
|
||||||
log.Error("Error while creating job archive path")
|
log.Error("Error while creating job archive path")
|
||||||
return err
|
return err
|
||||||
@ -583,28 +564,6 @@ func (fsa *FsArchive) ImportJob(
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// var isCompressed bool = true
|
|
||||||
// // TODO Use shortJob Config for check
|
|
||||||
// if jobMeta.Duration < 300 {
|
|
||||||
// isCompressed = false
|
|
||||||
// f, err = os.Create(path.Join(dir, "data.json"))
|
|
||||||
// } else {
|
|
||||||
// f, err = os.Create(path.Join(dir, "data.json.gz"))
|
|
||||||
// }
|
|
||||||
// if err != nil {
|
|
||||||
// return err
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if isCompressed {
|
|
||||||
// if err := EncodeJobData(gzip.NewWriter(f), jobData); err != nil {
|
|
||||||
// return err
|
|
||||||
// }
|
|
||||||
// } else {
|
|
||||||
// if err := EncodeJobData(f, jobData); err != nil {
|
|
||||||
// return err
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
f, err = os.Create(path.Join(dir, "data.json"))
|
f, err = os.Create(path.Join(dir, "data.json"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while creating filepath for data.json")
|
log.Error("Error while creating filepath for data.json")
|
||||||
|
@ -9,7 +9,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@ -86,8 +85,11 @@ func TestLoadJobMeta(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
jobIn := schema.Job{BaseJob: schema.JobDefaults}
|
jobIn := schema.Job{
|
||||||
jobIn.StartTime = time.Unix(1608923076, 0)
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
jobIn.StartTime = 1608923076
|
||||||
jobIn.JobID = 1403244
|
jobIn.JobID = 1403244
|
||||||
jobIn.Cluster = "emmy"
|
jobIn.Cluster = "emmy"
|
||||||
|
|
||||||
@ -114,8 +116,11 @@ func TestLoadJobData(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
jobIn := schema.Job{BaseJob: schema.JobDefaults}
|
jobIn := schema.Job{
|
||||||
jobIn.StartTime = time.Unix(1608923076, 0)
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
jobIn.StartTime = 1608923076
|
||||||
jobIn.JobID = 1403244
|
jobIn.JobID = 1403244
|
||||||
jobIn.Cluster = "emmy"
|
jobIn.Cluster = "emmy"
|
||||||
|
|
||||||
@ -142,8 +147,11 @@ func BenchmarkLoadJobData(b *testing.B) {
|
|||||||
var fsa FsArchive
|
var fsa FsArchive
|
||||||
fsa.Init(json.RawMessage(archiveCfg))
|
fsa.Init(json.RawMessage(archiveCfg))
|
||||||
|
|
||||||
jobIn := schema.Job{BaseJob: schema.JobDefaults}
|
jobIn := schema.Job{
|
||||||
jobIn.StartTime = time.Unix(1608923076, 0)
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
jobIn.StartTime = 1608923076
|
||||||
jobIn.JobID = 1403244
|
jobIn.JobID = 1403244
|
||||||
jobIn.Cluster = "emmy"
|
jobIn.Cluster = "emmy"
|
||||||
|
|
||||||
@ -165,8 +173,11 @@ func BenchmarkLoadJobDataCompressed(b *testing.B) {
|
|||||||
var fsa FsArchive
|
var fsa FsArchive
|
||||||
fsa.Init(json.RawMessage(archiveCfg))
|
fsa.Init(json.RawMessage(archiveCfg))
|
||||||
|
|
||||||
jobIn := schema.Job{BaseJob: schema.JobDefaults}
|
jobIn := schema.Job{
|
||||||
jobIn.StartTime = time.Unix(1608923076, 0)
|
Exclusive: 1,
|
||||||
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
jobIn.StartTime = 1608923076
|
||||||
jobIn.JobID = 1403244
|
jobIn.JobID = 1403244
|
||||||
jobIn.Cluster = "emmy"
|
jobIn.Cluster = "emmy"
|
||||||
|
|
||||||
|
@ -69,8 +69,8 @@ func DecodeJobStats(r io.Reader, k string) (schema.ScopedJobStats, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
|
func DecodeJobMeta(r io.Reader) (*schema.Job, error) {
|
||||||
var d schema.JobMeta
|
var d schema.Job
|
||||||
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
||||||
log.Warn("Error while decoding raw job meta json")
|
log.Warn("Error while decoding raw job meta json")
|
||||||
return &d, err
|
return &d, err
|
||||||
@ -103,7 +103,7 @@ func EncodeJobData(w io.Writer, d *schema.JobData) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
|
func EncodeJobMeta(w io.Writer, d *schema.Job) error {
|
||||||
// Sanitize parameters
|
// Sanitize parameters
|
||||||
if err := json.NewEncoder(w).Encode(d); err != nil {
|
if err := json.NewEncoder(w).Encode(d); err != nil {
|
||||||
log.Warn("Error while encoding new job meta json")
|
log.Warn("Error while encoding new job meta json")
|
||||||
|
@ -61,7 +61,7 @@ func (nl *NodeList) PrintList() []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (nl *NodeList) NodeCount() int {
|
func (nl *NodeList) NodeCount() int {
|
||||||
var out int = 0
|
out := 0
|
||||||
for _, term := range *nl {
|
for _, term := range *nl {
|
||||||
if len(term) == 1 { // If only String-Part in Term: Single Node Name -> add one
|
if len(term) == 1 { // If only String-Part in Term: Single Node Name -> add one
|
||||||
out += 1
|
out += 1
|
||||||
@ -160,7 +160,7 @@ func (nle NLExprIntRange) limits() []map[string]int {
|
|||||||
m["start"] = int(nle.start)
|
m["start"] = int(nle.start)
|
||||||
m["end"] = int(nle.end)
|
m["end"] = int(nle.end)
|
||||||
m["digits"] = int(nle.digits)
|
m["digits"] = int(nle.digits)
|
||||||
if nle.zeroPadded == true {
|
if nle.zeroPadded {
|
||||||
m["zeroPadded"] = 1
|
m["zeroPadded"] = 1
|
||||||
} else {
|
} else {
|
||||||
m["zeroPadded"] = 0
|
m["zeroPadded"] = 0
|
||||||
@ -183,14 +183,15 @@ func ParseNodeList(raw string) (NodeList, error) {
|
|||||||
rawterms := []string{}
|
rawterms := []string{}
|
||||||
prevterm := 0
|
prevterm := 0
|
||||||
for i := 0; i < len(raw); i++ {
|
for i := 0; i < len(raw); i++ {
|
||||||
if raw[i] == '[' {
|
switch raw[i] {
|
||||||
|
case '[':
|
||||||
for i < len(raw) && raw[i] != ']' {
|
for i < len(raw) && raw[i] != ']' {
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
if i == len(raw) {
|
if i == len(raw) {
|
||||||
return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
|
return nil, fmt.Errorf("ARCHIVE/NODELIST > unclosed '['")
|
||||||
}
|
}
|
||||||
} else if raw[i] == ',' {
|
case ',':
|
||||||
rawterms = append(rawterms, raw[prevterm:i])
|
rawterms = append(rawterms, raw[prevterm:i])
|
||||||
prevterm = i + 1
|
prevterm = i + 1
|
||||||
}
|
}
|
||||||
|
@ -5,85 +5,16 @@
|
|||||||
package runtimeEnv
|
package runtimeEnv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/user"
|
"os/user"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Very simple and limited .env file reader.
|
|
||||||
// All variable definitions found are directly
|
|
||||||
// added to the processes environment.
|
|
||||||
func LoadEnv(file string) error {
|
|
||||||
f, err := os.Open(file)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while opening .env file")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
defer f.Close()
|
|
||||||
s := bufio.NewScanner(bufio.NewReader(f))
|
|
||||||
for s.Scan() {
|
|
||||||
line := s.Text()
|
|
||||||
if strings.HasPrefix(line, "#") || len(line) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.Contains(line, "#") {
|
|
||||||
return errors.New("'#' are only supported at the start of a line")
|
|
||||||
}
|
|
||||||
|
|
||||||
line = strings.TrimPrefix(line, "export ")
|
|
||||||
parts := strings.SplitN(line, "=", 2)
|
|
||||||
if len(parts) != 2 {
|
|
||||||
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
|
|
||||||
}
|
|
||||||
|
|
||||||
key := strings.TrimSpace(parts[0])
|
|
||||||
val := strings.TrimSpace(parts[1])
|
|
||||||
if strings.HasPrefix(val, "\"") {
|
|
||||||
if !strings.HasSuffix(val, "\"") {
|
|
||||||
return fmt.Errorf("RUNTIME/SETUP > unsupported line: %#v", line)
|
|
||||||
}
|
|
||||||
|
|
||||||
runes := []rune(val[1 : len(val)-1])
|
|
||||||
sb := strings.Builder{}
|
|
||||||
for i := 0; i < len(runes); i++ {
|
|
||||||
if runes[i] == '\\' {
|
|
||||||
i++
|
|
||||||
switch runes[i] {
|
|
||||||
case 'n':
|
|
||||||
sb.WriteRune('\n')
|
|
||||||
case 'r':
|
|
||||||
sb.WriteRune('\r')
|
|
||||||
case 't':
|
|
||||||
sb.WriteRune('\t')
|
|
||||||
case '"':
|
|
||||||
sb.WriteRune('"')
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("RUNTIME/SETUP > unsupported escape sequence in quoted string: backslash %#v", runes[i])
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
sb.WriteRune(runes[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
val = sb.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
os.Setenv(key, val)
|
|
||||||
}
|
|
||||||
|
|
||||||
return s.Err()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Changes the processes user and group to that
|
// Changes the processes user and group to that
|
||||||
// specified in the config.json. The go runtime
|
// specified in the config.json. The go runtime
|
||||||
// takes care of all threads (and not only the calling one)
|
// takes care of all threads (and not only the calling one)
|
||||||
|
@ -45,31 +45,31 @@ type SubCluster struct {
|
|||||||
ThreadsPerCore int `json:"threadsPerCore"`
|
ThreadsPerCore int `json:"threadsPerCore"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Metric struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Unit Unit `json:"unit"`
|
||||||
|
Peak float64 `json:"peak"`
|
||||||
|
Normal float64 `json:"normal"`
|
||||||
|
Caution float64 `json:"caution"`
|
||||||
|
Alert float64 `json:"alert"`
|
||||||
|
}
|
||||||
|
|
||||||
type SubClusterConfig struct {
|
type SubClusterConfig struct {
|
||||||
Name string `json:"name"`
|
Metric
|
||||||
Footprint string `json:"footprint,omitempty"`
|
Footprint string `json:"footprint,omitempty"`
|
||||||
Energy string `json:"energy"`
|
Energy string `json:"energy"`
|
||||||
Peak float64 `json:"peak"`
|
Remove bool `json:"remove"`
|
||||||
Normal float64 `json:"normal"`
|
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||||
Caution float64 `json:"caution"`
|
|
||||||
Alert float64 `json:"alert"`
|
|
||||||
Remove bool `json:"remove"`
|
|
||||||
LowerIsBetter bool `json:"lowerIsBetter"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricConfig struct {
|
type MetricConfig struct {
|
||||||
Unit Unit `json:"unit"`
|
Metric
|
||||||
Energy string `json:"energy"`
|
Energy string `json:"energy"`
|
||||||
Name string `json:"name"`
|
|
||||||
Scope MetricScope `json:"scope"`
|
Scope MetricScope `json:"scope"`
|
||||||
Aggregation string `json:"aggregation"`
|
Aggregation string `json:"aggregation"`
|
||||||
Footprint string `json:"footprint,omitempty"`
|
Footprint string `json:"footprint,omitempty"`
|
||||||
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
||||||
Peak float64 `json:"peak"`
|
|
||||||
Caution float64 `json:"caution"`
|
|
||||||
Alert float64 `json:"alert"`
|
|
||||||
Timestep int `json:"timestep"`
|
Timestep int `json:"timestep"`
|
||||||
Normal float64 `json:"normal"`
|
|
||||||
LowerIsBetter bool `json:"lowerIsBetter"`
|
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,7 +127,7 @@ func (topo *Topology) GetSocketsFromHWThreads(
|
|||||||
// those in the argument list are assigned to one of the sockets in the first
|
// those in the argument list are assigned to one of the sockets in the first
|
||||||
// return value, return true as the second value. TODO: Optimize this, there
|
// return value, return true as the second value. TODO: Optimize this, there
|
||||||
// must be a more efficient way/algorithm.
|
// must be a more efficient way/algorithm.
|
||||||
func (topo *Topology) GetSocketsFromCores (
|
func (topo *Topology) GetSocketsFromCores(
|
||||||
cores []int,
|
cores []int,
|
||||||
) (sockets []int, exclusive bool) {
|
) (sockets []int, exclusive bool) {
|
||||||
socketsMap := map[int]int{}
|
socketsMap := map[int]int{}
|
||||||
|
@ -89,6 +89,8 @@ type ResampleConfig struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type CronFrequency struct {
|
type CronFrequency struct {
|
||||||
|
// Commit Job Worker [Defaults to '2m']
|
||||||
|
CommitJobWorker string `json:"commit-job-worker"`
|
||||||
// Duration Update Worker [Defaults to '5m']
|
// Duration Update Worker [Defaults to '5m']
|
||||||
DurationWorker string `json:"duration-worker"`
|
DurationWorker string `json:"duration-worker"`
|
||||||
// Metric-Footprint Update Worker [Defaults to '10m']
|
// Metric-Footprint Update Worker [Defaults to '10m']
|
||||||
@ -129,6 +131,8 @@ type ProgramConfig struct {
|
|||||||
// do not write to the job-archive.
|
// do not write to the job-archive.
|
||||||
DisableArchive bool `json:"disable-archive"`
|
DisableArchive bool `json:"disable-archive"`
|
||||||
|
|
||||||
|
EnableJobTaggers bool `json:"enable-job-taggers"`
|
||||||
|
|
||||||
// Validate json input against schema
|
// Validate json input against schema
|
||||||
Validate bool `json:"validate"`
|
Validate bool `json:"validate"`
|
||||||
|
|
||||||
@ -150,7 +154,7 @@ type ProgramConfig struct {
|
|||||||
|
|
||||||
// If overwritten, at least all the options in the defaults below must
|
// If overwritten, at least all the options in the defaults below must
|
||||||
// be provided! Most options here can be overwritten by the user.
|
// be provided! Most options here can be overwritten by the user.
|
||||||
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
UiDefaults map[string]any `json:"ui-defaults"`
|
||||||
|
|
||||||
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
|
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
|
||||||
EnableResampling *ResampleConfig `json:"enable-resampling"`
|
EnableResampling *ResampleConfig `json:"enable-resampling"`
|
||||||
|
@ -8,66 +8,48 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// BaseJob is the common part of the job metadata structs
|
|
||||||
//
|
|
||||||
// Common subset of Job and JobMeta. Use one of those, not this type directly.
|
|
||||||
|
|
||||||
type BaseJob struct {
|
|
||||||
Cluster string `json:"cluster" db:"cluster" example:"fritz"`
|
|
||||||
SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
|
|
||||||
Partition string `json:"partition,omitempty" db:"cluster_partition" example:"main"`
|
|
||||||
Project string `json:"project" db:"project" example:"abcd200"`
|
|
||||||
User string `json:"user" db:"hpc_user" example:"abcd100h"`
|
|
||||||
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
|
|
||||||
Tags []*Tag `json:"tags,omitempty"`
|
|
||||||
RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
|
|
||||||
RawFootprint []byte `json:"-" db:"footprint"`
|
|
||||||
RawMetaData []byte `json:"-" db:"meta_data"`
|
|
||||||
RawResources []byte `json:"-" db:"resources"`
|
|
||||||
Resources []*Resource `json:"resources"`
|
|
||||||
EnergyFootprint map[string]float64 `json:"energyFootprint"`
|
|
||||||
Footprint map[string]float64 `json:"footprint"`
|
|
||||||
MetaData map[string]string `json:"metaData"`
|
|
||||||
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
|
|
||||||
Energy float64 `json:"energy" db:"energy"`
|
|
||||||
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
|
|
||||||
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
|
|
||||||
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
|
||||||
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
|
|
||||||
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
|
|
||||||
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
|
|
||||||
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
|
|
||||||
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
|
|
||||||
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
|
|
||||||
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Job struct type
|
// Job struct type
|
||||||
//
|
//
|
||||||
// This type is used as the GraphQL interface and using sqlx as a table row.
|
// This type contains all metadata of an HPC job.
|
||||||
//
|
//
|
||||||
// Job model
|
// Job model
|
||||||
// @Description Information of a HPC job.
|
// @Description Information of a HPC job.
|
||||||
type Job struct {
|
|
||||||
StartTime time.Time `json:"startTime"`
|
|
||||||
BaseJob
|
|
||||||
ID int64 `json:"id" db:"id"`
|
|
||||||
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// JobMeta struct type
|
type Job struct {
|
||||||
//
|
Cluster string `json:"cluster" db:"cluster" example:"fritz"`
|
||||||
// When reading from the database or sending data via GraphQL, the start time
|
SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
|
||||||
// can be in the much more convenient time.Time type. In the `meta.json`
|
Partition string `json:"partition,omitempty" db:"cluster_partition" example:"main"`
|
||||||
// files, the start time is encoded as a unix epoch timestamp. This is why
|
Project string `json:"project" db:"project" example:"abcd200"`
|
||||||
// there is this struct, which contains all fields from the regular job
|
User string `json:"user" db:"hpc_user" example:"abcd100h"`
|
||||||
// struct, but "overwrites" the StartTime field with one of type int64. ID
|
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
|
||||||
// *int64 `json:"id,omitempty"` >> never used in the job-archive, only
|
Tags []*Tag `json:"tags,omitempty"`
|
||||||
// available via REST-API
|
RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
|
||||||
//
|
RawFootprint []byte `json:"-" db:"footprint"`
|
||||||
|
RawMetaData []byte `json:"-" db:"meta_data"`
|
||||||
|
RawResources []byte `json:"-" db:"resources"`
|
||||||
|
Resources []*Resource `json:"resources"`
|
||||||
|
EnergyFootprint map[string]float64 `json:"energyFootprint"`
|
||||||
|
Footprint map[string]float64 `json:"footprint"`
|
||||||
|
MetaData map[string]string `json:"metaData"`
|
||||||
|
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
|
||||||
|
Energy float64 `json:"energy" db:"energy"`
|
||||||
|
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
|
||||||
|
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
|
||||||
|
RequestedMemory int64 `json:"requestedMemory,omitempty" db:"requested_memory" example:"128000" minimum:"1"` // in MB
|
||||||
|
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
||||||
|
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
|
||||||
|
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
|
||||||
|
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
|
||||||
|
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
|
||||||
|
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
|
||||||
|
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
|
||||||
|
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
|
||||||
|
Statistics map[string]JobStatistics `json:"statistics"`
|
||||||
|
ID *int64 `json:"id,omitempty" db:"id"`
|
||||||
|
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812"`
|
||||||
|
}
|
||||||
|
|
||||||
type JobLink struct {
|
type JobLink struct {
|
||||||
ID int64 `json:"id"`
|
ID int64 `json:"id"`
|
||||||
@ -79,15 +61,6 @@ type JobLinkResultList struct {
|
|||||||
Count int `json:"count"`
|
Count int `json:"count"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// JobMeta model
|
|
||||||
// @Description Meta data information of a HPC job.
|
|
||||||
type JobMeta struct {
|
|
||||||
ID *int64 `json:"id,omitempty"`
|
|
||||||
Statistics map[string]JobStatistics `json:"statistics"`
|
|
||||||
BaseJob
|
|
||||||
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
MonitoringStatusDisabled int32 = 0
|
MonitoringStatusDisabled int32 = 0
|
||||||
MonitoringStatusRunningOrArchiving int32 = 1
|
MonitoringStatusRunningOrArchiving int32 = 1
|
||||||
@ -95,10 +68,10 @@ const (
|
|||||||
MonitoringStatusArchivingSuccessful int32 = 3
|
MonitoringStatusArchivingSuccessful int32 = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
var JobDefaults BaseJob = BaseJob{
|
// var JobDefaults Job = Job{
|
||||||
Exclusive: 1,
|
// Exclusive: 1,
|
||||||
MonitoringStatus: MonitoringStatusRunningOrArchiving,
|
// MonitoringStatus: MonitoringStatusRunningOrArchiving,
|
||||||
}
|
// }
|
||||||
|
|
||||||
type Unit struct {
|
type Unit struct {
|
||||||
Base string `json:"base"`
|
Base string `json:"base"`
|
||||||
@ -145,7 +118,12 @@ const (
|
|||||||
JobStateOutOfMemory JobState = "out_of_memory"
|
JobStateOutOfMemory JobState = "out_of_memory"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (e *JobState) UnmarshalGQL(v interface{}) error {
|
func (j Job) GoString() string {
|
||||||
|
return fmt.Sprintf("Job{ID:%d, StartTime:%d, JobID:%v, BaseJob:%v}",
|
||||||
|
j.ID, j.StartTime, j.JobID, j)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *JobState) UnmarshalGQL(v any) error {
|
||||||
str, ok := v.(string)
|
str, ok := v.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("SCHEMA/JOB > enums must be strings")
|
return fmt.Errorf("SCHEMA/JOB > enums must be strings")
|
||||||
|
41
pkg/schema/node.go
Normal file
41
pkg/schema/node.go
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package schema
|
||||||
|
|
||||||
|
type NodeState string
|
||||||
|
|
||||||
|
const (
|
||||||
|
NodeStateAllocated NodeState = "allocated"
|
||||||
|
NodeStateReserved NodeState = "reserved"
|
||||||
|
NodeStateIdle NodeState = "idle"
|
||||||
|
NodeStateMixed NodeState = "mixed"
|
||||||
|
NodeStateDown NodeState = "down"
|
||||||
|
NodeStateUnknown NodeState = "unknown"
|
||||||
|
)
|
||||||
|
|
||||||
|
type MonitoringState string
|
||||||
|
|
||||||
|
const (
|
||||||
|
MonitoringStateFull MonitoringState = "full"
|
||||||
|
MonitoringStatePartial MonitoringState = "partial"
|
||||||
|
MonitoringStateFailed MonitoringState = "failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Node struct {
|
||||||
|
ID int64 `json:"id" db:"id"`
|
||||||
|
Hostname string `json:"hostname" db:"hostname" example:"fritz"`
|
||||||
|
Cluster string `json:"cluster" db:"cluster" example:"fritz"`
|
||||||
|
SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
|
||||||
|
NodeState NodeState `json:"nodeState" db:"node_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
|
||||||
|
HealthState MonitoringState `json:"healthState" db:"health_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
|
||||||
|
CpusAllocated int `json:"cpusAllocated" db:"cpus_allocated"`
|
||||||
|
CpusTotal int `json:"cpusTotal" db:"cpus_total"`
|
||||||
|
MemoryAllocated int `json:"memoryAllocated" db:"memory_allocated"`
|
||||||
|
MemoryTotal int `json:"memoryTotal" db:"memory_total"`
|
||||||
|
GpusAllocated int `json:"gpusAllocated" db:"gpus_allocated"`
|
||||||
|
GpusTotal int `json:"gpusTotal" db:"gpus_total"`
|
||||||
|
RawMetaData []byte `json:"-" db:"meta_data"`
|
||||||
|
MetaData map[string]string `json:"metaData"`
|
||||||
|
}
|
@ -38,10 +38,7 @@
|
|||||||
"db-driver": {
|
"db-driver": {
|
||||||
"description": "sqlite3 or mysql (mysql will work for mariadb as well).",
|
"description": "sqlite3 or mysql (mysql will work for mariadb as well).",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": ["sqlite3", "mysql"]
|
||||||
"sqlite3",
|
|
||||||
"mysql"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"db": {
|
"db": {
|
||||||
"description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).",
|
"description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).",
|
||||||
@ -54,10 +51,7 @@
|
|||||||
"kind": {
|
"kind": {
|
||||||
"description": "Backend type for job-archive",
|
"description": "Backend type for job-archive",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": ["file", "s3"]
|
||||||
"file",
|
|
||||||
"s3"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"path": {
|
"path": {
|
||||||
"description": "Path to job archive for file backend",
|
"description": "Path to job archive for file backend",
|
||||||
@ -74,11 +68,7 @@
|
|||||||
"policy": {
|
"policy": {
|
||||||
"description": "Retention policy",
|
"description": "Retention policy",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": ["none", "delete", "move"]
|
||||||
"none",
|
|
||||||
"delete",
|
|
||||||
"move"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"includeDB": {
|
"includeDB": {
|
||||||
"description": "Also remove jobs from database",
|
"description": "Also remove jobs from database",
|
||||||
@ -93,19 +83,19 @@
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["policy"]
|
||||||
"policy"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["kind"]
|
||||||
"kind"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"disable-archive": {
|
"disable-archive": {
|
||||||
"description": "Keep all metric data in the metric data repositories, do not write to the job-archive.",
|
"description": "Keep all metric data in the metric data repositories, do not write to the job-archive.",
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
|
"enable-job-taggers": {
|
||||||
|
"description": "Turn on automatic application and jobclass taggers",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"validate": {
|
"validate": {
|
||||||
"description": "Validate all input json documents against json schema.",
|
"description": "Validate all input json documents against json schema.",
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
@ -168,10 +158,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["trigger", "resolutions"]
|
||||||
"trigger",
|
|
||||||
"resolutions"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"jwts": {
|
"jwts": {
|
||||||
"description": "For JWT token authentication.",
|
"description": "For JWT token authentication.",
|
||||||
@ -198,9 +185,7 @@
|
|||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["max-age"]
|
||||||
"max-age"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"oidc": {
|
"oidc": {
|
||||||
"provider": {
|
"provider": {
|
||||||
@ -215,9 +200,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["provider"]
|
||||||
"provider"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"ldap": {
|
"ldap": {
|
||||||
"description": "For LDAP Authentication and user synchronisation.",
|
"description": "For LDAP Authentication and user synchronisation.",
|
||||||
@ -260,13 +243,7 @@
|
|||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["url", "user_base", "search_dn", "user_bind", "user_filter"]
|
||||||
"url",
|
|
||||||
"user_base",
|
|
||||||
"search_dn",
|
|
||||||
"user_bind",
|
|
||||||
"user_filter"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"clusters": {
|
"clusters": {
|
||||||
"description": "Configuration for the clusters to be displayed.",
|
"description": "Configuration for the clusters to be displayed.",
|
||||||
@ -284,12 +261,7 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
"kind": {
|
"kind": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": ["influxdb", "prometheus", "cc-metric-store", "test"]
|
||||||
"influxdb",
|
|
||||||
"prometheus",
|
|
||||||
"cc-metric-store",
|
|
||||||
"test"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"url": {
|
"url": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
@ -298,10 +270,7 @@
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["kind", "url"]
|
||||||
"kind",
|
|
||||||
"url"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"filterRanges": {
|
"filterRanges": {
|
||||||
"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
|
"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
|
||||||
@ -318,10 +287,7 @@
|
|||||||
"type": "integer"
|
"type": "integer"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["from", "to"]
|
||||||
"from",
|
|
||||||
"to"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"duration": {
|
"duration": {
|
||||||
"description": "UI slider range for duration",
|
"description": "UI slider range for duration",
|
||||||
@ -334,10 +300,7 @@
|
|||||||
"type": "integer"
|
"type": "integer"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["from", "to"]
|
||||||
"from",
|
|
||||||
"to"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "UI slider range for start time",
|
"description": "UI slider range for start time",
|
||||||
@ -351,24 +314,13 @@
|
|||||||
"type": "null"
|
"type": "null"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["from", "to"]
|
||||||
"from",
|
|
||||||
"to"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["numNodes", "duration", "startTime"]
|
||||||
"numNodes",
|
|
||||||
"duration",
|
|
||||||
"startTime"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["name", "metricDataRepository", "filterRanges"],
|
||||||
"name",
|
|
||||||
"metricDataRepository",
|
|
||||||
"filterRanges"
|
|
||||||
],
|
|
||||||
"minItems": 1
|
"minItems": 1
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -490,9 +442,5 @@
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": ["jwts", "clusters", "apiAllowedIPs"]
|
||||||
"jwts",
|
|
||||||
"clusters",
|
|
||||||
"apiAllowedIPs"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ package schema
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -50,12 +51,7 @@ type User struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (u *User) HasProject(project string) bool {
|
func (u *User) HasProject(project string) bool {
|
||||||
for _, p := range u.Projects {
|
return slices.Contains(u.Projects, project)
|
||||||
if p == project {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetRoleString(roleInt Role) string {
|
func GetRoleString(roleInt Role) string {
|
||||||
|
@ -28,12 +28,13 @@ const (
|
|||||||
//go:embed schemas/*
|
//go:embed schemas/*
|
||||||
var schemaFiles embed.FS
|
var schemaFiles embed.FS
|
||||||
|
|
||||||
func Validate(k Kind, r io.Reader) (err error) {
|
func Validate(k Kind, r io.Reader) error {
|
||||||
jsonschema.Loaders["embedfs"] = func(s string) (io.ReadCloser, error) {
|
jsonschema.Loaders["embedfs"] = func(s string) (io.ReadCloser, error) {
|
||||||
f := filepath.Join("schemas", strings.Split(s, "//")[1])
|
f := filepath.Join("schemas", strings.Split(s, "//")[1])
|
||||||
return schemaFiles.Open(f)
|
return schemaFiles.Open(f)
|
||||||
}
|
}
|
||||||
var s *jsonschema.Schema
|
var s *jsonschema.Schema
|
||||||
|
var err error
|
||||||
|
|
||||||
switch k {
|
switch k {
|
||||||
case Meta:
|
case Meta:
|
||||||
@ -54,7 +55,7 @@ func Validate(k Kind, r io.Reader) (err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var v interface{}
|
var v interface{}
|
||||||
if err := json.NewDecoder(r).Decode(&v); err != nil {
|
if err = json.NewDecoder(r).Decode(&v); err != nil {
|
||||||
log.Warnf("Error while decoding raw json schema: %#v", err)
|
log.Warnf("Error while decoding raw json schema: %#v", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -35,9 +35,6 @@ func TestValidateConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestValidateJobMeta(t *testing.T) {
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestValidateCluster(t *testing.T) {
|
func TestValidateCluster(t *testing.T) {
|
||||||
json := []byte(`{
|
json := []byte(`{
|
||||||
"name": "emmy",
|
"name": "emmy",
|
||||||
|
691
web/frontend/package-lock.json
generated
691
web/frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -7,25 +7,24 @@
|
|||||||
"dev": "rollup -c -w"
|
"dev": "rollup -c -w"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@rollup/plugin-commonjs": "^25.0.8",
|
"@rollup/plugin-commonjs": "^28.0.3",
|
||||||
"@rollup/plugin-node-resolve": "^15.3.0",
|
"@rollup/plugin-node-resolve": "^16.0.1",
|
||||||
"@rollup/plugin-terser": "^0.4.4",
|
"@rollup/plugin-terser": "^0.4.4",
|
||||||
"@timohausmann/quadtree-js": "^1.2.6",
|
"@timohausmann/quadtree-js": "^1.2.6",
|
||||||
"rollup": "^4.27.4",
|
"rollup": "^4.41.1",
|
||||||
"rollup-plugin-css-only": "^4.5.2",
|
"rollup-plugin-css-only": "^4.5.2",
|
||||||
"rollup-plugin-svelte": "^7.2.2",
|
"rollup-plugin-svelte": "^7.2.2",
|
||||||
"svelte": "^4.2.19"
|
"svelte": "^5.33.14"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@rollup/plugin-replace": "^5.0.7",
|
"@rollup/plugin-replace": "^6.0.2",
|
||||||
"@sveltestrap/sveltestrap": "^6.2.7",
|
"@sveltestrap/sveltestrap": "^7.1.0",
|
||||||
"@urql/svelte": "^4.2.2",
|
"@urql/svelte": "^4.2.3",
|
||||||
"chart.js": "^4.4.6",
|
"chart.js": "^4.4.9",
|
||||||
"date-fns": "^2.30.0",
|
"date-fns": "^4.1.0",
|
||||||
"graphql": "^16.9.0",
|
"graphql": "^16.11.0",
|
||||||
"mathjs": "^12.4.3",
|
"mathjs": "^14.5.2",
|
||||||
"svelte-chartjs": "^3.1.5",
|
"uplot": "^1.6.32",
|
||||||
"uplot": "^1.6.31",
|
"wonka": "^6.3.5"
|
||||||
"wonka": "^6.3.4"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,13 +6,20 @@ import terser from '@rollup/plugin-terser';
|
|||||||
import css from 'rollup-plugin-css-only';
|
import css from 'rollup-plugin-css-only';
|
||||||
|
|
||||||
const production = !process.env.ROLLUP_WATCH;
|
const production = !process.env.ROLLUP_WATCH;
|
||||||
// const production = false
|
|
||||||
|
|
||||||
const plugins = [
|
const plugins = [
|
||||||
svelte({
|
svelte({
|
||||||
compilerOptions: {
|
compilerOptions: {
|
||||||
// enable run-time checks when not in production
|
// Enable run-time checks when not in production
|
||||||
dev: !production
|
dev: !production,
|
||||||
|
// Enable Svelte 5-specific features
|
||||||
|
hydratable: true, // If using server-side rendering
|
||||||
|
immutable: true, // Optimize updates for immutable data
|
||||||
|
// As of sveltestrap 7.1.0, filtered warnings would appear for imported sveltestrap components
|
||||||
|
warningFilter: (warning) => (
|
||||||
|
warning.code !== 'element_invalid_self_closing_tag' &&
|
||||||
|
warning.code !== 'a11y_interactive_supports_focus'
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
@ -23,7 +30,7 @@ const plugins = [
|
|||||||
// https://github.com/rollup/plugins/tree/master/packages/commonjs
|
// https://github.com/rollup/plugins/tree/master/packages/commonjs
|
||||||
resolve({
|
resolve({
|
||||||
browser: true,
|
browser: true,
|
||||||
dedupe: ['svelte']
|
dedupe: ['svelte', '@sveltejs/kit'] // Ensure deduplication for Svelte 5
|
||||||
}),
|
}),
|
||||||
commonjs(),
|
commonjs(),
|
||||||
|
|
||||||
@ -32,8 +39,10 @@ const plugins = [
|
|||||||
production && terser(),
|
production && terser(),
|
||||||
|
|
||||||
replace({
|
replace({
|
||||||
"process.env.NODE_ENV": JSON.stringify("development"),
|
preventAssignment: true,
|
||||||
preventAssignment: true
|
values: {
|
||||||
|
"process.env.NODE_ENV": JSON.stringify(production ? "production" : "development"),
|
||||||
|
}
|
||||||
})
|
})
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -37,14 +37,12 @@
|
|||||||
import ScatterPlot from "./generic/plots/Scatter.svelte";
|
import ScatterPlot from "./generic/plots/Scatter.svelte";
|
||||||
import RooflineHeatmap from "./generic/plots/RooflineHeatmap.svelte";
|
import RooflineHeatmap from "./generic/plots/RooflineHeatmap.svelte";
|
||||||
|
|
||||||
const { query: initq } = init();
|
/* Svelte 5 Props */
|
||||||
|
let { filterPresets } = $props();
|
||||||
export let filterPresets;
|
|
||||||
|
|
||||||
// By default, look at the jobs of the last 6 hours:
|
// By default, look at the jobs of the last 6 hours:
|
||||||
if (filterPresets?.startTime == null) {
|
if (filterPresets?.startTime == null) {
|
||||||
if (filterPresets == null) filterPresets = {};
|
if (filterPresets == null) filterPresets = {};
|
||||||
|
|
||||||
let now = new Date(Date.now());
|
let now = new Date(Date.now());
|
||||||
let hourAgo = new Date(now);
|
let hourAgo = new Date(now);
|
||||||
hourAgo.setHours(hourAgo.getHours() - 6);
|
hourAgo.setHours(hourAgo.getHours() - 6);
|
||||||
@ -54,27 +52,10 @@
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let cluster;
|
/* Const Init */
|
||||||
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
const { query: initq } = init();
|
||||||
let jobFilters = [];
|
const client = getContextClient();
|
||||||
let rooflineMaxY;
|
|
||||||
let colWidth1, colWidth2;
|
|
||||||
let numBins = 50;
|
|
||||||
let maxY = -1;
|
|
||||||
|
|
||||||
const initialized = getContext("initialized");
|
|
||||||
const globalMetrics = getContext("globalMetrics");
|
|
||||||
const ccconfig = getContext("cc-config");
|
const ccconfig = getContext("cc-config");
|
||||||
|
|
||||||
let metricsInHistograms = ccconfig.analysis_view_histogramMetrics,
|
|
||||||
metricsInScatterplots = ccconfig.analysis_view_scatterPlotMetrics;
|
|
||||||
|
|
||||||
$: metrics = [
|
|
||||||
...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()]),
|
|
||||||
];
|
|
||||||
|
|
||||||
$: clusterName = cluster?.name ? cluster.name : cluster;
|
|
||||||
|
|
||||||
const sortOptions = [
|
const sortOptions = [
|
||||||
{ key: "totalWalltime", label: "Walltime" },
|
{ key: "totalWalltime", label: "Walltime" },
|
||||||
{ key: "totalNodeHours", label: "Node Hours" },
|
{ key: "totalNodeHours", label: "Node Hours" },
|
||||||
@ -86,7 +67,22 @@
|
|||||||
{ key: "project", label: "Project ID" },
|
{ key: "project", label: "Project ID" },
|
||||||
];
|
];
|
||||||
|
|
||||||
let sortSelection =
|
/* Var Init */
|
||||||
|
let metricUnits = {};
|
||||||
|
let metricScopes = {};
|
||||||
|
let numBins = 50;
|
||||||
|
|
||||||
|
/* State Init */
|
||||||
|
let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||||
|
let cluster = $state(filterPresets?.cluster);
|
||||||
|
let rooflineMaxY = $state(0);
|
||||||
|
let maxY = $state(-1);
|
||||||
|
let colWidth1 = $state(0);
|
||||||
|
let colWidth2 = $state(0);
|
||||||
|
let jobFilters = $state([]);
|
||||||
|
let metricsInHistograms = $state(ccconfig.analysis_view_histogramMetrics)
|
||||||
|
let metricsInScatterplots = $state(ccconfig.analysis_view_scatterPlotMetrics)
|
||||||
|
let sortSelection = $state(
|
||||||
sortOptions.find(
|
sortOptions.find(
|
||||||
(option) =>
|
(option) =>
|
||||||
option.key ==
|
option.key ==
|
||||||
@ -94,17 +90,20 @@
|
|||||||
) ||
|
) ||
|
||||||
sortOptions.find(
|
sortOptions.find(
|
||||||
(option) => option.key == ccconfig.analysis_view_selectedTopCategory,
|
(option) => option.key == ccconfig.analysis_view_selectedTopCategory,
|
||||||
);
|
)
|
||||||
let groupSelection =
|
);
|
||||||
groupOptions.find(
|
let groupSelection = $state(
|
||||||
(option) =>
|
groupOptions.find(
|
||||||
option.key ==
|
(option) =>
|
||||||
ccconfig[`analysis_view_selectedTopEntity:${filterPresets.cluster}`],
|
option.key ==
|
||||||
) ||
|
ccconfig[`analysis_view_selectedTopEntity:${filterPresets.cluster}`],
|
||||||
groupOptions.find(
|
) ||
|
||||||
(option) => option.key == ccconfig.analysis_view_selectedTopEntity,
|
groupOptions.find(
|
||||||
);
|
(option) => option.key == ccconfig.analysis_view_selectedTopEntity,
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Init Function */
|
||||||
getContext("on-init")(({ data }) => {
|
getContext("on-init")(({ data }) => {
|
||||||
if (data != null) {
|
if (data != null) {
|
||||||
cluster = data.clusters.find((c) => c.name == filterPresets.cluster);
|
cluster = data.clusters.find((c) => c.name == filterPresets.cluster);
|
||||||
@ -121,120 +120,145 @@
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const client = getContextClient();
|
/* Derived Vars */
|
||||||
|
const clusterName = $derived(cluster?.name ? cluster.name : cluster);
|
||||||
|
const availableMetrics = $derived(loadAvailable($initq?.data?.globalMetrics, clusterName));
|
||||||
|
const metrics = $derived(
|
||||||
|
[...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()])]
|
||||||
|
);
|
||||||
|
|
||||||
$: statsQuery = queryStore({
|
let statsQuery = $derived(
|
||||||
client: client,
|
queryStore({
|
||||||
query: gql`
|
client: client,
|
||||||
query ($jobFilters: [JobFilter!]!) {
|
query: gql`
|
||||||
stats: jobsStatistics(filter: $jobFilters) {
|
query ($jobFilters: [JobFilter!]!) {
|
||||||
totalJobs
|
stats: jobsStatistics(filter: $jobFilters) {
|
||||||
shortJobs
|
totalJobs
|
||||||
totalWalltime
|
shortJobs
|
||||||
totalNodeHours
|
totalWalltime
|
||||||
totalCoreHours
|
totalNodeHours
|
||||||
totalAccHours
|
totalCoreHours
|
||||||
histDuration {
|
totalAccHours
|
||||||
count
|
histDuration {
|
||||||
value
|
count
|
||||||
}
|
value
|
||||||
histNumCores {
|
}
|
||||||
count
|
histNumCores {
|
||||||
value
|
count
|
||||||
|
value
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
`,
|
||||||
`,
|
variables: { jobFilters },
|
||||||
variables: { jobFilters },
|
})
|
||||||
});
|
);
|
||||||
|
|
||||||
$: topQuery = queryStore({
|
let topQuery = $derived(
|
||||||
client: client,
|
queryStore({
|
||||||
query: gql`
|
client: client,
|
||||||
query (
|
query: gql`
|
||||||
$jobFilters: [JobFilter!]!
|
query (
|
||||||
$paging: PageRequest!
|
$jobFilters: [JobFilter!]!
|
||||||
$sortBy: SortByAggregate!
|
$paging: PageRequest!
|
||||||
$groupBy: Aggregate!
|
$sortBy: SortByAggregate!
|
||||||
) {
|
$groupBy: Aggregate!
|
||||||
topList: jobsStatistics(
|
|
||||||
filter: $jobFilters
|
|
||||||
page: $paging
|
|
||||||
sortBy: $sortBy
|
|
||||||
groupBy: $groupBy
|
|
||||||
) {
|
) {
|
||||||
id
|
topList: jobsStatistics(
|
||||||
name
|
filter: $jobFilters
|
||||||
totalWalltime
|
page: $paging
|
||||||
totalNodeHours
|
sortBy: $sortBy
|
||||||
totalCoreHours
|
groupBy: $groupBy
|
||||||
totalAccHours
|
) {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
totalWalltime
|
||||||
|
totalNodeHours
|
||||||
|
totalCoreHours
|
||||||
|
totalAccHours
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
`,
|
||||||
`,
|
variables: {
|
||||||
variables: {
|
jobFilters,
|
||||||
jobFilters,
|
paging: { itemsPerPage: 10, page: 1 },
|
||||||
paging: { itemsPerPage: 10, page: 1 },
|
sortBy: sortSelection.key.toUpperCase(),
|
||||||
sortBy: sortSelection.key.toUpperCase(),
|
groupBy: groupSelection.key.toUpperCase(),
|
||||||
groupBy: groupSelection.key.toUpperCase(),
|
},
|
||||||
},
|
})
|
||||||
});
|
);
|
||||||
|
|
||||||
// Note: Different footprints than those saved in DB per Job -> Caused by Legacy Naming
|
// Note: Different footprints than those saved in DB per Job -> Caused by Legacy Naming
|
||||||
$: footprintsQuery = queryStore({
|
let footprintsQuery = $derived(
|
||||||
client: client,
|
queryStore({
|
||||||
query: gql`
|
client: client,
|
||||||
query ($jobFilters: [JobFilter!]!, $metrics: [String!]!) {
|
query: gql`
|
||||||
footprints: jobsFootprints(filter: $jobFilters, metrics: $metrics) {
|
query ($jobFilters: [JobFilter!]!, $metrics: [String!]!) {
|
||||||
timeWeights {
|
footprints: jobsFootprints(filter: $jobFilters, metrics: $metrics) {
|
||||||
nodeHours
|
timeWeights {
|
||||||
accHours
|
nodeHours
|
||||||
coreHours
|
accHours
|
||||||
}
|
coreHours
|
||||||
metrics {
|
}
|
||||||
metric
|
metrics {
|
||||||
data
|
metric
|
||||||
|
data
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
`,
|
||||||
`,
|
variables: { jobFilters, metrics },
|
||||||
variables: { jobFilters, metrics },
|
})
|
||||||
});
|
);
|
||||||
|
|
||||||
$: rooflineQuery = queryStore({
|
let rooflineQuery = $derived(
|
||||||
client: client,
|
queryStore({
|
||||||
query: gql`
|
client: client,
|
||||||
query (
|
query: gql`
|
||||||
$jobFilters: [JobFilter!]!
|
query (
|
||||||
$rows: Int!
|
$jobFilters: [JobFilter!]!
|
||||||
$cols: Int!
|
$rows: Int!
|
||||||
$minX: Float!
|
$cols: Int!
|
||||||
$minY: Float!
|
$minX: Float!
|
||||||
$maxX: Float!
|
$minY: Float!
|
||||||
$maxY: Float!
|
$maxX: Float!
|
||||||
) {
|
$maxY: Float!
|
||||||
rooflineHeatmap(
|
) {
|
||||||
filter: $jobFilters
|
rooflineHeatmap(
|
||||||
rows: $rows
|
filter: $jobFilters
|
||||||
cols: $cols
|
rows: $rows
|
||||||
minX: $minX
|
cols: $cols
|
||||||
minY: $minY
|
minX: $minX
|
||||||
maxX: $maxX
|
minY: $minY
|
||||||
maxY: $maxY
|
maxX: $maxX
|
||||||
)
|
maxY: $maxY
|
||||||
}
|
)
|
||||||
`,
|
}
|
||||||
variables: {
|
`,
|
||||||
jobFilters,
|
variables: {
|
||||||
rows: 50,
|
jobFilters,
|
||||||
cols: 50,
|
rows: 50,
|
||||||
minX: 0.01,
|
cols: 50,
|
||||||
minY: 1,
|
minX: 0.01,
|
||||||
maxX: 1000,
|
minY: 1,
|
||||||
maxY,
|
maxX: 1000,
|
||||||
},
|
maxY,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Reactive Effects */
|
||||||
|
$effect(() => {
|
||||||
|
loadUnitsAndScopes(availableMetrics.length, availableMetrics);
|
||||||
|
});
|
||||||
|
$effect(() => {
|
||||||
|
updateEntityConfiguration(groupSelection.key);
|
||||||
|
});
|
||||||
|
$effect(() => {
|
||||||
|
updateCategoryConfiguration(sortSelection.key);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/* Functions */
|
||||||
const updateConfigurationMutation = ({ name, value }) => {
|
const updateConfigurationMutation = ({ name, value }) => {
|
||||||
return mutationStore({
|
return mutationStore({
|
||||||
client: client,
|
client: client,
|
||||||
@ -287,22 +311,26 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let availableMetrics = [];
|
function loadAvailable(globals, name) {
|
||||||
let metricUnits = {};
|
const availableMetrics = new Set();
|
||||||
let metricScopes = {};
|
if (globals && globals.length > 0) {
|
||||||
function loadMetrics(isInitialized) {
|
for (let gm of globals) {
|
||||||
if (!isInitialized) return
|
if (gm.availability.find((av) => av.cluster == name)) {
|
||||||
availableMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster.name))]
|
availableMetrics.add({name: gm.name, scope: gm.scope, unit: gm.unit});
|
||||||
for (let sm of availableMetrics) {
|
};
|
||||||
metricUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
|
}
|
||||||
metricScopes[sm.name] = sm?.scope
|
}
|
||||||
|
return [...availableMetrics]
|
||||||
|
};
|
||||||
|
|
||||||
|
function loadUnitsAndScopes(length, available) {
|
||||||
|
for (let am of available) {
|
||||||
|
metricUnits[am.name] = (am?.unit?.prefix ? am.unit.prefix : "") + (am?.unit?.base ? am.unit.base : "")
|
||||||
|
metricScopes[am.name] = am?.scope
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$: loadMetrics($initialized)
|
/* On Mount */
|
||||||
$: updateEntityConfiguration(groupSelection.key);
|
|
||||||
$: updateCategoryConfiguration(sortSelection.key);
|
|
||||||
|
|
||||||
onMount(() => filterComponent.updateFilters());
|
onMount(() => filterComponent.updateFilters());
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@ -329,7 +357,7 @@
|
|||||||
{filterPresets}
|
{filterPresets}
|
||||||
disableClusterSelection={true}
|
disableClusterSelection={true}
|
||||||
startTimeQuickSelect={true}
|
startTimeQuickSelect={true}
|
||||||
on:update-filters={({ detail }) => {
|
applyFilters={(detail) => {
|
||||||
jobFilters = detail.filters;
|
jobFilters = detail.filters;
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
@ -392,6 +420,7 @@
|
|||||||
<Card body color="danger">{$topQuery.error.message}</Card>
|
<Card body color="danger">{$topQuery.error.message}</Card>
|
||||||
{:else}
|
{:else}
|
||||||
<Pie
|
<Pie
|
||||||
|
canvasId={`pie-${groupSelection.key}`}
|
||||||
size={colWidth1}
|
size={colWidth1}
|
||||||
sliceLabel={sortSelection.label}
|
sliceLabel={sortSelection.label}
|
||||||
quantities={$topQuery.data.topList.map(
|
quantities={$topQuery.data.topList.map(
|
||||||
|
@ -14,15 +14,18 @@
|
|||||||
import SupportSettings from "./config/SupportSettings.svelte";
|
import SupportSettings from "./config/SupportSettings.svelte";
|
||||||
import AdminSettings from "./config/AdminSettings.svelte";
|
import AdminSettings from "./config/AdminSettings.svelte";
|
||||||
|
|
||||||
export let isAdmin;
|
/* Svelte 5 Props */
|
||||||
export let isSupport;
|
let {
|
||||||
export let isApi;
|
isAdmin,
|
||||||
export let username;
|
isSupport,
|
||||||
export let ncontent;
|
isApi,
|
||||||
|
username,
|
||||||
|
ncontent,
|
||||||
|
} = $props();
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
{#if isAdmin}
|
{#if isAdmin}
|
||||||
<Card style="margin-bottom: 1.5em;">
|
<Card style="margin-bottom: 1.5rem;">
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
<CardTitle class="mb-1">Admin Options</CardTitle>
|
<CardTitle class="mb-1">Admin Options</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
@ -31,7 +34,7 @@
|
|||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
{#if isSupport || isAdmin}
|
{#if isSupport || isAdmin}
|
||||||
<Card style="margin-bottom: 1.5em;">
|
<Card style="margin-bottom: 1.5rem;">
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
<CardTitle class="mb-1">Support Options</CardTitle>
|
<CardTitle class="mb-1">Support Options</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
- `username String`: Empty string if auth. is disabled, otherwise the username as string
|
- `username String`: Empty string if auth. is disabled, otherwise the username as string
|
||||||
- `authlevel Number`: The current users authentication level
|
- `authlevel Number`: The current users authentication level
|
||||||
- `clusters [String]`: List of cluster names
|
- `clusters [String]`: List of cluster names
|
||||||
|
- `subClusters [String]`: List of subCluster names
|
||||||
- `roles [Number]`: Enum containing available roles
|
- `roles [Number]`: Enum containing available roles
|
||||||
-->
|
-->
|
||||||
|
|
||||||
@ -23,24 +24,21 @@
|
|||||||
import NavbarLinks from "./header/NavbarLinks.svelte";
|
import NavbarLinks from "./header/NavbarLinks.svelte";
|
||||||
import NavbarTools from "./header/NavbarTools.svelte";
|
import NavbarTools from "./header/NavbarTools.svelte";
|
||||||
|
|
||||||
export let username;
|
/* Svelte 5 Props */
|
||||||
export let authlevel;
|
let { username, authlevel, clusters, subClusters, roles } = $props();
|
||||||
export let clusters;
|
|
||||||
export let subClusters;
|
|
||||||
export let roles;
|
|
||||||
|
|
||||||
let isOpen = false;
|
|
||||||
let screenSize;
|
|
||||||
|
|
||||||
|
/* Const Init */
|
||||||
const jobsTitle = new Map();
|
const jobsTitle = new Map();
|
||||||
jobsTitle.set(2, "Job Search");
|
jobsTitle.set(2, "Job Search");
|
||||||
jobsTitle.set(3, "Managed Jobs");
|
jobsTitle.set(3, "Managed Jobs");
|
||||||
jobsTitle.set(4, "Jobs");
|
jobsTitle.set(4, "Jobs");
|
||||||
jobsTitle.set(5, "Jobs");
|
jobsTitle.set(5, "Jobs");
|
||||||
|
|
||||||
const usersTitle = new Map();
|
const usersTitle = new Map();
|
||||||
usersTitle.set(3, "Managed Users");
|
usersTitle.set(3, "Managed Users");
|
||||||
usersTitle.set(4, "Users");
|
usersTitle.set(4, "Users");
|
||||||
usersTitle.set(5, "Users");
|
usersTitle.set(5, "Users");
|
||||||
|
|
||||||
const projectsTitle = new Map();
|
const projectsTitle = new Map();
|
||||||
projectsTitle.set(3, "Managed Projects");
|
projectsTitle.set(3, "Managed Projects");
|
||||||
projectsTitle.set(4, "Projects");
|
projectsTitle.set(4, "Projects");
|
||||||
@ -120,29 +118,41 @@
|
|||||||
menu: "Info",
|
menu: "Info",
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/* State Init */
|
||||||
|
let isOpen = $state(false);
|
||||||
|
let screenSize = $state(0);
|
||||||
|
|
||||||
|
/* Derived Vars */
|
||||||
|
let showMax = $derived(screenSize >= 1500);
|
||||||
|
let showMid = $derived(screenSize < 1500 && screenSize >= 1300);
|
||||||
|
let showSml = $derived(screenSize < 1300 && screenSize >= 768);
|
||||||
|
let showBrg = $derived(screenSize < 768);
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<svelte:window bind:innerWidth={screenSize} />
|
<svelte:window bind:innerWidth={screenSize} />
|
||||||
|
|
||||||
<Navbar color="light" light expand="md" fixed="top">
|
<Navbar color="light" light expand="md" fixed="top">
|
||||||
<NavbarBrand href="/">
|
<NavbarBrand href="/">
|
||||||
<img alt="ClusterCockpit Logo" src="/img/logo.png" height="25rem" />
|
<img alt="ClusterCockpit Logo" src="/img/logo.png" height="25rem" />
|
||||||
</NavbarBrand>
|
</NavbarBrand>
|
||||||
<NavbarToggler on:click={() => (isOpen = !isOpen)} />
|
<NavbarToggler onclick={() => (isOpen = !isOpen)} />
|
||||||
<Collapse
|
<Collapse
|
||||||
style="justify-content: space-between"
|
style="justify-content: space-between"
|
||||||
{isOpen}
|
{isOpen}
|
||||||
navbar
|
navbar
|
||||||
expand="md"
|
expand="md"
|
||||||
on:update={({ detail }) => (isOpen = detail.isOpen)}
|
onupdate={({ detail }) => (isOpen = detail.isOpen)}
|
||||||
>
|
>
|
||||||
<Nav navbar>
|
<Nav navbar>
|
||||||
{#if screenSize > 1500 || screenSize < 768}
|
{#if showMax || showBrg}
|
||||||
<NavbarLinks
|
<NavbarLinks
|
||||||
{clusters}
|
{clusters}
|
||||||
{subClusters}
|
{subClusters}
|
||||||
links={views.filter((item) => item.requiredRole <= authlevel)}
|
links={views.filter((item) => item.requiredRole <= authlevel)}
|
||||||
/>
|
/>
|
||||||
{:else if screenSize > 1300}
|
|
||||||
|
{:else if showMid}
|
||||||
<NavbarLinks
|
<NavbarLinks
|
||||||
{clusters}
|
{clusters}
|
||||||
{subClusters}
|
{subClusters}
|
||||||
@ -169,7 +179,8 @@
|
|||||||
</DropdownMenu>
|
</DropdownMenu>
|
||||||
</Dropdown>
|
</Dropdown>
|
||||||
{/if}
|
{/if}
|
||||||
{:else}
|
|
||||||
|
{:else if showSml}
|
||||||
<NavbarLinks
|
<NavbarLinks
|
||||||
{clusters}
|
{clusters}
|
||||||
{subClusters}
|
{subClusters}
|
||||||
@ -228,8 +239,11 @@
|
|||||||
</DropdownMenu>
|
</DropdownMenu>
|
||||||
</Dropdown>
|
</Dropdown>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
{:else}
|
||||||
|
<span>Error: Unknown Window Size!</span>
|
||||||
{/if}
|
{/if}
|
||||||
</Nav>
|
</Nav>
|
||||||
<NavbarTools {username} {authlevel} {roles} {screenSize} />
|
<NavbarTools {username} {authlevel} {roles} {screenSize} />
|
||||||
</Collapse>
|
</Collapse>
|
||||||
</Navbar>
|
</Navbar>
|
@ -56,8 +56,8 @@
|
|||||||
selectedScopes = [],
|
selectedScopes = [],
|
||||||
plots = {};
|
plots = {};
|
||||||
|
|
||||||
let availableMetrics = new Set(),
|
let totalMetrics = 0;
|
||||||
missingMetrics = [],
|
let missingMetrics = [],
|
||||||
missingHosts = [],
|
missingHosts = [],
|
||||||
somethingMissing = false;
|
somethingMissing = false;
|
||||||
|
|
||||||
@ -294,7 +294,7 @@
|
|||||||
{#if $initq?.data}
|
{#if $initq?.data}
|
||||||
<Col xs="auto">
|
<Col xs="auto">
|
||||||
<Button outline on:click={() => (isMetricsSelectionOpen = true)} color="primary">
|
<Button outline on:click={() => (isMetricsSelectionOpen = true)} color="primary">
|
||||||
Select Metrics (Selected {selectedMetrics.length} of {availableMetrics.size} available)
|
Select Metrics (Selected {selectedMetrics.length} of {totalMetrics} available)
|
||||||
</Button>
|
</Button>
|
||||||
</Col>
|
</Col>
|
||||||
{/if}
|
{/if}
|
||||||
@ -428,12 +428,16 @@
|
|||||||
|
|
||||||
{#if $initq?.data}
|
{#if $initq?.data}
|
||||||
<MetricSelection
|
<MetricSelection
|
||||||
|
bind:isOpen={isMetricsSelectionOpen}
|
||||||
|
bind:totalMetrics
|
||||||
|
presetMetrics={selectedMetrics}
|
||||||
cluster={$initq.data.job.cluster}
|
cluster={$initq.data.job.cluster}
|
||||||
subCluster={$initq.data.job.subCluster}
|
subCluster={$initq.data.job.subCluster}
|
||||||
configName="job_view_selectedMetrics"
|
configName="job_view_selectedMetrics"
|
||||||
bind:metrics={selectedMetrics}
|
preInitialized
|
||||||
bind:isOpen={isMetricsSelectionOpen}
|
applyMetrics={(newMetrics) =>
|
||||||
bind:allMetrics={availableMetrics}
|
selectedMetrics = [...newMetrics]
|
||||||
|
}
|
||||||
/>
|
/>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
import { onMount, getContext } from "svelte";
|
import { untrack, onMount, getContext } from "svelte";
|
||||||
import {
|
import {
|
||||||
Row,
|
Row,
|
||||||
Col,
|
Col,
|
||||||
@ -21,35 +21,61 @@
|
|||||||
import { init } from "./generic/utils.js";
|
import { init } from "./generic/utils.js";
|
||||||
import Filters from "./generic/Filters.svelte";
|
import Filters from "./generic/Filters.svelte";
|
||||||
import JobList from "./generic/JobList.svelte";
|
import JobList from "./generic/JobList.svelte";
|
||||||
|
import JobCompare from "./generic/JobCompare.svelte";
|
||||||
import TextFilter from "./generic/helper/TextFilter.svelte";
|
import TextFilter from "./generic/helper/TextFilter.svelte";
|
||||||
import Refresher from "./generic/helper/Refresher.svelte";
|
import Refresher from "./generic/helper/Refresher.svelte";
|
||||||
import Sorting from "./generic/select/SortSelection.svelte";
|
import Sorting from "./generic/select/SortSelection.svelte";
|
||||||
import MetricSelection from "./generic/select/MetricSelection.svelte";
|
import MetricSelection from "./generic/select/MetricSelection.svelte";
|
||||||
|
|
||||||
|
/* Svelte 5 Props */
|
||||||
|
let { filterPresets, authlevel, roles } = $props();
|
||||||
|
|
||||||
|
/* Const Init */
|
||||||
const { query: initq } = init();
|
const { query: initq } = init();
|
||||||
|
|
||||||
const ccconfig = getContext("cc-config");
|
const ccconfig = getContext("cc-config");
|
||||||
|
const presetProject = filterPresets?.project ? filterPresets.project : ""
|
||||||
|
|
||||||
export let filterPresets = {};
|
/* State Init */
|
||||||
export let authlevel;
|
let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||||
export let roles;
|
let selectedJobs = $state([]);
|
||||||
|
let filterBuffer = $state([]);
|
||||||
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
let jobList = $state(null);
|
||||||
let jobList,
|
let jobCompare = $state(null);
|
||||||
matchedJobs = null;
|
let matchedListJobs = $state(0);
|
||||||
let sorting = { field: "startTime", type: "col", order: "DESC" },
|
let matchedCompareJobs = $state(0);
|
||||||
isSortingOpen = false,
|
let isSortingOpen = $state(false);
|
||||||
isMetricsSelectionOpen = false;
|
let showCompare = $state(false);
|
||||||
let metrics = filterPresets.cluster
|
let isMetricsSelectionOpen = $state(false);
|
||||||
|
let sorting = $state({ field: "startTime", type: "col", order: "DESC" });
|
||||||
|
let selectedCluster = $state(filterPresets?.cluster ? filterPresets.cluster : null);
|
||||||
|
let metrics = $state(filterPresets.cluster
|
||||||
? ccconfig[`plot_list_selectedMetrics:${filterPresets.cluster}`] ||
|
? ccconfig[`plot_list_selectedMetrics:${filterPresets.cluster}`] ||
|
||||||
ccconfig.plot_list_selectedMetrics
|
ccconfig.plot_list_selectedMetrics
|
||||||
: ccconfig.plot_list_selectedMetrics;
|
: ccconfig.plot_list_selectedMetrics
|
||||||
let showFootprint = filterPresets.cluster
|
);
|
||||||
|
let showFootprint = $state(filterPresets.cluster
|
||||||
? !!ccconfig[`plot_list_showFootprint:${filterPresets.cluster}`]
|
? !!ccconfig[`plot_list_showFootprint:${filterPresets.cluster}`]
|
||||||
: !!ccconfig.plot_list_showFootprint;
|
: !!ccconfig.plot_list_showFootprint
|
||||||
let selectedCluster = filterPresets?.cluster ? filterPresets.cluster : null;
|
);
|
||||||
let presetProject = filterPresets?.project ? filterPresets.project : ""
|
|
||||||
|
|
||||||
|
/* Functions */
|
||||||
|
function resetJobSelection() {
|
||||||
|
if (filterComponent && selectedJobs.length === 0) {
|
||||||
|
filterComponent.updateFilters({ dbId: [] });
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Reactive Effects */
|
||||||
|
$effect(() => {
|
||||||
|
// Reactive : Trigger Effect
|
||||||
|
selectedJobs.length
|
||||||
|
untrack(() => {
|
||||||
|
// Unreactive : Apply Reset w/o starting infinite loop
|
||||||
|
resetJobSelection()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
/* On Mount */
|
||||||
// The filterPresets are handled by the Filters component,
|
// The filterPresets are handled by the Filters component,
|
||||||
// so we need to wait for it to be ready before we can start a query.
|
// so we need to wait for it to be ready before we can start a query.
|
||||||
// This is also why JobList component starts out with a paused query.
|
// This is also why JobList component starts out with a paused query.
|
||||||
@ -72,70 +98,119 @@
|
|||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
<!-- ROW2: Tools-->
|
<!-- ROW2: Tools-->
|
||||||
<Row cols={{ xs: 1, md: 2, lg: 4}} class="mb-3">
|
<Row cols={{ xs: 1, md: 2, lg: 5}} class="mb-3">
|
||||||
<Col lg="2" class="mb-2 mb-lg-0">
|
<Col lg="2" class="mb-2 mb-lg-0">
|
||||||
<ButtonGroup class="w-100">
|
<ButtonGroup class="w-100">
|
||||||
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
|
<Button outline color="primary" onclick={() => (isSortingOpen = true)} disabled={showCompare}>
|
||||||
<Icon name="sort-up" /> Sorting
|
<Icon name="sort-up" /> Sorting
|
||||||
</Button>
|
</Button>
|
||||||
<Button
|
<Button
|
||||||
outline
|
outline
|
||||||
color="primary"
|
color="primary"
|
||||||
on:click={() => (isMetricsSelectionOpen = true)}
|
onclick={() => (isMetricsSelectionOpen = true)}
|
||||||
>
|
>
|
||||||
<Icon name="graph-up" /> Metrics
|
<Icon name="graph-up" /> Metrics
|
||||||
</Button>
|
</Button>
|
||||||
</ButtonGroup>
|
</ButtonGroup>
|
||||||
</Col>
|
</Col>
|
||||||
<Col lg="4" xl="{(presetProject !== '') ? 5 : 6}" class="mb-1 mb-lg-0">
|
<Col lg="5" class="mb-1 mb-lg-0">
|
||||||
<Filters
|
<Filters
|
||||||
{filterPresets}
|
|
||||||
{matchedJobs}
|
|
||||||
bind:this={filterComponent}
|
bind:this={filterComponent}
|
||||||
on:update-filters={({ detail }) => {
|
{filterPresets}
|
||||||
|
showFilter={!showCompare}
|
||||||
|
matchedJobs={showCompare? matchedCompareJobs: matchedListJobs}
|
||||||
|
applyFilters={(detail) => {
|
||||||
selectedCluster = detail.filters[0]?.cluster
|
selectedCluster = detail.filters[0]?.cluster
|
||||||
? detail.filters[0].cluster.eq
|
? detail.filters[0].cluster.eq
|
||||||
: null;
|
: null;
|
||||||
jobList.queryJobs(detail.filters);
|
filterBuffer = [...detail.filters]
|
||||||
|
if (showCompare) {
|
||||||
|
jobCompare.queryJobs(detail.filters);
|
||||||
|
} else {
|
||||||
|
jobList.queryJobs(detail.filters);
|
||||||
|
}
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
</Col>
|
</Col>
|
||||||
<Col lg="3" xl="{(presetProject !== '') ? 3 : 2}" class="mb-2 mb-lg-0">
|
<Col lg="2" class="mb-2 mb-lg-0">
|
||||||
<TextFilter
|
{#if !showCompare}
|
||||||
{presetProject}
|
<TextFilter
|
||||||
bind:authlevel
|
{presetProject}
|
||||||
bind:roles
|
bind:authlevel
|
||||||
on:set-filter={({ detail }) => filterComponent.updateFilters(detail)}
|
bind:roles
|
||||||
/>
|
setFilter={(filter) => filterComponent.updateFilters(filter)}
|
||||||
|
/>
|
||||||
|
{/if}
|
||||||
</Col>
|
</Col>
|
||||||
<Col lg="3" xl="2" class="mb-1 mb-lg-0">
|
<Col lg="3" class="mb-1 mb-lg-0 d-inline-flex align-items-start justify-content-end ">
|
||||||
<Refresher on:refresh={() => {
|
{#if !showCompare}
|
||||||
jobList.refreshJobs()
|
<Refresher presetClass="w-auto" onRefresh={() => {
|
||||||
jobList.refreshAllMetrics()
|
jobList.refreshJobs()
|
||||||
}} />
|
jobList.refreshAllMetrics()
|
||||||
|
}} />
|
||||||
|
{/if}
|
||||||
|
<div class="mx-1"></div>
|
||||||
|
<ButtonGroup class="w-50">
|
||||||
|
<Button color="primary" disabled={(matchedListJobs >= 500 && !(selectedJobs.length != 0)) || $initq.fetching} onclick={() => {
|
||||||
|
if (selectedJobs.length != 0) filterComponent.updateFilters({dbId: selectedJobs}, true)
|
||||||
|
showCompare = !showCompare
|
||||||
|
}} >
|
||||||
|
{showCompare ? 'Return to List' :
|
||||||
|
matchedListJobs >= 500 && selectedJobs.length == 0
|
||||||
|
? 'Compare Disabled'
|
||||||
|
: 'Compare' + (selectedJobs.length != 0 ? ` ${selectedJobs.length} ` : ' ') + 'Jobs'
|
||||||
|
}
|
||||||
|
</Button>
|
||||||
|
{#if !showCompare && selectedJobs.length != 0}
|
||||||
|
<Button class="w-auto" color="warning" onclick={() => {
|
||||||
|
selectedJobs = [] // Only empty array, filters handled by reactive reset
|
||||||
|
}}>
|
||||||
|
Clear
|
||||||
|
</Button>
|
||||||
|
{/if}
|
||||||
|
</ButtonGroup>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
<!-- ROW3: Job List-->
|
<!-- ROW3: Job List / Job Compare-->
|
||||||
<Row>
|
<Row>
|
||||||
<Col>
|
<Col>
|
||||||
<JobList
|
{#if !showCompare}
|
||||||
bind:this={jobList}
|
<JobList
|
||||||
bind:metrics
|
bind:this={jobList}
|
||||||
bind:sorting
|
bind:metrics
|
||||||
bind:matchedJobs
|
bind:sorting
|
||||||
bind:showFootprint
|
bind:matchedListJobs
|
||||||
/>
|
bind:showFootprint
|
||||||
|
bind:selectedJobs
|
||||||
|
{filterBuffer}
|
||||||
|
/>
|
||||||
|
{:else}
|
||||||
|
<JobCompare
|
||||||
|
bind:this={jobCompare}
|
||||||
|
bind:metrics
|
||||||
|
bind:matchedCompareJobs
|
||||||
|
{filterBuffer}
|
||||||
|
/>
|
||||||
|
{/if}
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
<Sorting bind:sorting bind:isOpen={isSortingOpen} />
|
<Sorting
|
||||||
|
bind:isOpen={isSortingOpen}
|
||||||
|
presetSorting={sorting}
|
||||||
|
applySorting={(newSort) =>
|
||||||
|
sorting = {...newSort}
|
||||||
|
}/>
|
||||||
|
|
||||||
<MetricSelection
|
<MetricSelection
|
||||||
bind:cluster={selectedCluster}
|
bind:isOpen={isMetricsSelectionOpen}
|
||||||
configName="plot_list_selectedMetrics"
|
bind:showFootprint
|
||||||
bind:metrics
|
presetMetrics={metrics}
|
||||||
bind:isOpen={isMetricsSelectionOpen}
|
cluster={selectedCluster}
|
||||||
bind:showFootprint
|
configName="plot_list_selectedMetrics"
|
||||||
footprintSelect
|
footprintSelect
|
||||||
|
applyMetrics={(newMetrics) =>
|
||||||
|
metrics = [...newMetrics]
|
||||||
|
}
|
||||||
/>
|
/>
|
||||||
|
@ -31,10 +31,8 @@
|
|||||||
} from "./generic/utils.js";
|
} from "./generic/utils.js";
|
||||||
import Filters from "./generic/Filters.svelte";
|
import Filters from "./generic/Filters.svelte";
|
||||||
|
|
||||||
const {} = init();
|
/* Svelte 5 Props */
|
||||||
|
let { type, filterPresets } = $props();
|
||||||
export let type;
|
|
||||||
export let filterPresets;
|
|
||||||
|
|
||||||
// By default, look at the jobs of the last 30 days:
|
// By default, look at the jobs of the last 30 days:
|
||||||
if (filterPresets?.startTime == null) {
|
if (filterPresets?.startTime == null) {
|
||||||
@ -51,50 +49,64 @@
|
|||||||
"Invalid list type provided!",
|
"Invalid list type provided!",
|
||||||
);
|
);
|
||||||
|
|
||||||
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
/* Const Init */
|
||||||
let jobFilters = [];
|
const {} = init();
|
||||||
let nameFilter = "";
|
|
||||||
let sorting = { field: "totalJobs", direction: "down" };
|
|
||||||
|
|
||||||
const client = getContextClient();
|
const client = getContextClient();
|
||||||
$: stats = queryStore({
|
|
||||||
client: client,
|
|
||||||
query: gql`
|
|
||||||
query($jobFilters: [JobFilter!]!) {
|
|
||||||
rows: jobsStatistics(filter: $jobFilters, groupBy: ${type}) {
|
|
||||||
id
|
|
||||||
name
|
|
||||||
totalJobs
|
|
||||||
totalWalltime
|
|
||||||
totalCoreHours
|
|
||||||
totalAccHours
|
|
||||||
}
|
|
||||||
}`,
|
|
||||||
variables: { jobFilters },
|
|
||||||
});
|
|
||||||
|
|
||||||
function changeSorting(event, field) {
|
/* State Init*/
|
||||||
let target = event.target;
|
let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||||
while (target.tagName != "BUTTON") target = target.parentElement;
|
let jobFilters = $state([]);
|
||||||
|
let nameFilter = $state("");
|
||||||
|
let sorting = $state({ field: "totalJobs", direction: "down" });
|
||||||
|
|
||||||
let direction = target.children[0].className.includes("up") ? "down" : "up";
|
/* Derived Vars */
|
||||||
target.children[0].className = `bi-sort-numeric-${direction}`;
|
let stats = $derived(
|
||||||
sorting = { field, direction };
|
queryStore({
|
||||||
|
client: client,
|
||||||
|
query: gql`
|
||||||
|
query($jobFilters: [JobFilter!]!) {
|
||||||
|
rows: jobsStatistics(filter: $jobFilters, groupBy: ${type}) {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
totalJobs
|
||||||
|
totalWalltime
|
||||||
|
totalCoreHours
|
||||||
|
totalAccHours
|
||||||
|
}
|
||||||
|
}`,
|
||||||
|
variables: { jobFilters },
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Functions */
|
||||||
|
function changeSorting(field) {
|
||||||
|
sorting = { field, direction: sorting?.direction == "down" ? "up" : "down" };
|
||||||
}
|
}
|
||||||
|
|
||||||
function sort(stats, sorting, nameFilter) {
|
function sort(stats, sorting, nameFilter) {
|
||||||
const cmp =
|
const idCmp = sorting.direction == "up"
|
||||||
sorting.field == "id"
|
? (a, b) => b.id.localeCompare(a.id)
|
||||||
? sorting.direction == "up"
|
: (a, b) => a.id.localeCompare(b.id)
|
||||||
? (a, b) => a.id < b.id
|
|
||||||
: (a, b) => a.id > b.id
|
|
||||||
: sorting.direction == "up"
|
|
||||||
? (a, b) => a[sorting.field] - b[sorting.field]
|
|
||||||
: (a, b) => b[sorting.field] - a[sorting.field];
|
|
||||||
|
|
||||||
return stats.filter((u) => u.id.includes(nameFilter)).sort(cmp);
|
// Force empty or undefined strings to the end of the list
|
||||||
|
const nameCmp = sorting.direction == "up"
|
||||||
|
? (a, b) => !a?.name ? 1 : (!b?.name ? -1 : (b.name.localeCompare(a.name)))
|
||||||
|
: (a, b) => !a?.name ? 1 : (!b?.name ? -1 : (a.name.localeCompare(b.name)))
|
||||||
|
|
||||||
|
const intCmp = sorting.direction == "up"
|
||||||
|
? (a, b) => a[sorting.field] - b[sorting.field]
|
||||||
|
: (a, b) => b[sorting.field] - a[sorting.field];
|
||||||
|
|
||||||
|
if (sorting.field == "id") {
|
||||||
|
return stats.filter((u) => u.id.includes(nameFilter)).sort(idCmp)
|
||||||
|
} else if (sorting.field == "name") {
|
||||||
|
return stats.filter((u) => u.id.includes(nameFilter)).sort(nameCmp)
|
||||||
|
} else {
|
||||||
|
return stats.filter((u) => u.id.includes(nameFilter)).sort(intCmp)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* On Mount */
|
||||||
onMount(() => filterComponent.updateFilters());
|
onMount(() => filterComponent.updateFilters());
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@ -119,7 +131,7 @@
|
|||||||
{filterPresets}
|
{filterPresets}
|
||||||
startTimeQuickSelect={true}
|
startTimeQuickSelect={true}
|
||||||
menuText="Only {type.toLowerCase()}s with jobs that match the filters will show up"
|
menuText="Only {type.toLowerCase()}s with jobs that match the filters will show up"
|
||||||
on:update-filters={({ detail }) => {
|
applyFilters={(detail) => {
|
||||||
jobFilters = detail.filters;
|
jobFilters = detail.filters;
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
@ -137,9 +149,14 @@
|
|||||||
<Button
|
<Button
|
||||||
color={sorting.field == "id" ? "primary" : "light"}
|
color={sorting.field == "id" ? "primary" : "light"}
|
||||||
size="sm"
|
size="sm"
|
||||||
on:click={(e) => changeSorting(e, "id")}
|
onclick={() => changeSorting("id")}
|
||||||
>
|
>
|
||||||
<Icon name="sort-numeric-down" />
|
{#if sorting?.field == "id"}
|
||||||
|
<!-- Note on Icon-Name: Arrow-indicator always down, only alpha-indicator switches -->
|
||||||
|
<Icon name={`sort-alpha-${sorting?.direction == 'down' ? 'down' : 'down-alt'}`} />
|
||||||
|
{:else}
|
||||||
|
<Icon name="three-dots-vertical" />
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</th>
|
</th>
|
||||||
{#if type == "USER"}
|
{#if type == "USER"}
|
||||||
@ -148,9 +165,13 @@
|
|||||||
<Button
|
<Button
|
||||||
color={sorting.field == "name" ? "primary" : "light"}
|
color={sorting.field == "name" ? "primary" : "light"}
|
||||||
size="sm"
|
size="sm"
|
||||||
on:click={(e) => changeSorting(e, "name")}
|
onclick={() => changeSorting("name")}
|
||||||
>
|
>
|
||||||
<Icon name="sort-numeric-down" />
|
{#if sorting?.field == "name"}
|
||||||
|
<Icon name={`sort-alpha-${sorting?.direction == 'down' ? 'down' : 'down-alt'}`} />
|
||||||
|
{:else}
|
||||||
|
<Icon name="three-dots-vertical" />
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</th>
|
</th>
|
||||||
{/if}
|
{/if}
|
||||||
@ -159,9 +180,14 @@
|
|||||||
<Button
|
<Button
|
||||||
color={sorting.field == "totalJobs" ? "primary" : "light"}
|
color={sorting.field == "totalJobs" ? "primary" : "light"}
|
||||||
size="sm"
|
size="sm"
|
||||||
on:click={(e) => changeSorting(e, "totalJobs")}
|
onclick={() => changeSorting("totalJobs")}
|
||||||
>
|
>
|
||||||
<Icon name="sort-numeric-down" />
|
{#if sorting?.field == "totalJobs"}
|
||||||
|
<!-- Note on Icon-Name: Arrow-indicator always down, only numeric-indicator switches -->
|
||||||
|
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} />
|
||||||
|
{:else}
|
||||||
|
<Icon name="three-dots-vertical" />
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</th>
|
</th>
|
||||||
<th scope="col">
|
<th scope="col">
|
||||||
@ -169,9 +195,13 @@
|
|||||||
<Button
|
<Button
|
||||||
color={sorting.field == "totalWalltime" ? "primary" : "light"}
|
color={sorting.field == "totalWalltime" ? "primary" : "light"}
|
||||||
size="sm"
|
size="sm"
|
||||||
on:click={(e) => changeSorting(e, "totalWalltime")}
|
onclick={() => changeSorting("totalWalltime")}
|
||||||
>
|
>
|
||||||
<Icon name="sort-numeric-down" />
|
{#if sorting?.field == "totalWalltime"}
|
||||||
|
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} />
|
||||||
|
{:else}
|
||||||
|
<Icon name="three-dots-vertical" />
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</th>
|
</th>
|
||||||
<th scope="col">
|
<th scope="col">
|
||||||
@ -179,9 +209,13 @@
|
|||||||
<Button
|
<Button
|
||||||
color={sorting.field == "totalCoreHours" ? "primary" : "light"}
|
color={sorting.field == "totalCoreHours" ? "primary" : "light"}
|
||||||
size="sm"
|
size="sm"
|
||||||
on:click={(e) => changeSorting(e, "totalCoreHours")}
|
onclick={() => changeSorting("totalCoreHours")}
|
||||||
>
|
>
|
||||||
<Icon name="sort-numeric-down" />
|
{#if sorting?.field == "totalCoreHours"}
|
||||||
|
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} />
|
||||||
|
{:else}
|
||||||
|
<Icon name="three-dots-vertical" />
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</th>
|
</th>
|
||||||
<th scope="col">
|
<th scope="col">
|
||||||
@ -189,9 +223,13 @@
|
|||||||
<Button
|
<Button
|
||||||
color={sorting.field == "totalAccHours" ? "primary" : "light"}
|
color={sorting.field == "totalAccHours" ? "primary" : "light"}
|
||||||
size="sm"
|
size="sm"
|
||||||
on:click={(e) => changeSorting(e, "totalAccHours")}
|
onclick={() => changeSorting("totalAccHours")}
|
||||||
>
|
>
|
||||||
<Icon name="sort-numeric-down" />
|
{#if sorting?.field == "totalAccHours"}
|
||||||
|
<Icon name={`sort-numeric-${sorting?.direction == 'down' ? 'down-alt' : 'down'}`} />
|
||||||
|
{:else}
|
||||||
|
<Icon name="three-dots-vertical" />
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</th>
|
</th>
|
||||||
</tr>
|
</tr>
|
||||||
|
@ -9,7 +9,9 @@
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
import { getContext } from "svelte";
|
import {
|
||||||
|
getContext,
|
||||||
|
} from "svelte";
|
||||||
import {
|
import {
|
||||||
Row,
|
Row,
|
||||||
Col,
|
Col,
|
||||||
@ -34,23 +36,28 @@
|
|||||||
import TimeSelection from "./generic/select/TimeSelection.svelte";
|
import TimeSelection from "./generic/select/TimeSelection.svelte";
|
||||||
import Refresher from "./generic/helper/Refresher.svelte";
|
import Refresher from "./generic/helper/Refresher.svelte";
|
||||||
|
|
||||||
export let cluster;
|
/* Svelte 5 Props */
|
||||||
export let hostname;
|
let {
|
||||||
export let from = null;
|
cluster,
|
||||||
export let to = null;
|
hostname,
|
||||||
|
presetFrom = null,
|
||||||
|
presetTo = null,
|
||||||
|
} = $props();
|
||||||
|
|
||||||
|
/* Const Init */
|
||||||
const { query: initq } = init();
|
const { query: initq } = init();
|
||||||
|
|
||||||
if (from == null || to == null) {
|
|
||||||
to = new Date(Date.now());
|
|
||||||
from = new Date(to.getTime());
|
|
||||||
from.setHours(from.getHours() - 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
const initialized = getContext("initialized")
|
const initialized = getContext("initialized")
|
||||||
const globalMetrics = getContext("globalMetrics")
|
const globalMetrics = getContext("globalMetrics")
|
||||||
const ccconfig = getContext("cc-config");
|
const ccconfig = getContext("cc-config");
|
||||||
const clusters = getContext("clusters");
|
const clusters = getContext("clusters");
|
||||||
|
const nowEpoch = Date.now();
|
||||||
|
const paging = { itemsPerPage: 50, page: 1 };
|
||||||
|
const sorting = { field: "startTime", type: "col", order: "DESC" };
|
||||||
|
const filter = [
|
||||||
|
{ cluster: { eq: cluster } },
|
||||||
|
{ node: { contains: hostname } },
|
||||||
|
{ state: ["running"] },
|
||||||
|
];
|
||||||
const client = getContextClient();
|
const client = getContextClient();
|
||||||
const nodeMetricsQuery = gql`
|
const nodeMetricsQuery = gql`
|
||||||
query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
|
query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
|
||||||
@ -79,27 +86,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
$: nodeMetricsData = queryStore({
|
|
||||||
client: client,
|
|
||||||
query: nodeMetricsQuery,
|
|
||||||
variables: {
|
|
||||||
cluster: cluster,
|
|
||||||
nodes: [hostname],
|
|
||||||
from: from.toISOString(),
|
|
||||||
to: to.toISOString(),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
const paging = { itemsPerPage: 50, page: 1 };
|
|
||||||
const sorting = { field: "startTime", type: "col", order: "DESC" };
|
|
||||||
const filter = [
|
|
||||||
{ cluster: { eq: cluster } },
|
|
||||||
{ node: { contains: hostname } },
|
|
||||||
{ state: ["running"] },
|
|
||||||
];
|
|
||||||
|
|
||||||
const nodeJobsQuery = gql`
|
const nodeJobsQuery = gql`
|
||||||
query (
|
query (
|
||||||
$filter: [JobFilter!]!
|
$filter: [JobFilter!]!
|
||||||
@ -112,13 +98,37 @@
|
|||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
$: nodeJobsData = queryStore({
|
/* State Init */
|
||||||
client: client,
|
let from = $state(presetFrom ? presetFrom : new Date(nowEpoch - (4 * 3600 * 1000)));
|
||||||
query: nodeJobsQuery,
|
let to = $state(presetTo ? presetTo : new Date(nowEpoch));
|
||||||
variables: { paging, sorting, filter },
|
let systemUnits = $state({});
|
||||||
|
|
||||||
|
/* Derived */
|
||||||
|
const nodeMetricsData = $derived(queryStore({
|
||||||
|
client: client,
|
||||||
|
query: nodeMetricsQuery,
|
||||||
|
variables: {
|
||||||
|
cluster: cluster,
|
||||||
|
nodes: [hostname],
|
||||||
|
from: from?.toISOString(),
|
||||||
|
to: to?.toISOString(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
const nodeJobsData = $derived(queryStore({
|
||||||
|
client: client,
|
||||||
|
query: nodeJobsQuery,
|
||||||
|
variables: { paging, sorting, filter },
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
/* Effect */
|
||||||
|
$effect(() => {
|
||||||
|
loadUnits($initialized);
|
||||||
});
|
});
|
||||||
|
|
||||||
let systemUnits = {};
|
/* Functions */
|
||||||
function loadUnits(isInitialized) {
|
function loadUnits(isInitialized) {
|
||||||
if (!isInitialized) return
|
if (!isInitialized) return
|
||||||
const systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
|
const systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
|
||||||
@ -126,8 +136,6 @@
|
|||||||
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
|
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$: loadUnits($initialized)
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<Row cols={{ xs: 2, lg: 4 }}>
|
<Row cols={{ xs: 2, lg: 4 }}>
|
||||||
@ -146,7 +154,14 @@
|
|||||||
</Col>
|
</Col>
|
||||||
<!-- Time Col -->
|
<!-- Time Col -->
|
||||||
<Col>
|
<Col>
|
||||||
<TimeSelection bind:from bind:to />
|
<TimeSelection
|
||||||
|
presetFrom={from}
|
||||||
|
presetTo={to}
|
||||||
|
applyTime={(newFrom, newTo) => {
|
||||||
|
from = newFrom;
|
||||||
|
to = newTo;
|
||||||
|
}}
|
||||||
|
/>
|
||||||
</Col>
|
</Col>
|
||||||
<!-- Concurrent Col -->
|
<!-- Concurrent Col -->
|
||||||
<Col class="mt-2 mt-lg-0">
|
<Col class="mt-2 mt-lg-0">
|
||||||
@ -172,7 +187,7 @@
|
|||||||
<!-- Refresh Col-->
|
<!-- Refresh Col-->
|
||||||
<Col class="mt-2 mt-lg-0">
|
<Col class="mt-2 mt-lg-0">
|
||||||
<Refresher
|
<Refresher
|
||||||
on:refresh={() => {
|
onRefresh={() => {
|
||||||
const diff = Date.now() - to;
|
const diff = Date.now() - to;
|
||||||
from = new Date(from.getTime() + diff);
|
from = new Date(from.getTime() + diff);
|
||||||
to = new Date(to.getTime() + diff);
|
to = new Date(to.getTime() + diff);
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user