Merge pull request #170 from ClusterCockpit/hotfix

Hotfix
This commit is contained in:
Jan Eitzinger 2023-07-01 07:14:46 +02:00 committed by GitHub
commit 036685cbe7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 135 additions and 46 deletions

View File

@ -10,7 +10,6 @@ import (
"encoding/json"
"errors"
"fmt"
"math"
"strconv"
"sync"
"time"
@ -18,7 +17,6 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@ -318,7 +316,7 @@ func (r *JobRepository) FindConcurrentJobs(
stopTime = startTime + int64(job.Duration)
}
// Add 5m overlap for jobs start time at the end
// Add 200s overlap for jobs start time at the end
startTimeTail := startTime + 10
stopTimeTail := stopTime - 200
startTimeFront := startTime + 200
@ -338,8 +336,7 @@ func (r *JobRepository) FindConcurrentJobs(
}
items := make([]*model.JobLink, 0, 10)
minStart := int64(math.MaxInt64)
maxStart := int64(0)
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
for rows.Next() {
var id, jobId, startTime sql.NullInt64
@ -350,9 +347,7 @@ func (r *JobRepository) FindConcurrentJobs(
}
if id.Valid {
minStart = util.Min(minStart, startTime.Int64)
maxStart = util.Max(maxStart, startTime.Int64)
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
@ -376,9 +371,7 @@ func (r *JobRepository) FindConcurrentJobs(
}
if id.Valid {
minStart = util.Min(minStart, startTime.Int64)
maxStart = util.Max(maxStart, startTime.Int64)
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
@ -388,8 +381,6 @@ func (r *JobRepository) FindConcurrentJobs(
}
cnt := len(items)
queryString := fmt.Sprintf("cluster=%s&startTime=%d-%d&node=%s",
job.Cluster, minStart, maxStart, hostname)
return &model.JobLinkResultList{
ListQuery: &queryString,

View File

@ -1,4 +1,10 @@
ALTER TABLE configuration ADD COLUMN value_new varchar(255);
INSERT INTO configuration (value_new) SELECT value FROM configuration;
ALTER TABLE configuration DROP COLUMN value;
ALTER TABLE configuration RENAME COLUMN value_new TO value;
-- Rebuild the configuration table by copy-and-swap: create a replacement
-- table, copy every existing row into it, drop the old table, then rename
-- the replacement to take its place.
-- In this variant the value column is declared as varchar(255).
-- Rows are keyed by (username, confkey); username references user(username)
-- and the rows are removed when the referenced user is deleted.
CREATE TABLE IF NOT EXISTS configuration_new (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
-- Positional copy: no column list is given, so this relies on configuration
-- exposing the same number of columns in the same order as configuration_new.
-- NOTE(review): the old table's column layout is not visible here - confirm.
INSERT INTO configuration_new SELECT * FROM configuration;
-- The old table is dropped only after the copy statement above.
DROP TABLE configuration;
ALTER TABLE configuration_new RENAME TO configuration;

View File

@ -1,4 +1,10 @@
ALTER TABLE configuration ADD COLUMN value_new TEXT;
INSERT INTO configuration (value_new) SELECT value FROM configuration;
ALTER TABLE configuration DROP COLUMN value;
ALTER TABLE configuration RENAME COLUMN value_new TO value;
-- Rebuild the configuration table by copy-and-swap: create a replacement
-- table, copy every existing row into it, drop the old table, then rename
-- the replacement to take its place.
-- In this variant the value column is declared as text.
-- Rows are keyed by (username, confkey); username references user(username)
-- and the rows are removed when the referenced user is deleted.
CREATE TABLE IF NOT EXISTS configuration_new (
username varchar(255),
confkey varchar(255),
value text,
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
-- Positional copy: no column list is given, so this relies on configuration
-- exposing the same number of columns in the same order as configuration_new.
-- NOTE(review): the old table's column layout is not visible here - confirm.
INSERT INTO configuration_new SELECT * FROM configuration;
-- The old table is dropped only after the copy statement above.
DROP TABLE configuration;
ALTER TABLE configuration_new RENAME TO configuration;

View File

@ -263,11 +263,11 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
}
if cond.In != nil {
queryUsers := make([]string, len(cond.In))
queryElements := make([]string, len(cond.In))
for i, val := range cond.In {
queryUsers[i] = val
queryElements[i] = val
}
return query.Where(sq.Or{sq.Eq{"job.user": queryUsers}})
return query.Where(sq.Or{sq.Eq{field: queryElements}})
}
return query
}

View File

@ -184,6 +184,9 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
}
}
if query.Get("node") != "" {
filterPresets["node"] = query.Get("node")
}
if query.Get("numNodes") != "" {
parts := strings.Split(query.Get("numNodes"), "-")
if len(parts) == 2 {
@ -205,7 +208,13 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
}
if query.Get("jobId") != "" {
if len(query["jobId"]) == 1 {
filterPresets["jobId"] = query.Get("jobId")
filterPresets["jobIdMatch"] = "eq"
} else {
filterPresets["jobId"] = query["jobId"]
filterPresets["jobIdMatch"] = "in"
}
}
if query.Get("arrayJobId") != "" {
if num, err := strconv.Atoi(query.Get("arrayJobId")); err == nil {

View File

@ -27,7 +27,7 @@
resources { hostname, hwthreads, accelerators },
metaData,
userData { name, email },
concurrentJobs { items { id, jobId }, count }
concurrentJobs { items { id, jobId }, count, listQuery }
}
`)
@ -118,11 +118,12 @@
{/if}
</Col>
{#if $jobMetrics.data && $initq.data}
{#if $initq.data.job.concurrentJobs != null}
{#if $initq.data.job.concurrentJobs.items.length != 0}
{#if authlevel > roles.manager}
<Col>
<h5>Concurrent Jobs <Icon name="info-circle" style="cursor:help;" title="Shared jobs running on the same node with overlapping runtimes"/></h5>
<ul>
<li><a href="/monitoring/jobs/?{$initq.data.job.concurrentJobs.listQuery}" target="_blank">See All</a></li>
{#each $initq.data.job.concurrentJobs.items as pjob, index}
<li><a href="/monitoring/job/{pjob.id}" target="_blank">{pjob.jobId}</a></li>
{/each}

View File

@ -23,7 +23,7 @@
const ccconfig = getContext('cc-config')
const clusters = getContext('clusters')
const client = getContextClient();
const query = gql`query($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
const nodeMetricsQuery = gql`query($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
host
subCluster
@ -42,9 +42,9 @@
}
}`;
$: nodesQuery = queryStore({
$: nodeMetricsData = queryStore({
client: client,
query: query,
query: nodeMetricsQuery,
variables: {
cluster: cluster,
nodes: [hostname],
@ -53,8 +53,44 @@
}
});
let itemsPerPage = ccconfig.plot_list_jobsPerPage;
let page = 1;
let paging = { itemsPerPage, page };
let sorting = { field: "startTime", order: "DESC" };
$: filter = [
{cluster: { eq: cluster }},
{node: { eq: hostname }},
{state: 'running'}
// {startTime: {
// from: from.toISOString(),
// to: to.toISOString()
// }}
];
const nodeJobsQuery = gql`
query (
$filter: [JobFilter!]!
$sorting: OrderByInput!
$paging: PageRequest!
) {
jobs(filter: $filter, order: $sorting, page: $paging) {
# items {
# id
# jobId
# }
count
}
}
`;
$: nodeJobsData = queryStore({
client: client,
query: nodeJobsQuery,
variables: { paging, sorting, filter }
});
let metricUnits = {}
$: if ($nodesQuery.data) {
$: if ($nodeMetricsData.data) {
let thisCluster = clusters.find(c => c.name == cluster)
if (thisCluster) {
for (let metric of thisCluster.metricConfig) {
@ -67,7 +103,7 @@
}
}
// $: console.log($nodesQuery?.data?.nodeMetrics[0].metrics)
const dateToUnixEpoch = rfc3339 => Math.floor(Date.parse(rfc3339) / 1000)
</script>
<Row>
@ -82,6 +118,18 @@
<InputGroupText>{hostname} ({cluster})</InputGroupText>
</InputGroup>
</Col>
<Col>
{#if $nodeJobsData.fetching }
<Spinner/>
{:else}
{#if $nodeJobsData.data}
Currently running jobs on this node: { $nodeJobsData.data.jobs.count }
[ <a href="/monitoring/jobs/?cluster={cluster}&state=running&node={hostname}" target="_blank">View in Job List</a> ]
{:else}
No currently running jobs.
{/if}
{/if}
</Col>
<Col>
<TimeSelection
bind:from={from}
@ -92,9 +140,9 @@
<br/>
<Row>
<Col>
{#if $nodesQuery.error}
<Card body color="danger">{$nodesQuery.error.message}</Card>
{:else if $nodesQuery.fetching || $initq.fetching}
{#if $nodeMetricsData.error}
<Card body color="danger">{$nodeMetricsData.error.message}</Card>
{:else if $nodeMetricsData.fetching || $initq.fetching}
<Spinner/>
{:else}
<PlotTable
@ -102,18 +150,18 @@
let:width
renderFor="node"
itemsPerRow={ccconfig.plot_view_plotsPerRow}
items={$nodesQuery.data.nodeMetrics[0].metrics
.map(m => ({ ...m, disabled: checkMetricDisabled(m.name, cluster, $nodesQuery.data.nodeMetrics[0].subCluster)}))
items={$nodeMetricsData.data.nodeMetrics[0].metrics
.map(m => ({ ...m, disabled: checkMetricDisabled(m.name, cluster, $nodeMetricsData.data.nodeMetrics[0].subCluster)}))
.sort((a, b) => a.name.localeCompare(b.name))}>
<h4 style="text-align: center; padding-top:15px;">{item.name} {metricUnits[item.name]}</h4>
{#if item.disabled === false && item.metric}
<MetricPlot
width={width} height={300} metric={item.name} timestep={item.metric.timestep}
cluster={clusters.find(c => c.name == cluster)} subCluster={$nodesQuery.data.nodeMetrics[0].subCluster}
cluster={clusters.find(c => c.name == cluster)} subCluster={$nodeMetricsData.data.nodeMetrics[0].subCluster}
series={item.metric.series} />
{:else if item.disabled === true && item.metric}
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info">Metric disabled for subcluster <code>{item.name}:{$nodesQuery.data.nodeMetrics[0].subCluster}</code></Card>
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info">Metric disabled for subcluster <code>{item.name}:{$nodeMetricsData.data.nodeMetrics[0].subCluster}</code></Card>
{:else}
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="warning">No dataset returned for <code>{item.name}</code></Card>
{/if}

View File

@ -34,6 +34,7 @@
let filters = {
projectMatch: filterPresets.projectMatch || 'contains',
userMatch: filterPresets.userMatch || 'contains',
jobIdMatch: filterPresets.jobIdMatch || 'eq',
cluster: filterPresets.cluster || null,
partition: filterPresets.partition || null,
@ -47,6 +48,7 @@
project: filterPresets.project || '',
jobName: filterPresets.jobName || '',
node: filterPresets.node || null,
numNodes: filterPresets.numNodes || { from: null, to: null },
numHWThreads: filterPresets.numHWThreads || { from: null, to: null },
numAccelerators: filterPresets.numAccelerators || { from: null, to: null },
@ -74,6 +76,8 @@
let items = []
if (filters.cluster)
items.push({ cluster: { eq: filters.cluster } })
if (filters.node)
items.push({ node: { contains: filters.node } })
if (filters.partition)
items.push({ partition: { eq: filters.partition } })
if (filters.states.length != allJobStates.length)
@ -85,7 +89,7 @@
if (filters.duration.from || filters.duration.to)
items.push({ duration: { from: filters.duration.from, to: filters.duration.to } })
if (filters.jobId)
items.push({ jobId: { eq: filters.jobId } })
items.push({ jobId: { [filters.jobIdMatch]: filters.jobId } })
if (filters.arrayJobId != null)
items.push({ arrayJobId: filters.arrayJobId })
if (filters.numNodes.from != null || filters.numNodes.to != null)
@ -114,6 +118,8 @@
let opts = []
if (filters.cluster)
opts.push(`cluster=${filters.cluster}`)
if (filters.node)
opts.push(`node=${filters.node}`)
if (filters.partition)
opts.push(`partition=${filters.partition}`)
if (filters.states.length != allJobStates.length)
@ -125,6 +131,15 @@
// } else {
opts.push(`startTime=${dateToUnixEpoch(filters.startTime.from)}-${dateToUnixEpoch(filters.startTime.to)}`)
// }
if (filters.jobId.length != 0)
if (filters.jobIdMatch != 'in') {
opts.push(`jobId=${filters.jobId}`)
} else {
for (let singleJobId of filters.jobId)
opts.push(`jobId=${singleJobId}`)
}
if (filters.jobIdMatch != 'eq')
opts.push(`jobIdMatch=${filters.jobIdMatch}`)
for (let tag of filters.tags)
opts.push(`tag=${tag}`)
if (filters.duration.from && filters.duration.to)
@ -272,6 +287,12 @@
</Info>
{/if}
{#if filters.node != null }
<Info icon="hdd-stack" on:click={() => (isResourcesOpen = true)}>
Node: {filters.node}
</Info>
{/if}
{#if filters.stats.length > 0}
<Info icon="bar-chart" on:click={() => (isStatsOpen = true)}>
{filters.stats.map(stat => `${stat.text}: ${stat.from} - ${stat.to}`).join(', ')}
@ -318,6 +339,7 @@
bind:numNodes={filters.numNodes}
bind:numHWThreads={filters.numHWThreads}
bind:numAccelerators={filters.numAccelerators}
bind:namedNode={filters.node}
bind:isNodesModified={isNodesModified}
bind:isHwthreadsModified={isHwthreadsModified}
bind:isAccsModified={isAccsModified}

View File

@ -16,8 +16,9 @@
export let isNodesModified = false
export let isHwthreadsModified = false
export let isAccsModified = false
export let namedNode = null
let pendingNumNodes = numNodes, pendingNumHWThreads = numHWThreads, pendingNumAccelerators = numAccelerators
let pendingNumNodes = numNodes, pendingNumHWThreads = numHWThreads, pendingNumAccelerators = numAccelerators, pendingNamedNode = namedNode
const findMaxNumAccels = clusters => clusters.reduce((max, cluster) => Math.max(max,
cluster.subClusters.reduce((max, sc) => Math.max(max, sc.topology.accelerators?.length || 0), 0)), 0)
@ -76,7 +77,9 @@
Select number of utilized Resources
</ModalHeader>
<ModalBody>
<h6>Number of Nodes</h6>
<h6>Named Node</h6>
<input type="text" class="form-control" bind:value={pendingNamedNode}>
<h6 style="margin-top: 1rem;">Number of Nodes</h6>
<DoubleRangeSlider
on:change={({ detail }) => {
pendingNumNodes = { from: detail[0], to: detail[1] }
@ -117,7 +120,8 @@
numNodes ={ from: pendingNumNodes.from, to: pendingNumNodes.to }
numHWThreads = { from: pendingNumHWThreads.from, to: pendingNumHWThreads.to }
numAccelerators = { from: pendingNumAccelerators.from, to: pendingNumAccelerators.to }
dispatch('update', { numNodes, numHWThreads, numAccelerators })
namedNode = pendingNamedNode
dispatch('update', { numNodes, numHWThreads, numAccelerators, namedNode })
}}>
Close & Apply
</Button>
@ -126,13 +130,15 @@
pendingNumNodes = { from: null, to: null }
pendingNumHWThreads = { from: null, to: null }
pendingNumAccelerators = { from: null, to: null }
pendingNamedNode = null
numNodes = { from: pendingNumNodes.from, to: pendingNumNodes.to }
numHWThreads = { from: pendingNumHWThreads.from, to: pendingNumHWThreads.to }
numAccelerators = { from: pendingNumAccelerators.from, to: pendingNumAccelerators.to }
isNodesModified = false
isHwthreadsModified = false
isAccsModified = false
dispatch('update', { numNodes, numHWThreads, numAccelerators })
namedNode = pendingNamedNode
dispatch('update', { numNodes, numHWThreads, numAccelerators, namedNode})
}}>Reset</Button>
<Button on:click={() => (isOpen = false)}>Close</Button>
</ModalFooter>