Merge branch 'change_resolution_on_zoom' into sample_resolution_select

Christoph Kluge 2024-09-02 18:23:13 +02:00
commit bce2a66177
8 changed files with 221 additions and 47 deletions

View File

@@ -1,5 +1,11 @@
CREATE INDEX IF NOT EXISTS job_by_project ON job (project);
CREATE INDEX IF NOT EXISTS job_list_projects ON job (project, job_state);
DROP INDEX job_stats;
DROP INDEX job_by_user;
DROP INDEX job_by_starttime;
DROP INDEX job_by_job_id;
DROP INDEX job_list;
DROP INDEX job_list_user;
DROP INDEX job_list_users;
DROP INDEX job_list_users_start;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
@@ -24,3 +30,45 @@ ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, "partition");
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, "partition", start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, "partition", job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, "partition", job_state, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, "partition", job_state, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, "partition", job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
CREATE INDEX IF NOT EXISTS jobs_user ON job (user);
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (user, start_time);
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, user);
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
PRAGMA optimize;
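
Note that SQLite expects IF NOT EXISTS between CREATE INDEX and the index name, as written above ("partition" is quoted because it is an SQL keyword). A quick way to confirm that a query actually picks up one of the new composite indexes (a sketch against the migrated database; the cluster name is a placeholder):

-- Expected plan mentions e.g. "SEARCH job USING INDEX jobs_cluster_jobstate_starttime"
EXPLAIN QUERY PLAN
SELECT id FROM job
WHERE cluster = 'testcluster' AND job_state = 'running'
ORDER BY start_time DESC;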

View File

@@ -77,8 +77,8 @@ func (r *JobRepository) buildStatsQuery(
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs",
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
@@ -86,10 +86,10 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").GroupBy(col)
).From("job").Join("user ON user.username = job.user").GroupBy(col)
} else {
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)", "name",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
@@ -97,7 +97,7 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType),
).From("job")
).From("job").Join("user ON user.username = job.user")
}
for _, f := range filter {
@@ -107,15 +107,15 @@ func (r *JobRepository) buildStatsQuery(
return query
}
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
user := GetUserFromContext(ctx)
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
if name != "" {
return name
} else {
return "-"
}
}
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
// user := GetUserFromContext(ctx)
// name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
// if name != "" {
// return name
// } else {
// return "-"
// }
// }
func (r *JobRepository) getCastType() string {
var castType string
@@ -167,14 +167,20 @@
for rows.Next() {
var id sql.NullString
var name sql.NullString
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
var personName string
if name.Valid {
personName = name.String
}
if jobs.Valid {
totalJobs = int(jobs.Int64)
@@ -205,11 +211,11 @@
}
if col == "job.user" {
name := r.getUserName(ctx, id.String)
// name := r.getUserName(ctx, id.String)
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
Name: name,
Name: personName,
TotalJobs: totalJobs,
TotalWalltime: totalWalltime,
TotalNodes: totalNodes,

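The query change above replaces the per-row getUserName() lookup with a single JOIN against the user table, so each grouped row already carries the display name. A minimal standalone sketch of the pattern with Masterminds/squirrel (assumes a *sql.DB handle and the job/user tables from the migration; not the repository's actual method):

package stats

import (
	"database/sql"
	"fmt"

	sq "github.com/Masterminds/squirrel"
)

// userStats prints one row per user: job count plus the display name,
// resolved in the same statement via the JOIN instead of a follow-up query.
func userStats(db *sql.DB) error {
	rows, err := sq.Select("job.user", "COUNT(job.id) as totalJobs", "user.name").
		From("job").
		Join("user ON user.username = job.user").
		GroupBy("job.user").
		RunWith(db).
		Query()
	if err != nil {
		return err
	}
	defer rows.Close()

	for rows.Next() {
		var id, name sql.NullString
		var jobs sql.NullInt64
		if err := rows.Scan(&id, &jobs, &name); err != nil {
			return err
		}
		fmt.Println(id.String, jobs.Int64, name.String)
	}
	return rows.Err()
}

One caveat of the inner Join: jobs whose user has no row in the user table drop out of the statistics entirely; a LeftJoin would keep them with a NULL name, which the sql.NullString scan already tolerates.
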
View File

@@ -48,6 +48,7 @@
href: `/monitoring/user/${username}`,
icon: "bar-chart-line-fill",
perCluster: false,
listOptions: false,
menu: "none",
},
{
@@ -56,6 +57,7 @@
href: `/monitoring/jobs/`,
icon: "card-list",
perCluster: false,
listOptions: false,
menu: "none",
},
{
@@ -63,7 +65,8 @@
requiredRole: roles.manager,
href: "/monitoring/users/",
icon: "people-fill",
perCluster: false,
perCluster: true,
listOptions: true,
menu: "Groups",
},
{
@@ -71,7 +74,8 @@
requiredRole: roles.support,
href: "/monitoring/projects/",
icon: "folder",
perCluster: false,
perCluster: true,
listOptions: true,
menu: "Groups",
},
{
@@ -80,6 +84,7 @@
href: "/monitoring/tags/",
icon: "tags",
perCluster: false,
listOptions: false,
menu: "Groups",
},
{
@@ -88,6 +93,7 @@
href: "/monitoring/analysis/",
icon: "graph-up",
perCluster: true,
listOptions: false,
menu: "Stats",
},
{
@@ -96,6 +102,7 @@
href: "/monitoring/systems/",
icon: "cpu",
perCluster: true,
listOptions: false,
menu: "Groups",
},
{
@@ -104,6 +111,7 @@
href: "/monitoring/status/",
icon: "cpu",
perCluster: true,
listOptions: false,
menu: "Stats",
},
];
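
Each entry now carries a listOptions flag alongside perCluster; the Header markup in the later file branches on the pair to pick a rendering. The decision, condensed into a plain function (a sketch; names as in the diff):

// How a nav item's flags map to its navbar rendering:
function navVariant(item) {
  if (item.listOptions) return "nested-dropdown"; // All Clusters entry plus per-cluster All/Running submenus
  if (!item.perCluster) return "plain-link";      // simple NavLink
  return "cluster-dropdown";                      // existing per-cluster dropdown
}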

View File

@@ -90,11 +90,10 @@
},
});
let itemsPerPage = ccconfig.plot_list_jobsPerPage;
let page = 1;
let paging = { itemsPerPage, page };
let sorting = { field: "startTime", type: "col", order: "DESC" };
$: filter = [
const paging = { itemsPerPage: 50, page: 1 };
const sorting = { field: "startTime", type: "col", order: "DESC" };
const filter = [
{ cluster: { eq: cluster } },
{ node: { contains: hostname } },
{ state: ["running"] },

View File

@@ -32,12 +32,14 @@
? ["core", "accelerator"]
: ["core"]
: ["node"];
let selectedResolution = 600;
let zoomStates = {};
const cluster = getContext("clusters").find((c) => c.name == job.cluster);
const client = getContextClient();
const query = gql`
query ($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!) {
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes) {
query ($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!, $selectedResolution: Int) {
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes, resolution: $selectedResolution) {
name
scope
metric {
@@ -66,17 +68,30 @@
}
`;
function handleZoom(detail, metric) {
if ( // Only store a state that differs; re-setting an identical one causes a feedback loop
(zoomStates[metric]?.x?.min !== detail?.lastZoomState?.x?.min) &&
(zoomStates[metric]?.y?.max !== detail?.lastZoomState?.y?.max)
) {
zoomStates[metric] = {...detail.lastZoomState}
}
if (detail?.newRes) { // Triggers GQL refetch
selectedResolution = detail.newRes
}
}
$: metricsQuery = queryStore({
client: client,
query: query,
variables: { id, metrics, scopes },
variables: { id, metrics, scopes, selectedResolution },
});
function refreshMetrics() {
metricsQuery = queryStore({
client: client,
query: query,
variables: { id, metrics, scopes },
variables: { id, metrics, scopes, selectedResolution },
// requestPolicy: 'network-only' // use default cache-first for refresh
});
}
@@ -159,6 +174,7 @@
<!-- Subcluster metricConfig 'remove' keyword for job tables (joblist main, user joblist, project joblist) could be reused here as the top-level case -->
{#if metric.disabled == false && metric.data}
<MetricPlot
on:zoom={({detail}) => { handleZoom(detail, metric.data.name) }}
width={plotWidth}
height={plotHeight}
timestep={metric.data.metric.timestep}
@@ -171,6 +187,7 @@
isShared={job.exclusive != 1}
numhwthreads={job.numHWThreads}
numaccs={job.numAcc}
zoomState={zoomStates[metric.data.name]}
/>
{:else if metric.disabled == true && metric.data}
<Card body color="info"

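The guard in handleZoom is load-bearing: storing a zoom state identical to the one already held re-triggers uPlot's setScale hook, which dispatches another zoom event, and the two components loop forever. The pattern in isolation (a runnable sketch with assumed state shapes, not the component code):

// Store a zoom state only if it differs from the last one we kept.
const zoomStates = {};

function storeZoom(metric, lastZoomState) {
  if (
    (zoomStates[metric]?.x?.min !== lastZoomState?.x?.min) &&
    (zoomStates[metric]?.y?.max !== lastZoomState?.y?.max)
  ) {
    zoomStates[metric] = { ...lastZoomState }; // genuine change: keep it
    return true;
  }
  return false; // echo of our own update: ignore, which breaks the feedback loop
}

storeZoom("cpu_load", { x: { min: 100, max: 200 }, y: { max: 50 } }); // true
storeZoom("cpu_load", { x: { min: 100, max: 200 }, y: { max: 50 } }); // false
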
View File

@@ -112,7 +112,7 @@
<script>
import uPlot from "uplot";
import { formatNumber } from "../units.js";
import { getContext, onMount, onDestroy } from "svelte";
import { getContext, onMount, onDestroy, createEventDispatcher } from "svelte";
import { Card } from "@sveltestrap/sveltestrap";
export let metric;
@@ -129,11 +129,15 @@
export let forNode = false;
export let numhwthreads = 0;
export let numaccs = 0;
export let zoomState = null;
// $: console.log('Changed ZoomState for', metric, zoomState)
if (useStatsSeries == null) useStatsSeries = statisticsSeries != null;
if (useStatsSeries == false && series == null) useStatsSeries = true;
const dispatch = createEventDispatcher();
const subClusterTopology = getContext("getHardwareTopology")(cluster, subCluster);
const metricConfig = getContext("getMetricConfig")(cluster, subCluster, metric);
const clusterCockpitConfig = getContext("cc-config");
@@ -392,6 +396,19 @@
bands: plotBands,
padding: [5, 10, -20, 0],
hooks: {
init: [
(u) => {
u.over.addEventListener("dblclick", (e) => {
console.log('Dispatch Reset')
dispatch('zoom', {
lastZoomState: {
x: { time: false },
y: { auto: true }
}
});
});
}
],
draw: [
(u) => {
// Draw plot type label:
@@ -433,6 +450,32 @@
u.ctx.restore();
},
],
setScale: [
(u, key) => {
if (key === 'x') {
const numX = (u.series[0].idxs[1] - u.series[0].idxs[0])
if (numX <= 20 && timestep !== 60) { // Zoom IN if not at MAX
console.log('Dispatch Zoom')
if (timestep === 600) {
dispatch('zoom', {
newRes: 240,
lastZoomState: u?.scales
});
} else if (timestep === 240) {
dispatch('zoom', {
newRes: 60,
lastZoomState: u?.scales
});
}
} else {
console.log('Dispatch Update')
dispatch('zoom', {
lastZoomState: u?.scales
});
}
}
}
]
},
scales: {
x: { time: false },
@@ -463,6 +506,10 @@
if (!uplot) {
opts.width = width;
opts.height = height;
if (zoomState) {
// console.log('Use last state for uPlot init:', metric, scope, zoomState)
opts.scales = {...zoomState}
}
uplot = new uPlot(opts, plotData, plotWrapper);
} else {
uplot.setSize({ width, height });
@@ -471,7 +518,6 @@
function onSizeChange() {
if (!uplot) return;
if (timeoutId != null) clearTimeout(timeoutId);
timeoutId = setTimeout(() => {

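The setScale hook above hard-codes the 600 → 240 → 60 stepping, while Metric.svelte already defines const resolutions = [600, 240, 60]. A sketch of deriving the next step from that array instead, so adding a resolution later needs no hook change (assumes the array is shared with or passed into the plot):

const resolutions = [600, 240, 60]; // coarsest to finest, as in Metric.svelte

// Next finer timestep after `current`, or null when already at the finest.
function nextFinerResolution(current) {
  const i = resolutions.indexOf(current);
  return i >= 0 && i < resolutions.length - 1 ? resolutions[i + 1] : null;
}

// Inside the setScale hook, replacing the chained if/else:
//   const newRes = nextFinerResolution(timestep);
//   dispatch('zoom', newRes ? { newRes, lastZoomState: u?.scales }
//                           : { lastZoomState: u?.scales });
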
View File

@@ -21,7 +21,41 @@
</script>
{#each links as item}
{#if !item.perCluster}
{#if item.listOptions}
<Dropdown nav inNavbar>
<DropdownToggle nav caret>
<Icon name={item.icon} />
{item.title}
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<DropdownItem
href={item.href}
>
All Clusters
</DropdownItem>
<DropdownItem divider />
{#each clusters as cluster}
<Dropdown nav direction="right">
<DropdownToggle nav caret class="dropdown-item py-1 px-2">
{cluster.name}
</DropdownToggle>
<DropdownMenu>
<DropdownItem class="py-1 px-2"
href={item.href + '?cluster=' + cluster.name}
>
All Jobs
</DropdownItem>
<DropdownItem class="py-1 px-2"
href={item.href + '?cluster=' + cluster.name + '&state=running'}
>
Running Jobs
</DropdownItem>
</DropdownMenu>
</Dropdown>
{/each}
</DropdownMenu>
</Dropdown>
{:else if !item.perCluster}
<NavLink href={item.href} active={window.location.pathname == item.href}
><Icon name={item.icon} /> {item.title}</NavLink
>

View File

@@ -27,7 +27,9 @@
Spinner,
Card,
} from "@sveltestrap/sveltestrap";
import { minScope } from "../generic/utils.js";
import {
minScope,
} from "../generic/utils.js";
import Timeseries from "../generic/plots/MetricPlot.svelte";
export let job;
@@ -39,9 +41,8 @@
export let rawData;
export let isShared = false;
let selectedHost = null,
plot,
error = null;
let selectedHost = null;
let error = null;
let selectedScope = minScope(scopes);
let selectedResolution;
let pendingResolution = 600;
@@ -49,11 +50,12 @@
let patternMatches = false;
let nodeOnly = false; // If, after load-all, still only node scope returned
let statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null);
let zoomState = null;
let pendingZoomState = null;
const dispatch = createEventDispatcher();
const statsPattern = /(.*)-stat$/;
const unit = (metricUnit?.prefix ? metricUnit.prefix : "") + (metricUnit?.base ? metricUnit.base : "");
const resolutions = [600, 240, 60] // DEV: Make configurable
const client = getContextClient();
const subQuery = gql`
query ($dbid: ID!, $selectedMetrics: [String!]!, $selectedScopes: [MetricScope!]!, $selectedResolution: Int) {
@@ -86,6 +88,19 @@
}
`;
function handleZoom(detail) {
if ( // States must differ; re-setting an identical state causes a feedback loop
(pendingZoomState?.x?.min !== detail?.lastZoomState?.x?.min) &&
(pendingZoomState?.y?.max !== detail?.lastZoomState?.y?.max)
) {
pendingZoomState = {...detail.lastZoomState}
}
if (detail?.newRes) { // Triggers GQL refetch
pendingResolution = detail.newRes
}
}
let metricData;
let selectedScopes = [...scopes]
const dbid = job.id;
@@ -119,11 +134,15 @@
});
if ($metricData && !$metricData.fetching) {
rawData = $metricData.data.singleUpdate.map((x) => x.metric)
scopes = $metricData.data.singleUpdate.map((x) => x.scope)
statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
// Keep zoom level if resolution changed via zoom
if (pendingZoomState) {
zoomState = {...pendingZoomState}
}
// Set selected scope to min of returned scopes
if (selectedScope == "load-all") {
selectedScope = minScope(scopes)
@@ -176,11 +195,6 @@
{/each}
</select>
{/if}
<select class="form-select" bind:value={pendingResolution}>
{#each resolutions as res}
<option value={res}>Timestep: {res}</option>
{/each}
</select>
</InputGroup>
{#key series}
{#if $metricData?.fetching == true}
@@ -189,7 +203,7 @@
<Card body color="danger">{error.message}</Card>
{:else if series != null && !patternMatches}
<Timeseries
bind:this={plot}
on:zoom={({detail}) => { handleZoom(detail) }}
{width}
height={300}
cluster={job.cluster}
@@ -199,10 +213,11 @@
metric={metricName}
{series}
{isShared}
{zoomState}
/>
{:else if statsSeries[selectedScopeIndex] != null && patternMatches}
<Timeseries
bind:this={plot}
on:zoom={({detail}) => { handleZoom(detail) }}
{width}
height={300}
cluster={job.cluster}
@@ -212,6 +227,7 @@
metric={metricName}
{series}
{isShared}
{zoomState}
statisticsSeries={statsSeries[selectedScopeIndex]}
useStatsSeries={!!statsSeries[selectedScopeIndex]}
/>
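
The pending/committed split above is what keeps the zoom window stable across a resolution change: handleZoom only buffers the state, and it is promoted to zoomState once the refetched data arrives, so the re-initialized uPlot opens on the same window at the finer timestep. The handoff in isolation (a runnable sketch with assumed shapes):

let zoomState = null;        // what the plot is initialized with
let pendingZoomState = null; // buffered until the new data lands

function onZoom(detail) {
  if (detail?.lastZoomState) pendingZoomState = { ...detail.lastZoomState };
}

function onDataArrived() {
  if (pendingZoomState) {
    zoomState = { ...pendingZoomState }; // plot re-inits with opts.scales = {...zoomState}
  }
}

onZoom({ lastZoomState: { x: { min: 120, max: 480 } } });
onDataArrived();
console.log(zoomState); // { x: { min: 120, max: 480 } }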