Update config README. Add helper script for subClusters

This commit is contained in:
Jan Eitzinger 2022-09-19 17:36:45 +02:00
parent 71d6bf81c2
commit 2a43e0bcf3
2 changed files with 231 additions and 2 deletions

View File

@ -3,7 +3,7 @@
cc-backend requires a configuration file specifying the cluster systems to be used. Still many default cc-backend requires a configuration file specifying the cluster systems to be used. Still many default
options documented below are used. cc-backend tries to load a config.json from the working directory per default. options documented below are used. cc-backend tries to load a config.json from the working directory per default.
To overwrite the default specify a json config file location using the command line option `--config <filepath>`. To overwrite the default specify a json config file location using the command line option `--config <filepath>`.
All security-relevant configuration, e.g., keys and passwords, is set using environment variables. All security-relevant configuration, e.g., keys and passwords, is set using environment variables.
It is supported to specify these by means of an `.env` file located in the project root. It is supported to specify these by means of an `.env` file located in the project root.
## Configuration Options ## Configuration Options
@ -18,6 +18,7 @@ It is supported to specify these by means of an `.env` file located in the proje
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`. * `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
* `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`. * `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`.
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`. * `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
* `validate`: Type bool. Validate all input json documents against json schema.
* `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`. * `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
* `jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`. * `jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`.
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates. * `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
@ -27,7 +28,7 @@ It is supported to specify these by means of an `.env` file located in the proje
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`. * `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
- `url`: Type string. URL of LDAP directory server. - `url`: Type string. URL of LDAP directory server.
- `user_base`: Type string. Base DN of user tree root. - `user_base`: Type string. Base DN of user tree root.
- `search_dn`: Type string. DN for authenticating LDAP admin account with fgeneral read rights. - `search_dn`: Type string. DN for authenticating LDAP admin account with general read rights.
- `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`. - `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
- `user_filter`: Type string. Filter to extract users for syncing. - `user_filter`: Type string. Filter to extract users for syncing.
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration. - `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.

228
configs/generate-subcluster.pl Executable file
View File

@ -0,0 +1,228 @@
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
# Scalar facts about the node, keyed by the name used when the JSON is printed.
my %INFO;
# Lists collected per topology domain while parsing likwid-topology output:
# hardware thread lists for socket/memoryDomain/core, one record per GPU.
my %DOMAINS = (
    socket       => [],
    memoryDomain => [],
    core         => [],
    gpu          => [],
);
# Threads per core as reported by likwid-topology.
my $SMT;
# Declared for completeness; not referenced anywhere in the visible script.
my $numMemoryDomains;
# Parser state: ID of the GPU record currently being read, -1 while outside one.
my $gpuID = -1;
# Parser state: index of the NUMA domain currently being read; the parser also
# sets it to -1 to mark that the L1 cache section has just started.
my $id;
# Step 1 : Extract system information
#
# Runs `likwid-topology -O -G` and scans its comma-separated output line by
# line. Scalar facts are stored in %INFO, per-domain lists in %DOMAINS.
# The scan is stateful:
#   $id    - index of the NUMA domain being read; set to -1 when the L1 cache
#            section starts so that only the L1 "Cache groups" row is used to
#            define the cores, then reset to 0.
#   $gpuID - ID of the GPU record being read, -1 while outside of a record.
my $topo = `likwid-topology -O -G`;
if ( !defined($topo) || $topo !~ /\S/ ) {
    die "likwid-topology -O -G produced no output (is LIKWID installed and in PATH?)\n";
}
$INFO{numGPUs} = 0;
foreach my $ln ( split("\n", $topo) ) {
    # Column 0 is the row label, all remaining columns are its values.
    my ( undef, @values ) = split(",", $ln);

    # --- NUMA domains: remember the domain index, then store its thread list.
    if ( $ln =~ /^STRUCT,NUMA Topology ([0-9]+)/ ) {
        $id = $1;
    }
    if ( $ln =~ /^Processors/ ) {
        $DOMAINS{memoryDomain}[$id] = [ @values ];
    }

    # --- Cores: taken from the cache groups of the L1 cache section only.
    if ( $ln =~ /^STRUCT,Cache Topology L1/ ) {
        $id = -1;
    }
    if ( $ln =~ /^Cache groups/ && defined($id) && $id == -1 ) {
        my $i = 0;
        foreach my $core ( @values ) {
            # Each column holds the whitespace-separated threads of one group.
            $DOMAINS{core}[$i++] = [ split(" ", $core) ];
        }
        $id = 0;    # ignore the cache groups of any following cache section
    }

    # --- GPUs: a record starts with "ID:" and ends with the "PCI device" row.
    if ( $ln =~ /^ID:/ ) {
        $gpuID = $values[0];
    }
    if ( $gpuID >= 0 ) {
        if ( $ln =~ /^Name:/ ) {
            my $model = $values[0];
            $DOMAINS{gpu}[$gpuID] = {};
            $DOMAINS{gpu}[$gpuID]{model} = $model;
            # Derive the vendor label from the model name (case-insensitive).
            if ( $model =~ /nvidia/i ) {
                $DOMAINS{gpu}[$gpuID]{type} = "Nvidia GPU";
            }
            elsif ( $model =~ /amd/i ) {
                $DOMAINS{gpu}[$gpuID]{type} = "AMD GPU";
            }
            elsif ( $model =~ /intel/i ) {
                $DOMAINS{gpu}[$gpuID]{type} = "Intel GPU";
            }
        }
        # PCI coordinates are stored without their "0x" prefix.
        if ( $ln =~ /^PCI bus:/ ) {
            ( my $bus = $values[0] ) =~ s/0x//;
            $DOMAINS{gpu}[$gpuID]{bus} = $bus;
        }
        if ( $ln =~ /^PCI domain:/ ) {
            ( my $domain = $values[0] ) =~ s/0x//;
            $DOMAINS{gpu}[$gpuID]{domain} = $domain;
        }
        if ( $ln =~ /^PCI device/ ) {
            ( my $device = $values[0] ) =~ s/0x//;
            $DOMAINS{gpu}[$gpuID]{device} = $device;
            $gpuID = -1;    # record complete
        }
    }

    # --- Scalar node properties.
    if ( $ln =~ /^CPU name:/ ) {
        $INFO{processor} = $values[0];
    }
    if ( $ln =~ /^CPU type/ ) {
        $INFO{family} = $values[0];
        $INFO{family} =~ s/[\(\)]//g;    # drop parentheses
    }
    if ( $ln =~ /^Sockets:/ ) {
        $INFO{socketsPerNode} = $values[0];
    }
    if ( $ln =~ /^Cores per socket:/ ) {
        $INFO{coresPerSocket} = $values[0];
    }
    if ( $ln =~ /^GPU count:/ ) {
        $INFO{numGPUs} = $values[0];
    }
    if ( $ln =~ /^Threads per core:/ ) {
        $SMT = $values[0];
        $INFO{threadsPerCore} = $SMT;
    }
    if ( $ln =~ /^NUMA domains:/ ) {
        $INFO{memoryDomainsPerNode} = $values[0];
    }

    # --- Sockets: "Socket <n>" rows list the hardware threads of socket n.
    if ( $ln =~ /^Socket ([0-9]+)/ ) {
        $DOMAINS{socket}[$1] = [ @values ];
    }
}

# The rest of the script multiplies, divides by and interpolates these counts.
# Stop here with a clear message instead of failing later with a division by
# zero or emitting a broken JSON document.
foreach my $key ( qw(socketsPerNode coresPerSocket threadsPerCore memoryDomainsPerNode) ) {
    my $value = $INFO{$key};
    unless ( defined($value) && $value =~ /^\s*([0-9]+)/ && $1 > 0 ) {
        die "Could not determine $key from likwid-topology output\n";
    }
}
# Turn the collected hardware thread lists into the JSON array fragments that
# are interpolated into the final document.

# One "[a,b,c]" entry per socket.
my @sockets = map { my $ids = join(",", @{$_}); "[$ids]" } @{ $DOMAINS{socket} };
$INFO{sockets} = join(",\n", @sockets);

# All hardware threads of the node in socket order. The lists of different
# sockets have to be separated by a comma as well; simply appending them would
# fuse the last ID of one socket with the first ID of the next one.
my $node = join(",", map { @{$_} } @{ $DOMAINS{socket} });

# One "[a,b,c]" entry per memory (NUMA) domain.
my @memDomains = map { my $ids = join(",", @{$_}); "[$ids]" } @{ $DOMAINS{memoryDomain} };
$INFO{memoryDomains} = join(",\n", @memDomains);

# One "[a,b]" entry per core, all on a single line.
my @cores = map { my $ids = join(",", @{$_}); "[$ids]" } @{ $DOMAINS{core} };
$INFO{cores} = join(",", @cores);
# Measure the memory bandwidth with likwid-bench (clload kernel).
my $numCoresPerNode = $INFO{coresPerSocket} * $INFO{socketsPerNode};
my $numCoresPerMemoryDomain = $numCoresPerNode / $INFO{memoryDomainsPerNode};
my $memBw;
# One "-w" workgroup per memory domain (M0, M1, ...): 1GB working set,
# $numCoresPerMemoryDomain threads, and "1:$SMT" as the trailing two fields
# (presumably chunk size and stride so that one thread per core is used -
# see the likwid-bench documentation).
my $exp = join(' ',map("-w M$_:1GB:$numCoresPerMemoryDomain:1:$SMT", 0 .. $INFO{memoryDomainsPerNode}-1));
print "Using: $exp\n";
my $out = `likwid-bench -t clload $exp`;
foreach my $ln ( split("\n", defined($out) ? $out : "") ) {
    if ( $ln =~ /MByte\/s:\s+([0-9.]+)/ ) {
        # Reported in MByte/s; store it divided by 1000, rounded to nearest.
        $memBw = int($1/1000 + 0.5);
    }
}
if ( !defined $memBw ) {
    # Keep the generated document valid JSON and tell the user to fix the value.
    warn "likwid-bench clload reported no bandwidth; setting memoryBandwidth to 0\n";
    $memBw = 0;
}
# Measure the scalar floating point rate with likwid-bench (peakflops kernel)
# using all cores of the node and a 24kB working set.
my $flopsScalar;
$out = `likwid-bench -t peakflops -w N:24kB:$numCoresPerNode`;
foreach my $ln ( split("\n", defined($out) ? $out : "") ) {
    if ( $ln =~ /MFlops\/s:\s+([0-9.]+)/ ) {
        # Reported in MFlops/s; store it divided by 1000, rounded to nearest.
        $flopsScalar = int($1/1000 + 0.5);
    }
}
if ( !defined $flopsScalar ) {
    # Keep the generated document valid JSON and tell the user to fix the value.
    warn "likwid-bench peakflops reported no flop rate; setting flopRateScalar to 0\n";
    $flopsScalar = 0;
}
# Pick the suffix of the peakflops kernel from the CPU flags listed in
# /proc/cpuinfo. Only the first line containing "flags" is inspected. An empty
# suffix selects the plain peakflops kernel.
my $simd = "";
my $fh;
if ( open($fh, "<", "/proc/cpuinfo") ) {
    while ( my $ln = <$fh> ) {
        next unless $ln =~ /flags/;
        $simd = '_avx_fma' if $ln =~ /avx2/;
        # NOTE(review): avx512ifma is only one of several AVX-512 feature
        # flags; a CPU that lists avx512f but not avx512ifma ends up with the
        # AVX2 variant here - confirm that this is intended.
        $simd = '_avx512_fma' if $ln =~ /avx512ifma/;
        last;
    }
    close $fh;
}
else {
    # Best effort: without the flags the plain kernel is benchmarked.
    warn "Cannot open /proc/cpuinfo: $!; using plain peakflops kernel\n";
}
print "Using peakflops variant $simd\n";
# Measure the SIMD floating point rate with the peakflops kernel variant
# selected in $simd, using all cores of the node and a 500kB working set.
my $flopsSimd;
$out = `likwid-bench -t peakflops$simd -w N:500kB:$numCoresPerNode`;
foreach my $ln ( split("\n", defined($out) ? $out : "") ) {
    if ( $ln =~ /MFlops\/s:\s+([0-9.]+)/ ) {
        # Reported in MFlops/s; store it divided by 1000, rounded to nearest.
        $flopsSimd = int($1/1000 + 0.5);
    }
}
if ( !defined $flopsSimd ) {
    # Keep the generated document valid JSON and tell the user to fix the value.
    warn "likwid-bench peakflops$simd reported no flop rate; setting flopRateSimd to 0\n";
    $flopsSimd = 0;
}
# Build the "accelerators" member of the topology object, one entry per GPU.
# The fragment is interpolated between two other members of the JSON object,
# so it stays empty on nodes without GPUs and ends with a comma otherwise.
$INFO{gpus} = "";
if ( $INFO{numGPUs} > 0 ) {
    my @gpuStr;
    foreach my $gpuIdx ( 0 .. $INFO{numGPUs} - 1 ) {
        my $record = $DOMAINS{gpu}[$gpuIdx];
        if ( ref($record) ne 'HASH' ) {
            # GPU count and parsed GPU records disagree; do not die on undef.
            warn "No topology record for GPU $gpuIdx; skipping it\n";
            next;
        }
        my %gpu = %{$record};
        # The type is only set for recognised vendors.
        my $type = defined $gpu{type} ? $gpu{type} : "";
        # PCI address formatted as domain:bus:device.0 from the hex strings.
        my $deviceAddr = sprintf("%08x:%02x:%02x.0", hex($gpu{domain}), hex($gpu{bus}), hex($gpu{device}));
        push @gpuStr, <<"END";
{
"id": "$deviceAddr",
"type": "$type",
"model": "$gpu{model}"
}
END
    }
    $INFO{gpus} = "\"accelerators\": [\n" . join(",\n", @gpuStr) . "],\n";
}
# Print the subCluster JSON template. "name" and "nodes" are placeholders the
# user has to fill in by hand.
# The accelerators fragment is only set on nodes with GPUs; fall back to an
# empty string so that nothing undefined is interpolated.
my $accelerators = defined $INFO{gpus} ? $INFO{gpus} : "";
print <<"END";
{
"name": "<FILL IN>",
"processorType": "$INFO{processor}",
"socketsPerNode": $INFO{socketsPerNode},
"coresPerSocket": $INFO{coresPerSocket},
"threadsPerCore": $INFO{threadsPerCore},
"flopRateScalar": $flopsScalar,
"flopRateSimd": $flopsSimd,
"memoryBandwidth": $memBw,
"nodes": "<FILL IN NODE RANGES>",
"topology": {
"node": [$node],
"socket": [
$INFO{sockets}
],
"memoryDomain": [
$INFO{memoryDomains}
],
$accelerators
"core": [
$INFO{cores}
]
}
}
END