From 2a43e0bcf3642516126d12582b122d734568146e Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 19 Sep 2022 17:36:45 +0200 Subject: [PATCH] Update config README. Add helper script for subClusters --- configs/README.md | 5 +- configs/generate-subcluster.pl | 228 +++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 2 deletions(-) create mode 100755 configs/generate-subcluster.pl diff --git a/configs/README.md b/configs/README.md index 17ee551..6cf54c8 100644 --- a/configs/README.md +++ b/configs/README.md @@ -3,7 +3,7 @@ cc-backend requires a configuration file speciyfing the cluster systems to be used. Still many default options documented below are used. cc-backend tries to load a config.json from the working directory per default. To overwrite the default specify a json config file location using the command line option `--config `. -All security relevant configuration. e.g., keys and passwords, are set using environment variables. +All security relevant configuration, e.g., keys and passwords, are set using environment variables. It is supported to specify these by means of an `.env` file located in the project root. ## Configuration Options @@ -18,6 +18,7 @@ It is supported to specify these by means of an `.env` file located in the proje * `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`. * `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`. * `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`. +* `validate`: Type bool. Validate all input json documents against json schema. * `"session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`. 
* `"jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`. * `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates. @@ -27,7 +28,7 @@ It is supported to specify these by means of an `.env` file located in the proje * `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`. - `url`: Type string. URL of LDAP directory server. - `user_base`: Type string. Base DN of user tree root. - - `search_dn`: Type string. DN for authenticating LDAP admin account with fgeneral read rights. + - `search_dn`: Type string. DN for authenticating LDAP admin account with general read rights. - `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`. - `user_filter`: Type string. Filter to extract users for syncing. - `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration. 
diff --git a/configs/generate-subcluster.pl b/configs/generate-subcluster.pl
new file mode 100755
index 0000000..51fd2cc
--- /dev/null
+++ b/configs/generate-subcluster.pl
@@ -0,0 +1,269 @@
+#!/usr/bin/env perl
+# Generate a cc-backend subCluster JSON skeleton for the local node.
+#
+# Reads the node topology from `likwid-topology -O -G`, measures memory
+# bandwidth and peak flop rates with `likwid-bench` and prints a JSON
+# fragment on STDOUT. The "name" and "nodes" entries have to be filled in
+# by hand afterwards.
+use strict;
+use warnings;
+use utf8;
+
+my %INFO;
+my %DOMAINS;
+my $SMT;
+$DOMAINS{socket}       = [];
+$DOMAINS{memoryDomain} = [];
+$DOMAINS{core}         = [];
+$DOMAINS{gpu}          = [];
+
+my $gpuID = -1;
+my $id;
+
+# likwid-topology CSV labels whose value is copied verbatim into %INFO.
+my %INFO_KEY_FOR = (
+    'CPU name:'         => 'processor',
+    'Sockets:'          => 'socketsPerNode',
+    'Cores per socket:' => 'coresPerSocket',
+    'GPU count:'        => 'numGPUs',
+    'Threads per core:' => 'threadsPerCore',
+    'NUMA domains:'     => 'memoryDomainsPerNode',
+);
+
+# Return all comma separated fields of a CSV line except the leading label.
+sub csv_list {
+    my ($line) = @_;
+    my @fields = split( ",", $line );
+    shift @fields;
+    return @fields;
+}
+
+# Return the first value (second field) of a CSV line, or undef if missing.
+sub csv_value {
+    my ($line) = @_;
+    my @values = csv_list($line);
+    return $values[0];
+}
+
+# Return the first value of a CSV line with the first "0x" removed.
+sub csv_hex_value {
+    my ($line) = @_;
+    my $value = csv_value($line);
+    $value = '0' unless defined $value;
+    $value =~ s/0x//;
+    return $value;
+}
+
+# Run likwid-bench with the given arguments and return the rate reported
+# under $label ("MByte/s" or "MFlops/s") divided by 1000 and rounded to the
+# nearest integer. If several lines match, the last one wins. Warns and
+# returns 0 if no rate can be parsed, so that the JSON output stays valid.
+sub run_bench {
+    my ( $args, $label ) = @_;
+    my $out = `likwid-bench $args`;
+    $out = '' unless defined $out;
+    my $rate;
+    foreach my $ln ( split( "\n", $out ) ) {
+        if ( $ln =~ /\Q$label\E:\s+([0-9.]+)/ ) {
+            $rate = int( $1 / 1000 + 0.5 );
+        }
+    }
+    unless ( defined $rate ) {
+        warn "likwid-bench $args: no $label result found, using 0\n";
+        $rate = 0;
+    }
+    return $rate;
+}
+
+# Step 1 : Extract system information
+my $topo = `likwid-topology -O -G`;
+die "likwid-topology produced no output (is LIKWID in PATH?)\n"
+  unless defined($topo) && length($topo);
+
+foreach my $ln ( split( "\n", $topo ) ) {
+    if ( $ln =~ /^STRUCT,NUMA Topology ([0-9]+)/ ) {
+        $id = $1;
+    }
+    if ( $ln =~ /^Processors/ && defined $id && $id >= 0 ) {
+        $DOMAINS{memoryDomain}[$id] = [ csv_list($ln) ];
+    }
+    if ( $ln =~ /^STRUCT,Cache Topology L1/ ) {
+        # -1 marks the L1 cache section; its cache groups are stored as
+        # the per-core thread lists below.
+        $id = -1;
+    }
+    if ( $ln =~ /^Cache groups/ && defined $id && $id == -1 ) {
+        my $i = 0;
+        foreach my $core ( csv_list($ln) ) {
+            $DOMAINS{core}[ $i++ ] = [ split( " ", $core ) ];
+        }
+        $id = 0;
+    }
+    if ( $ln =~ /^ID:/ ) {
+        my $value = csv_value($ln);
+        $gpuID = defined $value ? $value : -1;
+    }
+    if ( $gpuID >= 0 ) {
+        if ( $ln =~ /^Name:/ ) {
+            my $model = csv_value($ln);
+            $model = '' unless defined $model;
+            my %gpu = ( model => $model );
+            if ( $model =~ /nvidia/i ) {
+                $gpu{type} = "Nvidia GPU";
+            }
+            elsif ( $model =~ /amd/i ) {
+                $gpu{type} = "AMD GPU";
+            }
+            elsif ( $model =~ /intel/i ) {
+                $gpu{type} = "Intel GPU";
+            }
+            $DOMAINS{gpu}[$gpuID] = \%gpu;
+        }
+        if ( $ln =~ /^PCI bus:/ ) {
+            $DOMAINS{gpu}[$gpuID]{bus} = csv_hex_value($ln);
+        }
+        if ( $ln =~ /^PCI domain:/ ) {
+            $DOMAINS{gpu}[$gpuID]{domain} = csv_hex_value($ln);
+        }
+        if ( $ln =~ /^PCI device/ ) {
+            $DOMAINS{gpu}[$gpuID]{device} = csv_hex_value($ln);
+            # The PCI device line ends the record of the current GPU.
+            $gpuID = -1;
+        }
+    }
+    foreach my $label ( keys %INFO_KEY_FOR ) {
+        if ( index( $ln, $label ) == 0 ) {
+            $INFO{ $INFO_KEY_FOR{$label} } = csv_value($ln);
+        }
+    }
+    if ( $ln =~ /^CPU type/ ) {
+        $INFO{family} = csv_value($ln);
+        $INFO{family} = '' unless defined $INFO{family};
+        $INFO{family} =~ s/[\(\)]//g;
+    }
+    if ( my ($socketID) = $ln =~ /^Socket ([0-9]+)/ ) {
+        $DOMAINS{socket}[$socketID] = [ csv_list($ln) ];
+    }
+}
+
+$SMT = $INFO{threadsPerCore};
+$INFO{numGPUs}   = 0  unless $INFO{numGPUs};
+$INFO{processor} = '' unless defined $INFO{processor};
+foreach my $key (qw(socketsPerNode coresPerSocket threadsPerCore memoryDomainsPerNode)) {
+    die "likwid-topology output lacks a usable '$key' value\n"
+      unless $INFO{$key};
+}
+
+# Step 2 : Render the topology lists as JSON array fragments
+my @sockets;
+my @nodeThreads;
+foreach my $socket ( grep { defined $_ } @{ $DOMAINS{socket} } ) {
+    push @sockets, "[" . join( ",", @{$socket} ) . "]";
+    push @nodeThreads, @{$socket};
+}
+# Join once over all sockets so that neighbouring sockets are separated by
+# a comma as well.
+my $node = join( ",", @nodeThreads );
+$INFO{sockets} = join( ",\n", @sockets );
+
+my @memDomains;
+foreach my $d ( grep { defined $_ } @{ $DOMAINS{memoryDomain} } ) {
+    push @memDomains, "[" . join( ",", @{$d} ) . "]";
+}
+$INFO{memoryDomains} = join( ",\n", @memDomains );
+
+my @cores;
+foreach my $c ( grep { defined $_ } @{ $DOMAINS{core} } ) {
+    push @cores, "[" . join( ",", @{$c} ) . "]";
+}
+$INFO{cores} = join( ",", @cores );
+
+# Step 3 : Measure memory bandwidth and peak flop rates
+my $numCoresPerNode         = $INFO{coresPerSocket} * $INFO{socketsPerNode};
+my $numCoresPerMemoryDomain = $numCoresPerNode / $INFO{memoryDomainsPerNode};
+
+my $exp = join( ' ',
+    map( "-w M$_:1GB:$numCoresPerMemoryDomain:1:$SMT",
+        0 .. $INFO{memoryDomainsPerNode} - 1 ) );
+print "Using: $exp\n";
+my $memBw       = run_bench( "-t clload $exp",                           "MByte/s" );
+my $flopsScalar = run_bench( "-t peakflops -w N:24kB:$numCoresPerNode", "MFlops/s" );
+
+# Pick the SIMD peakflops kernel from the first "flags" line of cpuinfo.
+# NOTE(review): "avx512ifma" is not set on every AVX-512 capable CPU;
+# confirm whether "avx512f" was intended here.
+my $simd = "";
+if ( open( my $fh, "<", "/proc/cpuinfo" ) ) {
+    while ( my $ln = <$fh> ) {
+        next unless $ln =~ /flags/;
+        $simd = '_avx_fma'    if $ln =~ /avx2/;
+        $simd = '_avx512_fma' if $ln =~ /avx512ifma/;
+        last;
+    }
+    close $fh;
+}
+else {
+    warn "Cannot read /proc/cpuinfo ($!), using the plain peakflops kernel\n";
+}
+
+print "Using peakflops variant $simd\n";
+my $flopsSimd = run_bench( "-t peakflops$simd -w N:500kB:$numCoresPerNode", "MFlops/s" );
+
+# Step 4 : Render the optional accelerator list
+# NOTE(review): the GPU entry layout and the "name" placeholder below were
+# rebuilt from the values this script computes; confirm against the schema.
+$INFO{gpus} = "";
+if ( $INFO{numGPUs} > 0 ) {
+    my @gpuStr;
+    foreach my $gpuIdx ( 0 .. $INFO{numGPUs} - 1 ) {
+        my $gpu = $DOMAINS{gpu}[$gpuIdx];
+        unless ( defined $gpu ) {
+            warn "No topology record for GPU $gpuIdx, skipping it\n";
+            next;
+        }
+        my ( $domain, $bus, $device ) =
+          map { defined $gpu->{$_} ? $gpu->{$_} : '0' } qw(domain bus device);
+        my $deviceAddr = sprintf( "%08x:%02x:%02x.0",
+            hex($domain), hex($bus), hex($device) );
+        my $type = defined $gpu->{type} ? $gpu->{type} : '';
+        my $entry = <<"END";
+            {
+                "id": "$deviceAddr",
+                "type": "$type",
+                "model": "$gpu->{model}"
+            }
+END
+        chomp $entry;
+        push @gpuStr, $entry;
+    }
+    # The trailing comma separates the list from the "core" entry.
+    $INFO{gpus} = "\"accelerators\": [\n" . join( ",\n", @gpuStr ) . "\n        ],";
+}
+
+# Step 5 : Print the subCluster JSON
+print <<"END";
+{
+    "name": "<FILL IN>",
+    "processorType": "$INFO{processor}",
+    "socketsPerNode": $INFO{socketsPerNode},
+    "coresPerSocket": $INFO{coresPerSocket},
+    "threadsPerCore": $INFO{threadsPerCore},
+    "flopRateScalar": $flopsScalar,
+    "flopRateSimd": $flopsSimd,
+    "memoryBandwidth": $memBw,
+    "nodes": "",
+    "topology": {
+        "node": [$node],
+        "socket": [
+            $INFO{sockets}
+        ],
+        "memoryDomain": [
+            $INFO{memoryDomains}
+        ],
+        $INFO{gpus}
+        "core": [
+            $INFO{cores}
+        ]
+    }
+}
+END