Initial commit

Aditya Ujeniya 2024-10-10 15:04:47 +02:00
parent cf7ddde758
commit 523a688bed
15 changed files with 89 additions and 1236 deletions

View File

@@ -1,10 +1,11 @@
-FROM golang:1.17
+FROM golang:1.22.4
 RUN apt-get update
 RUN apt-get -y install git
 RUN git clone https://github.com/ClusterCockpit/cc-metric-store.git /cc-metric-store
-RUN cd /cc-metric-store && go build
+RUN ls
+RUN cd /cc-metric-store && go build ./cmd/cc-metric-store
 # Reactivate when latest commit is available
 #RUN go get -d -v github.com/ClusterCockpit/cc-metric-store
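The build now follows the upstream repository's cmd/ layout, where the main package lives in cmd/cc-metric-store. A quick local smoke test of this Dockerfile (the image tag is illustrative):

$ docker build -t cc-metric-store:dev .
$ docker run --rm cc-metric-store:dev ls /cc-metric-store    # the built binary should appear next to the sources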

View File

@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-if [ -d symfony ]; then
-    echo "Data already initialized!"
-    echo -n "Perform a fresh initialisation? [yes to proceed / no to exit] "
-    read -r answer
-    if [ "$answer" == "yes" ]; then
-        echo "Cleaning directories ..."
-        rm -rf symfony
-        rm -rf job-archive
-        rm -rf influxdb/data/*
-        rm -rf sqldata/*
-        echo "done."
-    else
-        echo "Aborting ..."
-        exit
-    fi
-fi
-mkdir symfony
-wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive_stable.tar.xz
-tar xJf job-archive_stable.tar.xz
-rm ./job-archive_stable.tar.xz
-# 101 is the uid and gid of the user and group www-data in the cc-php container running php-fpm.
-# For a demo with no new jobs it is enough to give www read permissions on that directory.
-# echo "This script needs to chown the job-archive directory so that the application can write to it:"
-# sudo chown -R 82:82 ./job-archive
-mkdir -p influxdb/data
-wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/influxdbv2-data_stable.tar.xz
-cd influxdb/data
-tar xJf ../../influxdbv2-data_stable.tar.xz
-rm ../../influxdbv2-data_stable.tar.xz

File diff suppressed because it is too large

View File

@@ -1,5 +0,0 @@
-[mysqld]
-innodb_buffer_pool_size=4096M
-innodb_log_file_size=64M
-innodb_lock_wait_timeout=900
-max_allowed_packet=16M

View File

@@ -1,4 +0,0 @@
-ConstrainCores=yes
-ConstrainDevices=no
-ConstrainRAMSpace=yes
-ConstrainSwapSpace=yes

View File

@@ -1,48 +0,0 @@
-# slurm.conf file generated by configurator.html.
-# Put this file on all nodes of your cluster.
-# See the slurm.conf man page for more information.
-#
-ClusterName=snowflake
-SlurmctldHost=slurmctld
-SlurmUser=slurm
-SlurmctldPort=6817
-SlurmdPort=6818
-MpiDefault=none
-ProctrackType=proctrack/linuxproc
-ReturnToService=1
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
-SlurmdSpoolDir=/var/spool/slurm/d
-StateSaveLocation=/var/spool/slurm/ctld
-SwitchType=switch/none
-TaskPlugin=task/affinity
-#
-# TIMERS
-InactiveLimit=0
-KillWait=30
-MinJobAge=300
-SlurmctldTimeout=120
-SlurmdTimeout=300
-Waittime=0
-#
-# SCHEDULING
-SchedulerType=sched/backfill
-SelectType=select/cons_tres
-#
-# LOGGING AND ACCOUNTING
-AccountingStorageHost=slurmdb
-AccountingStoragePort=6819
-AccountingStorageType=accounting_storage/slurmdbd
-AccountingStorageUser=slurm
-AccountingStoreFlags=job_script,job_comment,job_env,job_extra
-JobCompType=jobcomp/none
-JobAcctGatherFrequency=30
-JobAcctGatherType=jobacct_gather/linux
-SlurmctldDebug=info
-SlurmctldLogFile=/var/log/slurmctld.log
-SlurmdDebug=info
-SlurmdLogFile=/var/log/slurmd.log
-#
-# COMPUTE NODES
-NodeName=node0[1-2] CPUs=1 State=UNKNOWN
-PartitionName=main Nodes=ALL Default=YES MaxTime=INFINITE State=UP
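The removed slurm.conf described a minimal two-node demo cluster (node01 and node02 in partition main, accounting via slurmdbd on host slurmdb). On a running controller, such a layout can be verified with the standard Slurm tools, e.g.:

$ sinfo -N -l                               # node-oriented view: node01/node02 in partition main
$ scontrol show config | grep ClusterName   # should report snowflake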

View File

@@ -1,31 +0,0 @@
-# Archive info
-#ArchiveJobs=yes
-#ArchiveDir="/tmp"
-#ArchiveSteps=yes
-#ArchiveScript=
-#JobPurge=12
-#StepPurge=1
-#
-# Authentication info
-AuthType=auth/munge
-AuthInfo=/var/run/munge/munge.socket.2
-#
-# slurmDBD info
-DbdAddr=slurmdb
-DbdHost=slurmdb
-DbdPort=6819
-SlurmUser=slurm
-DebugLevel=4
-LogFile=/var/log/slurm/slurmdbd.log
-PidFile=/var/run/slurmdbd.pid
-#PluginDir=/usr/lib/slurm
-#PrivateData=accounts,users,usage,jobs
-#TrackWCKey=yes
-#
-# Database info
-StorageType=accounting_storage/mysql
-StorageHost=mariadb
-StoragePort=3306
-StoragePass=demo
-StorageUser=slurm
-StorageLoc=slurm_acct_db
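This slurmdbd configuration (also dropped here) pointed accounting at the mariadb container using the demo credentials above. With such a setup, the cluster is typically registered once against the accounting database, for example:

$ sacctmgr -i add cluster snowflake
$ sacctmgr show cluster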

View File

@@ -81,6 +81,7 @@ services:
 #     - SYS_NICE
   slurm-controller:
     image: cc-docker:22.05
+    container_name: slurmctld
     hostname: slurmctld
     build:
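Pinning container_name makes the controller addressable under a stable name regardless of the compose project prefix, matching the hostname set next to it. For instance, assuming the Slurm client tools are on the container's PATH:

$ docker exec -it slurmctld sinfo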

View File

@@ -9,7 +9,6 @@ use File::Slurp;
 use Data::Dumper;
 use Time::Piece;
 use Sort::Versions;
-use REST::Client;
 ### JOB-ARCHIVE
 my $localtime = localtime;
@@ -19,80 +18,80 @@ my $archiveSrc = './data/job-archive-source';
 my @ArchiveClusters;
 # Get clusters by job-archive/$subfolder
-opendir my $dh, $archiveSrc or die "can't open directory: $!";
-while ( readdir $dh ) {
-chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
+# opendir my $dh, $archiveSrc or die "can't open directory: $!";
+# while ( readdir $dh ) {
+# chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive' or $_ eq 'version.txt';
-my $cluster = $_;
-push @ArchiveClusters, $cluster;
-}
+# my $cluster = $_;
+# push @ArchiveClusters, $cluster;
+# }
-# start for jobarchive
-foreach my $cluster ( @ArchiveClusters ) {
-print "Starting to update start- and stoptimes in job-archive for $cluster\n";
+# # start for jobarchive
+# foreach my $cluster ( @ArchiveClusters ) {
+# print "Starting to update start- and stoptimes in job-archive for $cluster\n";
-opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
-while ( readdir $dhLevel1 ) {
-chomp; next if $_ eq '.' or $_ eq '..';
-my $level1 = $_;
+# opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
+# while ( readdir $dhLevel1 ) {
+# chomp; next if $_ eq '.' or $_ eq '..';
+# my $level1 = $_;
-if ( -d "$archiveSrc/$cluster/$level1" ) {
-opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
-while ( readdir $dhLevel2 ) {
-chomp; next if $_ eq '.' or $_ eq '..';
-my $level2 = $_;
-my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
-my $jobTarget = "$archiveTarget/$cluster/$level1/$level2/";
-my $jobOrigin = $jobSource;
-# check if files are directly accessible (old format) else get subfolders as file and update path
-if ( ! -e "$jobSource/meta.json") {
-my @folders = read_dir($jobSource);
-if (!@folders) {
-next;
-}
-# Only use first subfolder for now TODO
-$jobSource = "$jobSource/".$folders[0];
-}
-# check if subfolder contains file, else remove source and skip
-if ( ! -e "$jobSource/meta.json") {
-# rmtree $jobOrigin;
-next;
-}
+# if ( -d "$archiveSrc/$cluster/$level1" ) {
+# opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
+# while ( readdir $dhLevel2 ) {
+# chomp; next if $_ eq '.' or $_ eq '..';
+# my $level2 = $_;
+# my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
+# my $jobTarget = "$archiveTarget/$cluster/$level1/$level2/";
+# my $jobOrigin = $jobSource;
+# # check if files are directly accessible (old format) else get subfolders as file and update path
+# if ( ! -e "$jobSource/meta.json") {
+# my @folders = read_dir($jobSource);
+# if (!@folders) {
+# next;
+# }
+# # Only use first subfolder for now TODO
+# $jobSource = "$jobSource/".$folders[0];
+# }
+# # check if subfolder contains file, else remove source and skip
+# if ( ! -e "$jobSource/meta.json") {
+# # rmtree $jobOrigin;
+# next;
+# }
-my $rawstr = read_file("$jobSource/meta.json");
-my $json = decode_json($rawstr);
+# my $rawstr = read_file("$jobSource/meta.json");
+# my $json = decode_json($rawstr);
-# NOTE Start meta.json iteration here
-# my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
-# Set new startTime: Between 5 days and 1 day before now
+# # NOTE Start meta.json iteration here
+# # my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
+# # Set new startTime: Between 5 days and 1 day before now
-# Remove id from attributes
-$json->{startTime} = $epochtime - (int(rand(432000)) + 86400);
-$json->{stopTime} = $json->{startTime} + $json->{duration};
+# # Remove id from attributes
+# $json->{startTime} = $epochtime - (int(rand(432000)) + 86400);
+# $json->{stopTime} = $json->{startTime} + $json->{duration};
-# Add starttime subfolder to target path
-$jobTarget .= $json->{startTime};
+# # Add starttime subfolder to target path
+# $jobTarget .= $json->{startTime};
-# target is not directory
-if ( not -d $jobTarget ){
-# print "Writing files\n";
-# print "$cluster/$level1/$level2\n";
-make_path($jobTarget);
+# # target is not directory
+# if ( not -d $jobTarget ){
+# # print "Writing files\n";
+# # print "$cluster/$level1/$level2\n";
+# make_path($jobTarget);
-my $outstr = encode_json($json);
-write_file("$jobTarget/meta.json", $outstr);
+# my $outstr = encode_json($json);
+# write_file("$jobTarget/meta.json", $outstr);
-my $datstr = read_file("$jobSource/data.json");
-write_file("$jobTarget/data.json", $datstr);
-} else {
-# rmtree $jobSource;
-}
-}
-}
-}
-}
-print "Done for job-archive\n";
-sleep(1);
+# my $datstr = read_file("$jobSource/data.json.gz");
+# write_file("$jobTarget/data.json.gz", $datstr);
+# } else {
+# # rmtree $jobSource;
+# }
+# }
+# }
+# }
+# }
+# print "Done for job-archive\n";
+# sleep(1);
 ## CHECKPOINTS
 chomp(my $checkpointStart=`date --date 'TZ="Europe/Berlin" 0:00 7 days ago' +%s`);
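The checkpoint window opens at midnight (Europe/Berlin) seven days back; the backticked GNU date call resolves to an epoch timestamp, e.g.:

$ date --date 'TZ="Europe/Berlin" 0:00 7 days ago' +%s
1728165600    # illustrative value; depends on the day the script runs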

View File

@@ -10,6 +10,7 @@ else
     if [ ! -d var ]; then
         mkdir var
         touch var/job.db
+        make
     else
         echo "'cc-backend/var' exists. Cautiously exiting."
         echo -n "Stopped."
@@ -17,13 +18,13 @@
     fi
 fi
 # Download unedited job-archive to ./data/job-archive-source
 if [ ! -d data/job-archive-source ]; then
     cd data
     wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
     tar xf job-archive-demo.tar
-    mv ./job-archive ./job-archive-source
+    # mv ./var/job-archive ./job-archive-source
     # mv -f ./var/job.db ./cc-backend/var/
     # rm -rf ./var
     rm ./job-archive-demo.tar
     cd ..
 else
@@ -32,12 +33,12 @@ fi
 # Download unedited checkpoint files to ./data/cc-metric-store-source/checkpoints
 if [ ! -d data/cc-metric-store-source ]; then
-mkdir -p data/cc-metric-store-source/checkpoints
-cd data/cc-metric-store-source/checkpoints
-wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz
-tar xf cc-metric-store-checkpoints.tar.xz
-rm cc-metric-store-checkpoints.tar.xz
-cd ../../../
+    mkdir -p data/cc-metric-store-source/checkpoints
+    cd data/cc-metric-store-source/checkpoints
+    wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz
+    tar xf cc-metric-store-checkpoints.tar.xz
+    rm cc-metric-store-checkpoints.tar.xz
+    cd ../../../
 else
     echo "'data/cc-metric-store-source' already exists!"
 fi
@@ -52,7 +53,7 @@ fi
 # cleanup sources
 # rm -r ./data/job-archive-source
-# rm -r ./data/cc-metric-store-source
+rm -r ./data/cc-metric-store-source
 # prepare folders for influxdb2
 if [ ! -d data/influxdb ]; then
@@ -67,7 +68,7 @@ if [ ! -d .env ]; then
     cp templates/env.default ./.env
 fi
-if [ ! -d docker-compose.yml ]; then
+if [ ! -f docker-compose.yml ]; then
     cp templates/docker-compose.yml.default ./docker-compose.yml
 fi
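The -d test matches only directories, so the old check never detected an existing docker-compose.yml and re-copied the template over it on every run; -f is the correct test for a regular file. The semantics in isolation:

$ touch docker-compose.yml
$ [ -d docker-compose.yml ] && echo directory || echo not-a-directory
not-a-directory
$ [ -f docker-compose.yml ] && echo regular-file
regular-file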
@@ -75,8 +76,8 @@ docker-compose build
 ./cc-backend/cc-backend --init-db --add-user demo:admin:AdminDev
 docker-compose up -d
-echo ""
-echo "Setup complete, containers are up by default: Shut down with 'docker-compose down'."
-echo "Use './cc-backend/cc-backend' to start cc-backend."
-echo "Use scripts in /scripts to load data into influx or mariadb."
-# ./cc-backend/cc-backend
+# echo ""
+# echo "Setup complete, containers are up by default: Shut down with 'docker-compose down'."
+# echo "Use './cc-backend/cc-backend' to start cc-backend."
+# echo "Use scripts in /scripts to load data into influx or mariadb."
+# # ./cc-backend/cc-backend
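With the closing echo block commented out, the script now ends right after docker-compose up -d. Container status can be checked afterwards with, e.g.:

$ docker-compose ps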

View File

@@ -1,5 +1,5 @@
 FROM rockylinux:8
-MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>
+LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
 ENV SLURM_VERSION=22.05.6
 ENV ARCH=aarch64
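MAINTAINER has been deprecated in Docker in favor of labels; org.opencontainers.image.authors is the corresponding OCI image-spec key, and the same swap is applied to the four derived Slurm images below. The value can be read back from a built image, e.g.:

$ docker inspect --format '{{ index .Config.Labels "org.opencontainers.image.authors" }}' clustercockpit/slurm.base:22.05.6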

View File

@@ -1,5 +1,5 @@
 FROM clustercockpit/slurm.base:22.05.6
-MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>
+LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
 # clean up
 RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \

View File

@@ -1,5 +1,5 @@
 FROM clustercockpit/slurm.base:22.05.6
-MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>
+LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
 # clean up
 RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \

View File

@@ -1,5 +1,5 @@
 FROM clustercockpit/slurm.base:22.05.6
-MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>
+LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
 # clean up
 RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \

View File

@@ -1,5 +1,5 @@
 FROM clustercockpit/slurm.base:22.05.6
-MAINTAINER Jan Eitzinger <jan.eitzinger@fau.de>
+LABEL org.opencontainers.image.authors="jan.eitzinger@fau.de"
 # clean up
 RUN rm -f /root/rpmbuild/RPMS/slurm-*.rpm \