From f4ed659be26ace06f42d919d4d155e110d751bf4 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Mon, 20 Jun 2022 18:53:44 +0200 Subject: [PATCH] add script fot changing starttimes of jobarchive --- migrateTimestamps.pl | 146 +++++++++++++++++++++++++++++++++++++++++++ setupDev.sh | 29 +++++++++ 2 files changed, 175 insertions(+) create mode 100755 migrateTimestamps.pl diff --git a/migrateTimestamps.pl b/migrateTimestamps.pl new file mode 100755 index 0000000..7b25244 --- /dev/null +++ b/migrateTimestamps.pl @@ -0,0 +1,146 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; + +use File::Path qw( make_path rmtree ); +use String::CamelCase qw(camelize); +use Cpanel::JSON::XS qw( decode_json encode_json ); +use File::Slurp; +use Data::Dumper; +use Data::Walk; +use Scalar::Util qw( reftype ); +use Time::Piece; + +## NOTE: Based on Jan: migrateCC-jobArchive.pl + +my $FIRST=1; +my @METRICS = ('flops_any', 'cpu_load', 'mem_used', 'flops_sp', + 'flops_dp', 'mem_bw', 'cpi', 'cpi_avg', 'clock', 'rapl_power'); + +my %UNITS = ( + 'flops_any' => 'GF/s', + 'cpu_load' => 'load', + 'mem_used' => 'GB', + 'flops_sp' => 'GF/s', + 'flops_dp' => 'GF/s', + 'mem_bw' => 'GB/s', + 'clock' => 'MHz', + 'rapl_power' => 'W' +); + +sub process { +if ( $Data::Walk::type eq 'HASH' && !($Data::Walk::index%2)) { + + if ( ! $FIRST ) { + my $key = $_; + if ( ! grep( /^$key$/, @METRICS) ) { + my $str = lcfirst(camelize($key)); + my $hashref = $Data::Walk::container; + my $value = delete ${$hashref}{$key}; + ${$hashref}{$str} = $value; + } + } + + if ( $FIRST ) { + $FIRST = 0; + } +} +} + +my $localtime = localtime; +my $epochtime = $localtime->epoch; +my $targetDir = './cc-backend/var/job-archive'; +my @Clusters; +my $src = './data/job-archive'; + +chomp($checkpointStart=`date --date 'TZ="Europe/Berlin" 0:00 7 days ago' +%s`); +my $halfday = 43200; +my $targetDirCheckpoints = './data/cc-metric-store_new' +my $srcCheckpoints = './data/cc-metric-store' +my @ClustersCheckpoints + +## Get Clusters +opendir my $dh, $src or die "can't open directory: $!"; + +while ( readdir $dh ) { + chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive'; + + my $cluster = $_; + push @Clusters, $cluster; +} + +opendir my $dhc, $srcCheckpoints or die "can't open directory: $!"; + +while ( readdir $dhc ) { + chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive'; + + my $cluster = $_; + push @ClustersCheckpoints, $cluster; +} + +# start for jobarchive +foreach my $cluster ( @Clusters ) { + print "Starting to update startTime for $cluster\n"; + + opendir my $dhLevel1, "$src/$cluster" or die "can't open directory: $!"; + while ( readdir $dhLevel1 ) { + chomp; next if $_ eq '.' or $_ eq '..'; + my $level1 = $_; + + if ( -d "$src/$cluster/$level1" ) { + opendir my $dhLevel2, "$src/$cluster/$level1" or die "can't open directory: $!"; + while ( readdir $dhLevel2 ) { + chomp; next if $_ eq '.' or $_ eq '..'; + my $level2 = $_; + my $src = "$src/$cluster/$level1/$level2"; + my $target = "$targetDir/$cluster/$level1/$level2/"; + + my $oldsrc = $src; + if ( ! -e "$src/meta.json") { + my @files = read_dir($src); + if (!@files) { + next; + } + $src = "$src/".$files[0]; + } + + if ( ! -e "$src/meta.json") { + rmtree $oldsrc; + next; + } + my $str = read_file("$src/meta.json"); + my $json = decode_json($str); + $FIRST = 1; + walk \&process, $json; + + # NOTE Start meta.json iteration here + # my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT; + # Set new startTime: Between 5 days and 1 day before now + + # Remove id from attributes + $json->{startTime} = $epochtime - (int(rand(432000)) + 86400); + $json->{stopTime} = $json->{startTime} + $json->{duration}; + + $target .= $json->{startTime}; + + if ( not -d $target ){ + print "Writing files\n"; + + print "$cluster/$level1/$level2\n"; + make_path($target); + + $str = encode_json($json); + write_file("$target/meta.json", $str); + + $str = read_file("$src/data.json"); + write_file("$target/data.json", $str); + } else { + #rmtree $src; + } + } + } + } +} + +print "Done\n"; diff --git a/setupDev.sh b/setupDev.sh index 854b625..c1256a1 100755 --- a/setupDev.sh +++ b/setupDev.sh @@ -1,5 +1,34 @@ #!/bin/bash +# Check cc-backend, touch job.db if exists +if [ ! -d cc-backend ]; then + echo "'cc-backend' not yet prepared! Please clone cc-backend repository before starting this script." + echo -n "Stopped." + exit +else + cd cc-backend + if [ ! -d var ]; then + mkdir var + touch var/job.db + else + echo "'cc-backend/var' exists. Cautiously exiting." + echo -n "Stopped." + exit + fi +fi + + +# Download unedited job-archibe to /data +if [ ! -d data/job-archive ]; then + cd data + wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive.tar.xz + tar xJf job-archive.tar.xz + rm ./job-archive.tar.xz + cd .. +fi + + + # Download data for influxdb2 if [ ! -d data/influxdb ]; then mkdir -p data/influxdb/data