Initial checkin.

2026-02-17 00:11:46 +01:00 · 2019-04-29 10:21:48 +02:00
parent dee638f4d7
commit c92ebc4149
6 changed files with 817 additions and 1 deletions
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2019 
+Copyright (c) 2019 RRZE, University Erlangen-Nuremberg

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/acImportPBS.pl
+++ b/acImportPBS.pl
@@ -0,0 +1,189 @@
+#!/usr/bin/env perl
+# =======================================================================================
+#
+#      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
+#      Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
+#
+#      Permission is hereby granted, free of charge, to any person obtaining a copy
+#      of this software and associated documentation files (the "Software"), to deal
+#      in the Software without restriction, including without limitation the rights
+#      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#      copies of the Software, and to permit persons to whom the Software is
+#      furnished to do so, subject to the following conditions:
+#
+#      The above copyright notice and this permission notice shall be included in all
+#      copies or substantial portions of the Software.
+#
+#      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#      SOFTWARE.
+#
+# =======================================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+use Data::Dumper;
+use DateTime::Format::Strptime;
+use DBI;
+
+if ( $#ARGV < 1 ){
+    die "Usage: $0 <DBFile> <importDIR>\n";
+}
+
+my $database = $ARGV[0];
+my $basedir = $ARGV[1];
+
+my %attr = (
+    PrintError => 1,
+    RaiseError => 1
+);
+
+my $dbh = DBI->connect(
+    "DBI:SQLite:dbname=$database", "", "", \%attr);
+
+my $dateParser =
+DateTime::Format::Strptime->new(
+    pattern => '%m/%d/%Y %H:%M:%S',
+    time_zone => 'Europe/Berlin',
+    on_error  => 'undef'
+);
+
+my $sth_insert_job = $dbh->prepare(qq{
+    INSERT INTO job
+    (job_id, user_id, project_id, cluster_id,
+    start_time, stop_time, duration, walltime,
+    job_state, num_nodes, node_list, has_profile)
+    VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
+    });
+
+my $sth_select_job = $dbh->prepare(qq{
+    SELECT id, user_id, job_id, cluster_id,
+           start_time, stop_time, duration, num_nodes
+    FROM job
+    WHERE job_id=?
+    });
+
+my %JOBCACHE;
+
+while( defined( my $file = glob($basedir . '/*' ) ) ) {
+    # Import every file of the import directory; each line holds one accounting record.
+    print "Processing $file ...";
+    open(my $fh, "<", $file) or die "can't open file $file: $!";
+
+    while ( my $record = <$fh> ) {
+        if ( $record =~ /(.*);([A-Z]);(.*?);(.*)/ ) {
+            my $dt = $dateParser->parse_datetime($1) or next; # unparsable time stamp
+            my $timestamp = $dt->epoch;
+            my $job_state = $2;
+            my $job_id = $3;
+            my $jobinfo = $4;
+            my @data = split(/ /, $jobinfo);
+            my $queue;
+            my $user_id;
+            my $project_id;
+            my $start_time;
+            my $stop_time;
+            my $walltime;
+            my @nodes;
+            my $num_nodes;
+            my $node_list;
+            # Keys are anchored so that a value containing e.g. "end=" cannot match.
+            foreach my $prop ( @data ) {
+                if ( $prop =~ /^user=(.*)/ ) {
+                    $user_id = $1;
+                } elsif ( $prop =~ /^group=(.*)/ ) {
+                    $project_id = $1;
+                } elsif ( $prop =~ /^start=(.*)/ ) {
+                    $start_time = $1;
+                } elsif ( $prop =~ /^end=(.*)/ ) {
+                    $stop_time = $1;
+                } elsif ( $prop =~ /^queue=(.*)/ ) {
+                    $queue = $1;
+                } elsif ( $prop =~ /^Resource_List\.walltime=([0-9]+):([0-9]+):([0-9]+)/ ) {
+                    $walltime = $1 * 3600 + $2 * 60 + $3;
+                } elsif ( $prop =~ /^exec_host=(.*)/ ) {
+                    my $hostlist = $1;
+                    my @hosts = split(/\+/, $hostlist);
+                    # an entry contributes its node name only if it contains "/0"
+                    foreach my $host ( @hosts ) {
+                        if ( $host =~ /(.*?)\/0/) {
+                            push @nodes, $1;
+                        }
+                    }
+
+                    $num_nodes = @nodes;
+                    $node_list = join(',', @nodes);
+                }
+            }
+            # 'S' records are cached by job id; 'D'/'A' records are completed from that cache.
+            if ( $job_state eq 'S' ) {
+                $JOBCACHE{$job_id}  = {
+                    'user_id'      => $user_id,
+                    'project_id'   => $project_id,
+                    'start_time'   => $start_time,
+                    'walltime'     => $walltime,
+                    'num_nodes'    => $num_nodes,
+                    'node_list'    => $node_list
+                };
+            } elsif ( $job_state eq 'E' ) {
+                delete $JOBCACHE{$job_id};
+            } elsif ( $job_state eq 'D' or $job_state eq 'A' ) {
+                my $job;
+
+                if (exists $JOBCACHE{$job_id}){
+                    $job = $JOBCACHE{$job_id};
+                } else {
+                    next;
+                }
+                # print Dumper($job);
+                $user_id     = $job->{'user_id'};
+                $project_id  = $job->{'project_id'};
+                $start_time  = $job->{'start_time'};
+                $stop_time   = $timestamp;
+                $walltime    = $job->{'walltime'};
+                $num_nodes   = $job->{'num_nodes'};
+                $node_list   = $job->{'node_list'};
+                delete $JOBCACHE{$job_id};
+            }
+
+            if ( $job_state eq 'E' or
+                 $job_state eq 'D' or
+                 $job_state eq 'A' )
+             {
+                my $duration = $stop_time - $start_time;
+
+                # check if job already exists
+                my @row = $dbh->selectrow_array($sth_select_job, undef, $job_id);
+
+                if ( @row ) {
+                    print "Job $job_id already exists!\n";
+                } else {
+                    $sth_insert_job->execute(
+                        $job_id,
+                        $user_id,
+                        $project_id,
+                        "emmy",
+                        $start_time,
+                        $stop_time,
+                        $duration,
+                        $walltime,
+                        $job_state,
+                        $num_nodes,
+                        $node_list,
+                        0);
+                }
+            }
+        }
+    }
+
+    close $fh or die "can't close file $!";
+    print " done\n";
+}
+
+$dbh->disconnect;
--- a/acImportSlurm.pl
+++ b/acImportSlurm.pl
@@ -0,0 +1,158 @@
+#!/usr/bin/env perl
+# =======================================================================================
+#
+#      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
+#      Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
+#
+#      Permission is hereby granted, free of charge, to any person obtaining a copy
+#      of this software and associated documentation files (the "Software"), to deal
+#      in the Software without restriction, including without limitation the rights
+#      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#      copies of the Software, and to permit persons to whom the Software is
+#      furnished to do so, subject to the following conditions:
+#
+#      The above copyright notice and this permission notice shall be included in all
+#      copies or substantial portions of the Software.
+#
+#      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#      SOFTWARE.
+#
+# =======================================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+use Data::Dumper;
+use DateTime::Format::Strptime;
+use DBI;
+
+if ( $#ARGV < 1 ){
+    die "Usage: $0 <DBFile> <importDIR>\n";
+}
+
+my $database = $ARGV[0];
+my $basedir = $ARGV[1];
+
+my %attr = (
+    PrintError => 1,
+    RaiseError => 1
+);
+
+my $dbh = DBI->connect(
+    "DBI:SQLite:dbname=$database", "", "", \%attr);
+
+my $dateParser =
+DateTime::Format::Strptime->new(
+    pattern => '%Y-%m-%dT%H:%M:%S',
+    time_zone => 'Europe/Berlin',
+    on_error  => 'undef'
+);
+
+sub parse_nodelist {
+    # Expand a compressed node list such as "a[01-03,7],b5" into a comma
+    # separated list of host names ("a01,a02,a03,a7,b5"). Strings without
+    # a bracket expression are returned unchanged.
+    my $nodestr = shift;
+    my @nodes;
+    return $nodestr unless defined $nodestr and $nodestr =~ /\[/;
+
+    # One token per comma separated group: a host name prefix (which may
+    # contain digits or dashes) optionally followed by a [..] index list.
+    while ( $nodestr =~ /([^,\[\]\s]+)(?:\[([^\]]*)\])?/g ) {
+        my ($prefix, $list) = ($1, $2);
+        push @nodes, $prefix unless defined $list;
+        foreach my $item ( split(',', $list // '') ) {
+            if ( $item =~ /^([0-9]+)-([0-9]+)$/ ) {
+                # the range operator keeps zero padded ids ('01' .. '03')
+                push @nodes, map { $prefix.$_ } ( $1 .. $2 );
+            } else {
+                push @nodes, $prefix.$item;
+            }
+        }
+    }
+    return join(',', @nodes);
+}
+
+my $sth_insert_job = $dbh->prepare(qq{
+    INSERT INTO job
+    (job_id, user_id, project_id, cluster_id,
+    start_time, stop_time, duration, walltime,
+    job_state, num_nodes, node_list, has_profile)
+    VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
+    });
+
+my $sth_select_job = $dbh->prepare(qq{
+    SELECT id, user_id, job_id, cluster_id,
+           start_time, stop_time, duration, num_nodes
+    FROM job
+    WHERE job_id=?
+    });
+
+my %JOBCACHE;
+my $dt;
+
+while( defined( my $file = glob($basedir . '/*' ) ) ) {
+    # Import every file of the import directory ('|' separated columns per line).
+    print "Processing $file ...";
+    open(my $fh, "<", $file) or die "can't open file $file: $!";
+    my $columns = <$fh>; # first line is read and discarded
+
+    while ( my $record = <$fh> ) {
+        my @fields = split(/\|/, $record);
+        # only rows whose second column is a purely numeric job id are imported
+        if ( defined $fields[1] and $fields[1] =~ /^[0-9]+$/) {
+            my $cluster_id = $fields[0];
+            my $job_id = $fields[1];
+            my $user_id = $fields[2];
+            my $project_id = $fields[3];
+            my $start_dt = $dateParser->parse_datetime($fields[5] // '');
+            my $stop_dt  = $dateParser->parse_datetime($fields[6] // '');
+            # skip the row instead of dying on an unparsable time stamp
+            unless ( $start_dt and $stop_dt ) {
+                print "Job $job_id has no valid start/stop time: Ignoring!\n";
+                next;
+            }
+            my ($start_time, $stop_time) = ($start_dt->epoch, $stop_dt->epoch);
+            my $num_nodes = $fields[11];
+            my $node_list = parse_nodelist($fields[13]);
+            my $job_state = $fields[10];
+            $job_state =~ s/ by [0-9]+//;
+            my $walltime = 0;
+            my $duration = $stop_time - $start_time;
+
+            # check if job already exists
+            my @row = $dbh->selectrow_array($sth_select_job, undef, $job_id);
+            if ( @row ) {
+                print "Job $job_id already exists!\n";
+            } else {
+                $sth_insert_job->execute(
+                    $job_id,
+                    $user_id,
+                    $project_id,
+                    $cluster_id,
+                    $start_time,
+                    $stop_time,
+                    $duration,
+                    $walltime,
+                    $job_state,
+                    $num_nodes,
+                    $node_list,
+                    0);
+            }
+        } else {
+            # print "$fields[1] \n";
+            next;
+        }
+    }
+
+    close $fh or die "can't close file $!";
+    print " done\n";
+}
+
+$dbh->disconnect;
--- a/acQuery.pl
+++ b/acQuery.pl
@@ -0,0 +1,361 @@
+#!/usr/bin/env perl
+# =======================================================================================
+#
+#      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
+#      Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
+#
+#      Permission is hereby granted, free of charge, to any person obtaining a copy
+#      of this software and associated documentation files (the "Software"), to deal
+#      in the Software without restriction, including without limitation the rights
+#      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#      copies of the Software, and to permit persons to whom the Software is
+#      furnished to do so, subject to the following conditions:
+#
+#      The above copyright notice and this permission notice shall be included in all
+#      copies or substantial portions of the Software.
+#
+#      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#      SOFTWARE.
+#
+# =======================================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+use Data::Dumper;
+use Getopt::Long;
+use Pod::Usage;
+use DateTime::Format::Strptime;
+use DBI;
+
+my $database = 'jobDB';
+my @conditions;
+my ($add, $from, $to);
+
+my $dateParser =
+DateTime::Format::Strptime->new(
+    pattern => '%d.%m.%Y',
+    time_zone => 'Europe/Berlin',
+    on_error  => 'undef'
+);
+
+my $help = 0;
+my $man = 0;
+my $mode = 'count';
+my $user = '';
+my $project = '';
+my @numnodes;
+my @starttime;
+my @duration;
+
+GetOptions (
+    'help'           => \$help,
+    'man'            => \$man,
+    'mode=s'         => \$mode,
+    'user=s'         => \$user,
+    'project=s'      => \$project,
+    'numnodes=i{2}'  => \@numnodes,
+    'starttime=s{2}' => \@starttime,
+    'duration=s{2}'  => \@duration
+) or pod2usage(2);
+
+my %attr = (
+    PrintError => 1,
+    RaiseError => 1
+);
+
+if ( $#ARGV == 0 ) {
+    $database = $ARGV[0];
+}
+
+my $dbh = DBI->connect(
+    "DBI:SQLite:dbname=$database", "", "", \%attr)
+ or die("Cannot connect to database $database\n");
+
+sub parseDate {
+    # Convert a date string into seconds since the epoch.
+    #
+    # $str is parsed with the file-level $dateParser (pattern '%d.%m.%Y').
+    # Returns the epoch value, or 0 if $str is empty/false. If the parser
+    # rejects the string a notice is printed and 0 is returned as well.
+    my ($str) = @_;
+
+    return 0 unless $str;
+
+    my $parsed = $dateParser->parse_datetime($str);
+    return $parsed->epoch if $parsed;
+
+    # reached only when the parser returned no DateTime object
+    print "Cannot parse datetime string $str: Ignoring!\n";
+    return 0;
+}
+
+sub parseDuration {
+    # Convert a duration argument into seconds. A number may carry a unit
+    # suffix (h: hours, m: minutes, s: seconds; none: seconds) and several
+    # components may be combined, e.g. "1h30m" yields 5400.
+    # Returns 0 after printing a notice if the string contains no number.
+    my $str = shift;
+    my %seconds_per = ( h => 3600, m => 60, s => 1 );
+    my $total = 0;
+    my $found = 0;
+
+    # sum up every <number><unit> component found in the string
+    while ( $str =~ /([0-9]+)([hms]?)/g ) {
+        $total += $1 * ( $2 ? $seconds_per{$2} : 1 );
+        $found = 1;
+    }
+    return $total if $found;
+
+    print "Cannot parse duration string $str: Ignoring!\n";
+    return 0;
+}
+
+sub formatDuration {
+    my $ts = shift;
+    # TODO: not implemented yet; nothing in this script calls this stub.
+}
+
+sub processRange {
+    # Classify a (lower, upper) pair for buildCondition. A bound that is 0,
+    # empty or undefined counts as "not given". Returns (code, lower, upper)
+    # with code 3: both bounds, 1: lower only, 2: upper only, 0: no bound.
+    my ($lower, $upper) = @_;
+
+    return (3, $lower, $upper) if $lower && $upper;
+    return (1, $lower, 0)      if $lower;
+    return (2, 0, $upper)      if $upper;
+    # explicit result instead of the implicit value of the last condition
+    return (0, 0, 0);
+}
+
+sub buildCondition {
+    # Append one SQL comparison for column $column to @conditions. Which
+    # comparison is used depends on the file-level range code $add:
+    # 1: "< $from", 2: "> $to", 3: "BETWEEN $from AND $to".
+    # Nothing is appended when $add is false.
+    my ($column) = @_;
+
+    if ( $add ) {
+        push @conditions, "$column < $from"               if $add == 1;
+        push @conditions, "$column > $to"                 if $add == 2;
+        push @conditions, "$column BETWEEN $from AND $to" if $add == 3;
+    }
+}
+
+sub printJobStat {
+    # Print job count, summed duration in hours and two histograms (number of
+    # nodes, runtime in hours) for the jobs selected by $conditionstring, an
+    # SQL ' WHERE ...' clause that may be empty or undefined (all jobs).
+    my ($conditionstring) = @_;
+    my $from_job = 'FROM job '.( $conditionstring // '' );
+
+    my ($count, $coreHours) =
+        $dbh->selectrow_array('SELECT COUNT(id), SUM(duration)/3600 '.$from_job);
+    $coreHours //= 0; # SUM() yields NULL when no job matches
+
+    print "=================================\n";
+    print "Job count: $count\n";
+    print "Core hours: $coreHours \n";
+
+    # title, column header and SQL expression of each histogram
+    my @histograms = ( [ 'Number of nodes', 'nodes', 'num_nodes' ],
+                       [ 'Runtime',         'hours', 'duration/3600' ] );
+
+    foreach my $histo ( @histograms ) {
+        my ($title, $unit, $column) = @$histo;
+        print "\nHistogram: $title\n";
+        print "$unit\tcount\n";
+        foreach my $bin ( $dbh->selectall_array("SELECT $column, COUNT(*) $from_job GROUP BY 1") ) {
+            print "$bin->[0]\t$bin->[1]\n";
+        }
+    }
+}
+
+
+sub printJob {
+    # Print a multi-line, human readable record of one job to STDOUT.
+    # $record: hash reference holding the job's column values.
+    my ($record) = @_;
+
+    print <<"END_JOB";
+=================================
+JobId: $record->{job_id}
+UserId: $record->{user_id}
+Number of nodes: $record->{num_nodes}
+From $record->{start_time} to $record->{stop_time}
+Duration $record->{duration}
+END_JOB
+}
+
+pod2usage(1) if $help;
+pod2usage(-verbose  => 2) if $man;
+
+# build query conditions
+# User supplied strings are escaped with $dbh->quote before they become part
+# of the SQL text, so a quote character in an option value cannot alter it.
+if ( $user ) {
+    push @conditions, 'user_id='.$dbh->quote($user);
+}
+
+if ( $project ) {
+    push @conditions, 'project_id='.$dbh->quote($project);
+}
+
+if ( @numnodes ) {
+    ($add, $from, $to) = processRange($numnodes[0], $numnodes[1]);
+    buildCondition('num_nodes');
+}
+
+if ( @starttime ) {
+    ($add, $from, $to) = processRange( parseDate($starttime[0]), parseDate($starttime[1]));
+    buildCondition('start_time');
+}
+
+if ( @duration ) {
+    ($add, $from, $to) = processRange( parseDuration($duration[0]), parseDuration($duration[1]));
+    buildCondition('duration');
+}
+
+my $query;
+my $conditionstring = ''; # stays empty when no condition was given
+
+if ( @conditions ){
+    $conditionstring = ' WHERE '.join(' AND ',@conditions);
+}
+
+# handle mode
+if ( $mode eq 'query' ) {
+    $query = 'SELECT * FROM job'.$conditionstring;
+    print "$query\n";
+    exit;
+}
+
+if ( $mode eq 'count' ) {
+    $query = 'SELECT COUNT(*) FROM job'.$conditionstring;
+    my ($count) = $dbh->selectrow_array($query);
+    print "COUNT $count\n";
+    exit;
+}
+
+if ( $mode eq 'stat' ) {
+    printJobStat($conditionstring);
+    exit;
+}
+
+$query = 'SELECT * FROM job'.$conditionstring;
+my $sth = $dbh->prepare($query);
+$sth->execute;
+my %row;
+$sth->bind_columns( \( @row{ @{$sth->{NAME_lc} } } ));
+
+if ( $mode eq 'list' ) {
+    while ($sth->fetch) {
+        printJob(\%row);
+    }
+} elsif ( $mode eq 'ids' ) {
+    while ($sth->fetch) {
+        print "$row{job_id}\n";
+    }
+} else {
+    die "ERROR Unknown mode $mode!\n";
+}
+
+__END__
+
+=head1 NAME
+
+acQuery.pl - Wrapper script to access sqlite job database.
+
+=head1 SYNOPSIS
+
+   acQuery.pl [options] -- <DB file>
+
+   Help Options:
+   --help  Show help text
+   --man   Show man page
+   --mode <mode>  Set the operation mode
+   --user <user_id> Search for jobs of specific user
+   --project <project_id> Search for jobs of specific project
+   --duration <from> <to>  Specify duration range of jobs
+   --numnodes <from> <to>  Specify range for number of nodes of job
+   --starttime <from> <to>  Specify range for start time of jobs
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--help>
+Show brief help information.
+
+=item B<--man>
+Read the manual, with examples
+
+=item B<--mode> I<mode>
+Specify output mode. Mode can be one of:
+
+=over 4
+
+=item B<ids>
+Print list of job ids matching conditions. One job id per line.
+
+=item B<query>
+Print the query string and then exit.
+
+=item B<count>
+Only output the number of jobs matching the conditions. (default mode)
+
+=item B<list>
+Output a record of every job matching the conditions.
+
+=item B<stat>
+Output job statistic for all jobs matching the conditions.
+
+=back
+
+=item B<--user>
+Search job for a specific user id.
+
+=item B<--project>
+Search job for a specific project.
+
+=item B<--duration>
+Specify condition for job duration. This option takes two arguments: If both
+arguments are positive integers the condition is duration between first
+argument and second argument. If the second argument is zero condition is duration
+smaller than first argument. If first argument is zero condition is duration
+larger than second argument. Duration can be in seconds, minutes (append m) or
+hours (append h).
+
+=item B<--numnodes>
+Specify condition for number of node range of job. This option takes two
+arguments: If both arguments are positive integers the condition is number of
+nodes between first argument and second argument. If the second argument is
+zero condition is number of nodes smaller than first argument. If first
+argument is zero condition is number of nodes larger than second argument.
+
+=item B<--starttime>
+Specify condition for the start time of job. This option takes two
+arguments: If both arguments are dates the condition is start time
+between first argument and second argument. If the second argument is
+zero condition is start time smaller than first argument. If first
+argument is zero condition is start time larger than second argument.
+Start time must be given as date in the following format: %d.%m.%Y
+
+=back
+
+=head1 DESCRIPTION
+
+=head1 EXAMPLES
+
+=head1 AUTHOR
+
+Jan Eitzinger - L<https://hpc.fau.de/person/jan-eitzinger/>
+
+=cut
--- a/acSync.pl
+++ b/acSync.pl
@@ -0,0 +1,98 @@
+#!/usr/bin/env perl
+# =======================================================================================
+#
+#      Author:   Jan Eitzinger (je), jan.eitzinger@fau.de
+#      Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
+#
+#      Permission is hereby granted, free of charge, to any person obtaining a copy
+#      of this software and associated documentation files (the "Software"), to deal
+#      in the Software without restriction, including without limitation the rights
+#      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#      copies of the Software, and to permit persons to whom the Software is
+#      furnished to do so, subject to the following conditions:
+#
+#      The above copyright notice and this permission notice shall be included in all
+#      copies or substantial portions of the Software.
+#
+#      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#      SOFTWARE.
+#
+# =======================================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+use File::Slurp;
+use Data::Dumper;
+use JSON::MaybeXS qw(encode_json decode_json);
+use DBI;
+
+my $database = 'jobDB';
+my $basedir = './data';
+
+my %attr = (
+    PrintError => 1,
+    RaiseError => 1
+);
+
+my $dbh = DBI->connect(
+    "DBI:SQLite:dbname=$database", "", "", \%attr);
+
+my $sth_select_job = $dbh->prepare(qq{
+    SELECT id, user_id, job_id, cluster_id,
+    start_time, stop_time, duration, num_nodes
+    FROM job
+    WHERE job_id=?
+    });
+
+my $jobcount = 0;
+my $wrongjobcount = 0;
+
+opendir my $dh, $basedir or die "can't open directory: $!";
+# Each directory entry is taken as a job id; its meta.json is compared with the job table.
+while ( defined( my $jobID = readdir $dh ) ) {
+    next if $jobID eq '.' or $jobID eq '..';
+    my $metafile = "$basedir/$jobID/meta.json";
+    unless ( -f $metafile ) {
+        warn "$jobID has no meta.json: Ignoring!\n";
+        next;
+    }
+
+    my $needsUpdate = 0;
+    my $jobmeta_json = read_file($metafile);
+    my $job = decode_json $jobmeta_json;
+    my @row = $dbh->selectrow_array($sth_select_job, undef, $jobID);
+
+    if ( @row ) {
+        $jobcount++;
+    # print Dumper(@row);
+        my $duration_diff = abs($job->{duration} - $row[6]);
+        if ( $duration_diff > 120 ) {
+            $needsUpdate = 1;
+            # print "$jobID DIFF DURATION $duration_diff\n";
+            # print "CC $row[4] - $row[5]\n";
+            # print "DB $job->{start_time} - $job->{stop_time}\n"
+        }
+        if ( $row[7] != $job->{num_nodes} ){
+            $needsUpdate = 1;
+            # print "$jobID DIFF NODES $row[7] $job->{num_nodes}\n";
+        }
+    } else {
+        print "$jobID NOT in DB!\n";
+    }
+
+    if ( $needsUpdate ){
+        $wrongjobcount++;
+        print "$jobID\n";
+    }
+}
+closedir $dh or die "can't close directory: $!";
+$dbh->disconnect;
+
+print "$wrongjobcount of $jobcount need update\n";
--- a/initDB.sql
+++ b/initDB.sql
@@ -0,0 +1,10 @@
+CREATE TABLE job ( id INTEGER PRIMARY KEY,
+ job_id TEXT, user_id TEXT, project_id TEXT, cluster_id TEXT,
+ start_time INTEGER, stop_time INTEGER, duration INTEGER,
+ walltime INTEGER, job_state TEXT,
+ num_nodes INTEGER, node_list TEXT, has_profile INTEGER,
+ mem_used REAL, flops_any REAL, mem_bw REAL, ib_bw REAL, file_bw REAL);
+CREATE TABLE tag ( id INTEGER PRIMARY KEY, tag_type TEXT, tag_name TEXT);
+CREATE TABLE jobtag ( job_id INTEGER, tag_id INTEGER, PRIMARY KEY (job_id, tag_id),
+ FOREIGN KEY (job_id) REFERENCES job (id)  ON DELETE CASCADE ON UPDATE NO ACTION,
+ FOREIGN KEY (tag_id) REFERENCES tag (id)  ON DELETE CASCADE ON UPDATE NO ACTION );