mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-26 13:29:05 +01:00
First working version of anonymize script
This commit is contained in:
parent
6b26f8b4e9
commit
173ddd5a48
104
anonDB.pl
104
anonDB.pl
@ -50,86 +50,92 @@ my $sth_select_all = $dbh->prepare(qq{
|
|||||||
FROM job;
|
FROM job;
|
||||||
});
|
});
|
||||||
|
|
||||||
my $sth_select_job = $dbh->prepare(qq{
|
|
||||||
SELECT id, user_id, project_id
|
|
||||||
FROM job
|
|
||||||
WHERE job_id=?
|
|
||||||
});
|
|
||||||
|
|
||||||
my $sth_update_job = $dbh->prepare(qq{
|
my $sth_update_job = $dbh->prepare(qq{
|
||||||
UPDATE job
|
UPDATE job
|
||||||
SET user_id = ?,
|
SET user_id = ?,
|
||||||
project_id = ?,
|
project_id = ?
|
||||||
flops_any = ?,
|
|
||||||
mem_bw = ?
|
|
||||||
WHERE id=?;
|
WHERE id=?;
|
||||||
});
|
});
|
||||||
|
|
||||||
my ($user_id, $num_nodes, $start_time, $stop_time, $queue, $duration, $db_id);
|
|
||||||
|
|
||||||
# build user lookup
|
|
||||||
$sth_select_all->execute;
|
|
||||||
my $user_index = 0; my $project_index = 0;
|
my $user_index = 0; my $project_index = 0;
|
||||||
my %user_lookup; my %project_lookup;
|
my %user_lookup; my %project_lookup;
|
||||||
|
my %user_group;
|
||||||
my %row;
|
my %row;
|
||||||
$sth_select_all->bind_columns( \( @row{ @{$sth->{NAME_lc} } } ));
|
|
||||||
|
# build lookups
|
||||||
|
$sth_select_all->execute;
|
||||||
|
$sth_select_all->bind_columns( \( @row{ @{$sth_select_all->{NAME_lc} } } ));
|
||||||
|
|
||||||
while ($sth_select_all->fetch) {
|
while ($sth_select_all->fetch) {
|
||||||
my $user_id = $row->{'user_id'};
|
my $user_id = $row{'user_id'};
|
||||||
|
my $project_id = $row{'project_id'};
|
||||||
|
|
||||||
if ( not exists $user_lookup{$user_id}) {
|
if ( not exists $user_lookup{$user_id}) {
|
||||||
print "New user $user_id\n";
|
|
||||||
|
|
||||||
$user_index++;
|
$user_index++;
|
||||||
$user_lookup{$user_id} = $user_index;
|
$user_lookup{$user_id} = $user_index;
|
||||||
|
$user_group{$user_id} = $project_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( not exists $project_lookup{$project_id}) {
|
||||||
|
$project_index++;
|
||||||
|
$project_lookup{$project_id} = $project_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
exit;
|
write_file("user-conversion.json", encode_json \%user_lookup);
|
||||||
|
write_file("project-conversion.json", encode_json \%project_lookup);
|
||||||
|
print "$user_index total users\n";
|
||||||
|
print "$project_index total projects\n";
|
||||||
|
|
||||||
|
# convert database
|
||||||
|
$sth_select_all->execute;
|
||||||
|
$sth_select_all->bind_columns( \( @row{ @{$sth_select_all->{NAME_lc} } } ));
|
||||||
|
|
||||||
|
while ($sth_select_all->fetch) {
|
||||||
|
my $user_id = 'user_'.$user_lookup{$row{'user_id'}};
|
||||||
|
my $project_id = 'project_'.$project_lookup{$row{'project_id'}};
|
||||||
|
|
||||||
|
# print "$row{'id'}: $user_id - $project_id\n";
|
||||||
|
|
||||||
|
$sth_update_job->execute(
|
||||||
|
$user_id,
|
||||||
|
$project_id,
|
||||||
|
$row{'id'}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
# convert job meta file
|
||||||
opendir my $dh, $basedir or die "can't open directory: $!";
|
opendir my $dh, $basedir or die "can't open directory: $!";
|
||||||
while ( readdir $dh ) {
|
while ( readdir $dh ) {
|
||||||
chomp;
|
chomp;
|
||||||
next if $_ eq '.' or $_ eq '..';
|
next if $_ eq '.' or $_ eq '..';
|
||||||
|
|
||||||
my $jobID = $_;
|
my $jobID = $_;
|
||||||
my $needsUpdate = 0;
|
|
||||||
|
|
||||||
my $jobmeta_json = read_file("$basedir/$jobID/meta.json");
|
my $jobmeta_json = read_file("$basedir/$jobID/meta.json");
|
||||||
my $job = decode_json $jobmeta_json;
|
my $job = decode_json $jobmeta_json;
|
||||||
my @row = $dbh->selectrow_array($sth_select_job, undef, $jobID);
|
|
||||||
my ($db_id, $db_user_id, $db_job_id, $db_cluster_id, $db_start_time, $db_stop_time, $db_duration, $db_num_nodes);
|
|
||||||
|
|
||||||
# print Dumper($job);
|
my $user = $job->{'user_id'};
|
||||||
|
my $project;
|
||||||
|
|
||||||
if ( @row ) {
|
if ( exists $user_lookup{$user}) {
|
||||||
($db_id,
|
$project = $user_group{$user};
|
||||||
$db_user_id,
|
$user = 'user_'.$user_lookup{$user};
|
||||||
$db_job_id,
|
|
||||||
$db_cluster_id,
|
|
||||||
$db_start_time,
|
|
||||||
$db_stop_time,
|
|
||||||
$db_duration,
|
|
||||||
$db_num_nodes) = @row;
|
|
||||||
|
|
||||||
my $footprint = $job->{footprint};
|
|
||||||
|
|
||||||
# print "$footprint->{mem_used}->{avg}, $footprint->{flops_any}->{avg}, $footprint->{mem_bw}->{avg}\n";
|
|
||||||
|
|
||||||
$sth_update_job->execute(
|
|
||||||
1,
|
|
||||||
$footprint->{mem_used}->{avg},
|
|
||||||
$footprint->{flops_any}->{avg},
|
|
||||||
$footprint->{mem_bw}->{avg},
|
|
||||||
$db_id
|
|
||||||
);
|
|
||||||
|
|
||||||
$jobcount++;
|
|
||||||
} else {
|
} else {
|
||||||
print "$jobID NOT in DB!\n";
|
die "$user not in lookup hash!\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( exists $project_lookup{$project}) {
|
||||||
|
$project = 'project_'.$project_lookup{$project};
|
||||||
|
} else {
|
||||||
|
die "$project not in lookup hash!\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
$job->{user_id} = $user;
|
||||||
|
$job->{project_id} = $project;
|
||||||
|
$jobmeta_json = encode_json $job;
|
||||||
|
# print "$jobmeta_json\n";
|
||||||
|
write_file("$basedir/$jobID/meta.json", $jobmeta_json);
|
||||||
}
|
}
|
||||||
closedir $dh or die "can't close directory: $!";
|
closedir $dh or die "can't close directory: $!";
|
||||||
$dbh->disconnect;
|
|
||||||
|
|
||||||
print "$wrongjobcount of $jobcount need update\n";
|
$dbh->disconnect;
|
||||||
|
Loading…
Reference in New Issue
Block a user