cc-backend/pkg/archive/parquet/convert_test.go

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package parquet

import (
	"testing"

	"github.com/ClusterCockpit/cc-lib/v2/schema"
)
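
// TestParquetRowToJob builds a fully populated job, converts it to a
// parquet row and back, and checks that scalar, complex, and metric
// fields survive the conversion.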
func TestParquetRowToJob(t *testing.T) {
	meta := &schema.Job{
		JobID:        42,
		Cluster:      "testcluster",
		SubCluster:   "sc0",
		Partition:    "main",
		Project:      "testproject",
		User:         "testuser",
		State:        schema.JobStateCompleted,
		StartTime:    1700000000,
		Duration:     3600,
		Walltime:     7200,
		NumNodes:     2,
		NumHWThreads: 16,
		NumAcc:       4,
		Energy:       123.45,
		SMT:          2,
		Resources: []*schema.Resource{
			{Hostname: "node001", HWThreads: []int{0, 1, 2, 3}},
			{Hostname: "node002", HWThreads: []int{4, 5, 6, 7}},
		},
		Statistics: map[string]schema.JobStatistics{
			"cpu_load": {Avg: 50.0, Min: 10.0, Max: 90.0},
		},
		Tags: []*schema.Tag{
			{Type: "test", Name: "tag1"},
		},
		MetaData: map[string]string{
			"key1": "value1",
		},
		Footprint: map[string]float64{
			"cpu_load": 50.0,
		},
		EnergyFootprint: map[string]float64{
			"total": 123.45,
		},
	}
	data := &schema.JobData{
		"cpu_load": {
			schema.MetricScopeNode: &schema.JobMetric{
				Unit:     schema.Unit{Base: ""},
				Timestep: 60,
				Series: []schema.Series{
					{
						Hostname: "node001",
						Data:     []schema.Float{1.0, 2.0, 3.0},
					},
				},
			},
		},
	}

	// Convert to parquet row
	row, err := JobToParquetRow(meta, data)
	if err != nil {
		t.Fatalf("JobToParquetRow: %v", err)
	}

	// Convert back
	gotMeta, gotData, err := ParquetRowToJob(row)
	if err != nil {
		t.Fatalf("ParquetRowToJob: %v", err)
	}

	// Verify scalar fields
	if gotMeta.JobID != meta.JobID {
		t.Errorf("JobID = %d, want %d", gotMeta.JobID, meta.JobID)
	}
	if gotMeta.Cluster != meta.Cluster {
		t.Errorf("Cluster = %q, want %q", gotMeta.Cluster, meta.Cluster)
	}
	if gotMeta.SubCluster != meta.SubCluster {
		t.Errorf("SubCluster = %q, want %q", gotMeta.SubCluster, meta.SubCluster)
	}
	if gotMeta.Partition != meta.Partition {
		t.Errorf("Partition = %q, want %q", gotMeta.Partition, meta.Partition)
	}
	if gotMeta.Project != meta.Project {
		t.Errorf("Project = %q, want %q", gotMeta.Project, meta.Project)
	}
	if gotMeta.User != meta.User {
		t.Errorf("User = %q, want %q", gotMeta.User, meta.User)
	}
	if gotMeta.State != meta.State {
		t.Errorf("State = %q, want %q", gotMeta.State, meta.State)
	}
	if gotMeta.StartTime != meta.StartTime {
		t.Errorf("StartTime = %d, want %d", gotMeta.StartTime, meta.StartTime)
	}
	if gotMeta.Duration != meta.Duration {
		t.Errorf("Duration = %d, want %d", gotMeta.Duration, meta.Duration)
	}
	if gotMeta.Walltime != meta.Walltime {
		t.Errorf("Walltime = %d, want %d", gotMeta.Walltime, meta.Walltime)
	}
	if gotMeta.NumNodes != meta.NumNodes {
		t.Errorf("NumNodes = %d, want %d", gotMeta.NumNodes, meta.NumNodes)
	}
	if gotMeta.NumHWThreads != meta.NumHWThreads {
		t.Errorf("NumHWThreads = %d, want %d", gotMeta.NumHWThreads, meta.NumHWThreads)
	}
	if gotMeta.NumAcc != meta.NumAcc {
		t.Errorf("NumAcc = %d, want %d", gotMeta.NumAcc, meta.NumAcc)
	}
	if gotMeta.Energy != meta.Energy {
		t.Errorf("Energy = %f, want %f", gotMeta.Energy, meta.Energy)
	}
	if gotMeta.SMT != meta.SMT {
		t.Errorf("SMT = %d, want %d", gotMeta.SMT, meta.SMT)
	}

	// Verify complex fields
	if len(gotMeta.Resources) != 2 {
		t.Fatalf("Resources len = %d, want 2", len(gotMeta.Resources))
	}
	if gotMeta.Resources[0].Hostname != "node001" {
		t.Errorf("Resources[0].Hostname = %q, want %q", gotMeta.Resources[0].Hostname, "node001")
	}
	if len(gotMeta.Resources[0].HWThreads) != 4 {
		t.Errorf("Resources[0].HWThreads len = %d, want 4", len(gotMeta.Resources[0].HWThreads))
	}
	if len(gotMeta.Statistics) != 1 {
		t.Fatalf("Statistics len = %d, want 1", len(gotMeta.Statistics))
	}
	if stat, ok := gotMeta.Statistics["cpu_load"]; !ok {
		t.Error("Statistics missing cpu_load")
	} else if stat.Avg != 50.0 {
		t.Errorf("Statistics[cpu_load].Avg = %f, want 50.0", stat.Avg)
	}
	if len(gotMeta.Tags) != 1 || gotMeta.Tags[0].Name != "tag1" {
		t.Errorf("Tags = %v, want [{test tag1}]", gotMeta.Tags)
	}
	if gotMeta.MetaData["key1"] != "value1" {
		t.Errorf("MetaData[key1] = %q, want %q", gotMeta.MetaData["key1"], "value1")
	}
	if gotMeta.Footprint["cpu_load"] != 50.0 {
		t.Errorf("Footprint[cpu_load] = %f, want 50.0", gotMeta.Footprint["cpu_load"])
	}
	if gotMeta.EnergyFootprint["total"] != 123.45 {
		t.Errorf("EnergyFootprint[total] = %f, want 123.45", gotMeta.EnergyFootprint["total"])
	}

	// Verify metric data
	if gotData == nil {
		t.Fatal("JobData is nil")
	}
	cpuLoad, ok := (*gotData)["cpu_load"]
	if !ok {
		t.Fatal("JobData missing cpu_load")
	}
	nodeMetric, ok := cpuLoad[schema.MetricScopeNode]
	if !ok {
		t.Fatal("cpu_load missing node scope")
	}
	if nodeMetric.Timestep != 60 {
		t.Errorf("Timestep = %d, want 60", nodeMetric.Timestep)
	}
	if len(nodeMetric.Series) != 1 {
		t.Fatalf("Series len = %d, want 1", len(nodeMetric.Series))
	}
	if nodeMetric.Series[0].Hostname != "node001" {
		t.Errorf("Series[0].Hostname = %q, want %q", nodeMetric.Series[0].Hostname, "node001")
	}
	if len(nodeMetric.Series[0].Data) != 3 {
		t.Errorf("Series[0].Data len = %d, want 3", len(nodeMetric.Series[0].Data))
	}
}
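
// TestParquetRowToJobNilOptionalFields checks that optional job fields
// that were never set come back as nil after a row round trip instead
// of as empty non-nil values.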
func TestParquetRowToJobNilOptionalFields(t *testing.T) {
	meta := &schema.Job{
		JobID:      1,
		Cluster:    "test",
		SubCluster: "sc0",
		Project:    "proj",
		User:       "user",
		State:      schema.JobStateCompleted,
		StartTime:  1700000000,
		Duration:   60,
		NumNodes:   1,
		Resources: []*schema.Resource{
			{Hostname: "node001"},
		},
	}
	data := &schema.JobData{
		"cpu_load": {
			schema.MetricScopeNode: &schema.JobMetric{
				Timestep: 60,
				Series: []schema.Series{
					{Hostname: "node001", Data: []schema.Float{1.0}},
				},
			},
		},
	}

	row, err := JobToParquetRow(meta, data)
	if err != nil {
		t.Fatalf("JobToParquetRow: %v", err)
	}
	gotMeta, gotData, err := ParquetRowToJob(row)
	if err != nil {
		t.Fatalf("ParquetRowToJob: %v", err)
	}

	if gotMeta.JobID != 1 {
		t.Errorf("JobID = %d, want 1", gotMeta.JobID)
	}
	if gotMeta.Tags != nil {
		t.Errorf("Tags should be nil, got %v", gotMeta.Tags)
	}
	if gotMeta.Statistics != nil {
		t.Errorf("Statistics should be nil, got %v", gotMeta.Statistics)
	}
	if gotMeta.MetaData != nil {
		t.Errorf("MetaData should be nil, got %v", gotMeta.MetaData)
	}
	if gotMeta.Footprint != nil {
		t.Errorf("Footprint should be nil, got %v", gotMeta.Footprint)
	}
	if gotMeta.EnergyFootprint != nil {
		t.Errorf("EnergyFootprint should be nil, got %v", gotMeta.EnergyFootprint)
	}
	if gotData == nil {
		t.Fatal("JobData is nil")
	}
}
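
// TestRoundTripThroughParquetFile exercises the full path: job to row,
// row to serialized parquet bytes, and back again, checking that key
// fields survive the file-level round trip.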
func TestRoundTripThroughParquetFile(t *testing.T) {
	meta, data := makeTestJob(999)
	meta.Tags = []*schema.Tag{{Type: "test", Name: "roundtrip"}}

	// Convert to parquet row
	row, err := JobToParquetRow(meta, data)
	if err != nil {
		t.Fatalf("JobToParquetRow: %v", err)
	}

	// Write to parquet bytes
	parquetBytes, err := writeParquetBytes([]ParquetJobRow{*row})
	if err != nil {
		t.Fatalf("writeParquetBytes: %v", err)
	}

	// Read back from parquet bytes
	rows, err := ReadParquetFile(parquetBytes)
	if err != nil {
		t.Fatalf("ReadParquetFile: %v", err)
	}
	if len(rows) != 1 {
		t.Fatalf("expected 1 row, got %d", len(rows))
	}

	// Convert back to job
	gotMeta, gotData, err := ParquetRowToJob(&rows[0])
	if err != nil {
		t.Fatalf("ParquetRowToJob: %v", err)
	}

	// Verify key fields survived the round trip
	if gotMeta.JobID != 999 {
		t.Errorf("JobID = %d, want 999", gotMeta.JobID)
	}
	if gotMeta.Cluster != "testcluster" {
		t.Errorf("Cluster = %q, want %q", gotMeta.Cluster, "testcluster")
	}
	if gotMeta.User != "testuser" {
		t.Errorf("User = %q, want %q", gotMeta.User, "testuser")
	}
	if gotMeta.State != schema.JobStateCompleted {
		t.Errorf("State = %q, want %q", gotMeta.State, schema.JobStateCompleted)
	}
	if len(gotMeta.Tags) != 1 || gotMeta.Tags[0].Name != "roundtrip" {
		t.Errorf("Tags = %v, want [{test roundtrip}]", gotMeta.Tags)
	}
	if len(gotMeta.Resources) != 2 {
		t.Errorf("Resources len = %d, want 2", len(gotMeta.Resources))
	}
	if gotData == nil {
		t.Fatal("JobData is nil")
	}
	if _, ok := (*gotData)["cpu_load"]; !ok {
		t.Error("JobData missing cpu_load")
	}
}