cc-backend/pkg/archive/parquet/convert.go

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package parquet

import (
	"bytes"
	"compress/gzip"
	"encoding/json"
	"fmt"

	"github.com/ClusterCockpit/cc-lib/v2/schema"
)

// JobToParquetRow converts job metadata and metric data into a flat ParquetJobRow.
// Nested fields are marshaled to JSON; metric data is gzip-compressed JSON.
func JobToParquetRow(meta *schema.Job, data *schema.JobData) (*ParquetJobRow, error) {
	resourcesJSON, err := json.Marshal(meta.Resources)
	if err != nil {
		return nil, fmt.Errorf("marshal resources: %w", err)
	}

	var statisticsJSON []byte
	if meta.Statistics != nil {
		statisticsJSON, err = json.Marshal(meta.Statistics)
		if err != nil {
			return nil, fmt.Errorf("marshal statistics: %w", err)
		}
	}

	var tagsJSON []byte
	if len(meta.Tags) > 0 {
		tagsJSON, err = json.Marshal(meta.Tags)
		if err != nil {
			return nil, fmt.Errorf("marshal tags: %w", err)
		}
	}

	var metaDataJSON []byte
	if meta.MetaData != nil {
		metaDataJSON, err = json.Marshal(meta.MetaData)
		if err != nil {
			return nil, fmt.Errorf("marshal metadata: %w", err)
		}
	}

	var footprintJSON []byte
	if meta.Footprint != nil {
		footprintJSON, err = json.Marshal(meta.Footprint)
		if err != nil {
			return nil, fmt.Errorf("marshal footprint: %w", err)
		}
	}

	var energyFootJSON []byte
	if meta.EnergyFootprint != nil {
		energyFootJSON, err = json.Marshal(meta.EnergyFootprint)
		if err != nil {
			return nil, fmt.Errorf("marshal energy footprint: %w", err)
		}
	}

	metricDataGz, err := compressJobData(data)
	if err != nil {
		return nil, fmt.Errorf("compress metric data: %w", err)
	}

	return &ParquetJobRow{
		JobID:          meta.JobID,
		Cluster:        meta.Cluster,
		SubCluster:     meta.SubCluster,
		Partition:      meta.Partition,
		Project:        meta.Project,
		User:           meta.User,
		State:          string(meta.State),
		StartTime:      meta.StartTime,
		Duration:       meta.Duration,
		Walltime:       meta.Walltime,
		NumNodes:       meta.NumNodes,
		NumHWThreads:   meta.NumHWThreads,
		NumAcc:         meta.NumAcc,
		Energy:         meta.Energy,
		SMT:            meta.SMT,
		ResourcesJSON:  resourcesJSON,
		StatisticsJSON: statisticsJSON,
		TagsJSON:       tagsJSON,
		MetaDataJSON:   metaDataJSON,
		FootprintJSON:  footprintJSON,
		EnergyFootJSON: energyFootJSON,
		MetricDataGz:   metricDataGz,
	}, nil
}

// ParquetRowToJob converts a ParquetJobRow back into job metadata and metric data.
// This is the reverse of JobToParquetRow.
func ParquetRowToJob(row *ParquetJobRow) (*schema.Job, *schema.JobData, error) {
	meta := &schema.Job{
		JobID:        row.JobID,
		Cluster:      row.Cluster,
		SubCluster:   row.SubCluster,
		Partition:    row.Partition,
		Project:      row.Project,
		User:         row.User,
		State:        schema.JobState(row.State),
		StartTime:    row.StartTime,
		Duration:     row.Duration,
		Walltime:     row.Walltime,
		NumNodes:     row.NumNodes,
		NumHWThreads: row.NumHWThreads,
		NumAcc:       row.NumAcc,
		Energy:       row.Energy,
		SMT:          row.SMT,
	}

	if len(row.ResourcesJSON) > 0 {
		if err := json.Unmarshal(row.ResourcesJSON, &meta.Resources); err != nil {
			return nil, nil, fmt.Errorf("unmarshal resources: %w", err)
		}
	}
	if len(row.StatisticsJSON) > 0 {
		if err := json.Unmarshal(row.StatisticsJSON, &meta.Statistics); err != nil {
			return nil, nil, fmt.Errorf("unmarshal statistics: %w", err)
		}
	}
	if len(row.TagsJSON) > 0 {
		if err := json.Unmarshal(row.TagsJSON, &meta.Tags); err != nil {
			return nil, nil, fmt.Errorf("unmarshal tags: %w", err)
		}
	}
	if len(row.MetaDataJSON) > 0 {
		if err := json.Unmarshal(row.MetaDataJSON, &meta.MetaData); err != nil {
			return nil, nil, fmt.Errorf("unmarshal metadata: %w", err)
		}
	}
	if len(row.FootprintJSON) > 0 {
		if err := json.Unmarshal(row.FootprintJSON, &meta.Footprint); err != nil {
			return nil, nil, fmt.Errorf("unmarshal footprint: %w", err)
		}
	}
	if len(row.EnergyFootJSON) > 0 {
		if err := json.Unmarshal(row.EnergyFootJSON, &meta.EnergyFootprint); err != nil {
			return nil, nil, fmt.Errorf("unmarshal energy footprint: %w", err)
		}
	}

	data, err := decompressJobData(row.MetricDataGz)
	if err != nil {
		return nil, nil, fmt.Errorf("decompress metric data: %w", err)
	}
	return meta, data, nil
}
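
// JobToParquetRow and ParquetRowToJob are intended to be inverses of each
// other. A minimal round-trip sketch (illustrative only; job and jobData are
// hypothetical *schema.Job / *schema.JobData values, not defined in this file):
//
//	row, err := JobToParquetRow(job, jobData)
//	if err != nil {
//		return err
//	}
//	meta, data, err := ParquetRowToJob(row)
//	if err != nil {
//		return err
//	}
//	// meta and data should now match job and jobData up to JSON round-tripping.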

// decompressJobData gunzips the given byte slice and unmarshals the contained
// JSON into a schema.JobData value. It is the counterpart to compressJobData.
func decompressJobData(data []byte) (*schema.JobData, error) {
	gz, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	defer gz.Close()

	var buf bytes.Buffer
	if _, err := buf.ReadFrom(gz); err != nil {
		return nil, err
	}

	var jobData schema.JobData
	if err := json.Unmarshal(buf.Bytes(), &jobData); err != nil {
		return nil, err
	}
	return &jobData, nil
}

// compressJobData marshals the given schema.JobData to JSON and compresses the
// result with gzip at the highest compression level.
func compressJobData(data *schema.JobData) ([]byte, error) {
	jsonBytes, err := json.Marshal(data)
	if err != nil {
		return nil, err
	}

	var buf bytes.Buffer
	gz, err := gzip.NewWriterLevel(&buf, gzip.BestCompression)
	if err != nil {
		return nil, err
	}
	if _, err := gz.Write(jsonBytes); err != nil {
		return nil, err
	}
	if err := gz.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
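
// Because MetricDataGz is plain gzip-compressed JSON, a stored payload can also
// be inspected without these helpers. A sketch (assuming raw holds the column
// value read back from a Parquet file; raw is hypothetical):
//
//	r, err := gzip.NewReader(bytes.NewReader(raw))
//	if err != nil {
//		return err
//	}
//	defer r.Close()
//	payload, err := io.ReadAll(r) // payload is the uncompressed JSON document
//	if err != nil {
//		return err
//	}
//	fmt.Println(string(payload))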