fix to Avro writer special cases

This commit is contained in:
Aditya Ujeniya 2025-05-14 17:29:28 +02:00
parent a03eb315f5
commit 5569ad53d2
6 changed files with 149 additions and 110 deletions

View File

@ -134,9 +134,9 @@ func main() {
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
fmt.Printf("HTTPS server listening at %s...", config.Keys.HttpConfig.Address)
fmt.Printf("HTTPS server listening at %s...\n", config.Keys.HttpConfig.Address)
} else {
fmt.Printf("HTTP server listening at %s...", config.Keys.HttpConfig.Address)
fmt.Printf("HTTP server listening at %s...\n", config.Keys.HttpConfig.Address)
}
wg.Add(1)

View File

@ -9,89 +9,89 @@ while [ true ]; do
echo "Alex Metrics for hwthread types and type-ids"
timestamp="$(date '+%s%N')"
echo "Timestamp : "+$timestamp
for metric in cpu_load cpu_user;do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..127}; do
echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# for metric in cpu_load cpu_user;do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in {0..127}; do
# echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
echo "Fritz Metrics for hwthread types and type-ids"
for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for id in {0..71}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
done
# echo "Fritz Metrics for hwthread types and type-ids"
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
# for id in {0..71}; do
# echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
# done
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt
rm sample_alex.txt
# rm sample_fritz.txt
# rm sample_alex.txt
echo "Alex Metrics for accelerator types and type-ids"
for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do
echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for accelerator types and type-ids"
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do
# echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
rm sample_alex.txt
# rm sample_alex.txt
echo "Alex Metrics for memoryDomain types and type-ids"
for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..7}; do
echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for memoryDomain types and type-ids"
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in {0..7}; do
# echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
rm sample_alex.txt
# rm sample_alex.txt
echo "Alex Metrics for socket types and type-ids"
for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..1}; do
echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for socket types and type-ids"
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in {0..1}; do
# echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
echo "Fritz Metrics for socket types and type-ids"
for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for id in {0..1}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
done
# echo "Fritz Metrics for socket types and type-ids"
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
# for id in {0..1}; do
# echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
# done
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt
rm sample_alex.txt
# rm sample_fritz.txt
# rm sample_alex.txt
echo "Alex Metrics for nodes"
for metric in cpu_irq cpu_load; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
# echo "Alex Metrics for nodes"
# for metric in cpu_irq cpu_load; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
echo "Fritz Metrics for nodes"
for metric in cpu_irq cpu_load; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do

View File

@ -9,10 +9,12 @@ import (
"path"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-metric-store/internal/config"
"github.com/ClusterCockpit/cc-metric-store/internal/util"
"github.com/linkedin/goavro/v2"
)
@ -32,7 +34,7 @@ func (as *AvroStore) ToCheckpoint(dir string) (int, error) {
for sel2, l2 := range l1.children {
l2.lock.RLock()
// Frequency
for sel3, l3 := range l1.children {
for sel3, l3 := range l2.children {
levels = append(levels, l3)
selectors = append(selectors, []string{sel1, sel2, sel3})
}
@ -98,6 +100,8 @@ func getTimestamp(dir string) int64 {
// The existing avro file will be in epoch timestamp format
// iterate over all the files in the directory and find the maximum timestamp
// and return it
dir = path.Dir(dir)
files, err := os.ReadDir(dir)
if err != nil {
return 0
@ -116,15 +120,24 @@ func getTimestamp(dir string) int64 {
if len(name) < 5 {
continue
}
ts, err := strconv.ParseInt(name[:len(name)-5], 10, 64)
ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
if err != nil {
fmt.Printf("error while parsing timestamp: %s\n", err.Error())
continue
}
if ts > maxTs {
maxTs = ts
}
}
interval, _ := time.ParseDuration(config.Keys.Checkpoints.Interval)
updateTime := time.Now().Add(-interval).Unix()
if maxTs > updateTime {
return 0
}
return maxTs
}
@ -135,32 +148,36 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64) error {
// find smallest overall timestamp in l.data map and delete it from l.data
var minTs int64 = int64(1<<63 - 1)
for ts := range l.data {
if ts < minTs {
if ts < minTs && len(l.data[ts]) != 0 {
minTs = ts
}
}
if from == 0 {
if from == 0 && minTs != int64(1<<63-1) {
from = minTs
}
if from == 0 {
return ErrNoNewData
}
var schema string
var codec *goavro.Codec
record_list := make([]map[string]interface{}, 0)
var f *os.File
filePath := path.Join(dir, fmt.Sprintf("%d.avro", from))
filePath := dir + fmt.Sprintf("_%d.avro", from)
if _, err := os.Stat(filePath); errors.Is(err, os.ErrNotExist) {
err = os.MkdirAll(dir, 0o755)
err = os.MkdirAll(path.Dir(dir), 0o755)
if err == nil {
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY, 0o644)
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to create new avro file: %v", err)
}
}
} else {
} else if fp_, err := os.Stat(filePath); fp_.Size() != 0 || errors.Is(err, os.ErrNotExist) {
f, err = os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open existing avro file: %v", err)
@ -169,52 +186,58 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64) error {
if err != nil {
return fmt.Errorf("failed to create OCF reader: %v", err)
}
schema = reader.Codec().Schema()
codec = reader.Codec()
schema = codec.Schema()
f.Close()
f, err = os.OpenFile(filePath, os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
log.Fatalf("Failed to create file: %v", err)
return fmt.Errorf("failed to create file: %v", err)
}
} else {
f, err = os.OpenFile(filePath, os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to append new avro file: %v", err)
}
}
defer f.Close()
time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes) * time.Minute).Unix()
time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
for ts := range l.data {
if ts < time_ref {
schema_gen, err := generateSchema(l.data[ts])
data := l.data[ts]
schema_gen, err := generateSchema(data)
if err != nil {
return err
}
flag, schema, err := compareSchema(schema, schema_gen)
if err != nil {
log.Fatalf("Failed to compare read and generated schema: %v", err)
return fmt.Errorf("failed to compare read and generated schema: %v", err)
}
if flag {
codec, err = goavro.NewCodec(schema)
if err != nil {
log.Fatalf("Failed to create codec after merged schema: %v", err)
}
f.Close()
f, err = os.Open(filePath)
if err != nil {
log.Fatalf("Failed to open Avro file: %v", err)
return fmt.Errorf("failed to open Avro file: %v", err)
}
ocfReader, err := goavro.NewOCFReader(f)
if err != nil {
log.Fatalf("Failed to create OCF reader: %v", err)
return fmt.Errorf("failed to create OCF reader: %v", err)
}
for ocfReader.Scan() {
record, err := ocfReader.Read()
if err != nil {
log.Fatalf("Failed to read record: %v", err)
return fmt.Errorf("failed to read record: %v", err)
}
record_list = append(record_list, record.(map[string]interface{}))
@ -224,17 +247,21 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64) error {
err = os.Remove(filePath)
if err != nil {
log.Fatalf("Failed to delete file: %v", err)
return fmt.Errorf("failed to delete file: %v", err)
}
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_RDWR, 0o644)
if err != nil {
log.Fatalf("Failed to create file after deleting : %v", err)
return fmt.Errorf("failed to create file after deleting : %v", err)
}
}
codec, err = goavro.NewCodec(schema)
if err != nil {
return fmt.Errorf("failed to create codec after merged schema: %v", err)
}
record_list = append(record_list, generateRecord(l.data[ts]))
delete(l.data, minTs)
record_list = append(record_list, generateRecord(data))
delete(l.data, ts)
}
}
@ -242,20 +269,23 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64) error {
return ErrNoNewData
}
// fmt.Printf("Codec : %#v\n", codec)
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
W: f,
Codec: codec,
Schema: schema,
W: f,
Codec: codec,
})
if err != nil {
log.Fatalf("Failed to create OCF writer: %v", err)
return fmt.Errorf("failed to create OCF writer: %v", err)
}
// Append the new record
if err := writer.Append(record_list); err != nil {
log.Fatalf("Failed to append record: %v", err)
return fmt.Errorf("failed to append record: %v", err)
}
f.Close()
return nil
}
@ -351,9 +381,6 @@ func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
}
fmt.Printf("Merged Schema: %s\n", string(mergedSchemaJson))
fmt.Printf("Read Schema: %s\n", schemaRead)
return true, string(mergedSchemaJson), nil
}
@ -370,9 +397,11 @@ func generateSchema(data map[string]util.Float) (string, error) {
for key := range data {
if _, exists := fieldTracker[key]; !exists {
key = correctKey(key)
field := map[string]interface{}{
"name": key,
"type": "double", // Allows null or float
"type": "double",
"default": 0.0,
}
schema["fields"] = append(schema["fields"].([]map[string]interface{}), field)
@ -387,15 +416,27 @@ func generateSchema(data map[string]util.Float) (string, error) {
return string(schemaString), nil
}
// generateRecord builds a single record from one map of metric values.
// Every key of data is passed through correctKey before it is used as a
// key of the returned map; the returned map has one entry per distinct
// corrected key.
func generateRecord(data map[string]util.Float) map[string]interface{} {
record := make(map[string]interface{})
// Iterate over every key/value entry in data
for key, value := range data {
// Rewrite the key with correctKey before using it as the record key
key = correctKey(key)
// Set the value in the record
// NOTE(review): this assignment is immediately overwritten by the next
// line, so the record ends up holding value.Double() (a float64) and
// never the util.Float itself.
record[key] = value
record[key] = value.Double()
}
return record
}
// correctKey rewrites a key so that it no longer contains ':' or '.'.
// Every ':' becomes "___" (three underscores) and every '.' becomes "__"
// (two underscores); all other characters are copied through unchanged.
func correctKey(key string) string {
	// Neither replacement text contains ':' or '.', so one pass over the
	// key yields the same result as replacing the two characters in turn.
	sanitizer := strings.NewReplacer(
		":", "___",
		".", "__",
	)
	return sanitizer.Replace(key)
}

View File

@ -36,12 +36,8 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
metricName := ""
for i, name := range val.Selector {
if i == 0 {
metricName += name
} else {
metricName += "_" + name
}
for _, selector_name := range val.Selector {
metricName += selector_name + "_"
}
metricName += val.MetricName

View File

@ -1,7 +1,6 @@
package avro
import (
"fmt"
"sync"
"github.com/ClusterCockpit/cc-metric-store/internal/util"
@ -117,7 +116,6 @@ func (l *AvroLevel) addMetric(metricName string, value util.Float, timestamp int
// Create keys for the next KeyCounter timestamps
l.data[lastTs+int64(Freq)] = make(map[string]util.Float, 0)
}
fmt.Printf("Creating timestamp keys to store key-value\n")
}
// Iterate over timestamps and choose the one which is within range.

View File

@ -28,6 +28,10 @@ func (f Float) MarshalJSON() ([]byte, error) {
return strconv.AppendFloat(make([]byte, 0, 10), float64(f), 'f', 3, 64), nil
}
// Double returns f converted to a plain float64.
func (f Float) Double() float64 {
return float64(f)
}
func (f *Float) UnmarshalJSON(input []byte) error {
if string(input) == "null" {
*f = NaN