Initial avro reader complete

This commit is contained in:
Aditya Ujeniya 2025-05-19 09:28:30 +02:00
parent 8098417f78
commit 968940da1f
8 changed files with 352 additions and 154 deletions

View File

@ -69,7 +69,7 @@ func main() {
restoreFrom := startupTime.Add(-d) restoreFrom := startupTime.Add(-d)
log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339)) log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
files, err := ms.FromCheckpoint(config.Keys.Checkpoints.RootDir, restoreFrom.Unix()) files, err := ms.FromCheckpointFiles(config.Keys.Checkpoints.RootDir, restoreFrom.Unix())
loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
if err != nil { if err != nil {
log.Fatalf("Loading checkpoints failed: %s\n", err.Error()) log.Fatalf("Loading checkpoints failed: %s\n", err.Error())

View File

@ -6,97 +6,96 @@ rm sample_fritz.txt
rm sample_alex.txt rm sample_alex.txt
while [ true ]; do while [ true ]; do
echo "Alex Metrics for hwthread types and type-ids"
timestamp="$(date '+%s%N')" timestamp="$(date '+%s%N')"
echo "Timestamp : "+$timestamp echo "Timestamp : "+$timestamp
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..127}; do
echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for hwthread types and type-ids" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# for metric in cpu_load cpu_user;do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in {0..127}; do
# echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt echo "Fritz Metrics for hwthread types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for id in {0..71}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
done
# echo "Fritz Metrics for hwthread types and type-ids" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
# for id in {0..71}; do
# echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
# done
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt rm sample_fritz.txt
rm sample_alex.txt
# rm sample_fritz.txt echo "Alex Metrics for accelerator types and type-ids"
# rm sample_alex.txt for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do
echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for accelerator types and type-ids" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do
# echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt rm sample_alex.txt
# rm sample_alex.txt echo "Alex Metrics for memoryDomain types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..7}; do
echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for memoryDomain types and type-ids" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in {0..7}; do
# echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt rm sample_alex.txt
# rm sample_alex.txt echo "Alex Metrics for socket types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..1}; do
echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
# echo "Alex Metrics for socket types and type-ids" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# for id in {0..1}; do
# echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt echo "Fritz Metrics for socket types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for id in {0..1}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
done
# echo "Fritz Metrics for socket types and type-ids" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
# for metric in cpu_load cpu_user; do # flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
# for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
# for id in {0..1}; do
# echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
# done
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt rm sample_fritz.txt
rm sample_alex.txt
# rm sample_fritz.txt echo "Alex Metrics for nodes"
# rm sample_alex.txt for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
# echo "Alex Metrics for nodes" curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
# for metric in cpu_irq cpu_load; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
# for hostname in a0603 a0903; do # a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
# echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
# done
# done
# curl -X 'POST' 'http://localhost:8082/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt echo "Fritz Metrics for nodes"
for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
echo "30s Fritz Metrics for nodes" for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for metric in clock cpu_idle; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done done
done done
@ -104,32 +103,8 @@ while [ true ]; do
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt rm sample_fritz.txt
# rm sample_alex.txt rm sample_alex.txt
sleep 30s sleep 1m
timestamp="$(date '+%s%N')"
echo "Timestamp : "+$timestamp
echo "30s Fritz Metrics for nodes"
for metric in clock cpu_idle; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
echo "60s Fritz Metrics for nodes"
for metric in cpu_irq cpu_load ipc; do # mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in f0201 f0202; do # f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
curl -X 'POST' 'http://localhost:8082/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt
# rm sample_alex.txt
sleep 30s
done done
# curl -X 'POST' 'http://localhost:8081/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296' # curl -X 'POST' 'http://localhost:8081/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296'

View File

@ -1,6 +1,7 @@
package avro package avro
import ( import (
"bufio"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
@ -149,7 +150,7 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
l.lock.Lock() l.lock.Lock()
defer l.lock.Unlock() defer l.lock.Unlock()
fmt.Printf("Checkpointing directory: %s\n", dir) // fmt.Printf("Checkpointing directory: %s\n", dir)
// find smallest overall timestamp in l.data map and delete it from l.data // find smallest overall timestamp in l.data map and delete it from l.data
var minTs int64 = int64(1<<63 - 1) var minTs int64 = int64(1<<63 - 1)
@ -175,17 +176,21 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
filePath := dir + fmt.Sprintf("_%d.avro", from) filePath := dir + fmt.Sprintf("_%d.avro", from)
if _, err := os.Stat(filePath); errors.Is(err, os.ErrNotExist) { fp_, err := os.Stat(filePath)
if errors.Is(err, os.ErrNotExist) {
err = os.MkdirAll(path.Dir(dir), 0o755) err = os.MkdirAll(path.Dir(dir), 0o755)
if err != nil { if err != nil {
return fmt.Errorf("failed to create directory: %v", err) return fmt.Errorf("failed to create directory: %v", err)
} }
} else if fp_, err := os.Stat(filePath); fp_.Size() != 0 || errors.Is(err, os.ErrNotExist) { } else if fp_.Size() != 0 {
f, err = os.Open(filePath) f, err = os.Open(filePath)
if err != nil { if err != nil {
return fmt.Errorf("failed to open existing avro file: %v", err) return fmt.Errorf("failed to open existing avro file: %v", err)
} }
reader, err := goavro.NewOCFReader(f)
br := bufio.NewReader(f)
reader, err := goavro.NewOCFReader(br)
if err != nil { if err != nil {
return fmt.Errorf("failed to create OCF reader: %v", err) return fmt.Errorf("failed to create OCF reader: %v", err)
} }
@ -194,11 +199,6 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
f.Close() f.Close()
} }
f, err := os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to append new avro file: %v", err)
}
defer f.Close()
time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix() time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
@ -218,7 +218,10 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
} }
} }
readFlag := true
for ts := range l.data { for ts := range l.data {
flag := false
if ts < time_ref { if ts < time_ref {
data := l.data[ts] data := l.data[ts]
@ -228,12 +231,12 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
return err return err
} }
flag, schema, err := compareSchema(schema, schema_gen) flag, schema, err = compareSchema(schema, schema_gen)
if err != nil { if err != nil {
return fmt.Errorf("failed to compare read and generated schema: %v", err) return fmt.Errorf("failed to compare read and generated schema: %v", err)
} }
if flag { if flag && readFlag {
f.Close() f.Close()
@ -242,9 +245,11 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
return fmt.Errorf("failed to open Avro file: %v", err) return fmt.Errorf("failed to open Avro file: %v", err)
} }
ocfReader, err := goavro.NewOCFReader(f) br := bufio.NewReader(f)
ocfReader, err := goavro.NewOCFReader(br)
if err != nil { if err != nil {
return fmt.Errorf("failed to create OCF reader: %v", err) return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
} }
for ocfReader.Scan() { for ocfReader.Scan() {
@ -263,10 +268,7 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
return fmt.Errorf("failed to delete file: %v", err) return fmt.Errorf("failed to delete file: %v", err)
} }
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_RDWR, 0o644) readFlag = false
if err != nil {
return fmt.Errorf("failed to create file after deleting : %v", err)
}
} }
codec, err = goavro.NewCodec(schema) codec, err = goavro.NewCodec(schema)
if err != nil { if err != nil {
@ -282,6 +284,11 @@ func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
return ErrNoNewData return ErrNoNewData
} }
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to append new avro file: %v", err)
}
// fmt.Printf("Codec : %#v\n", codec) // fmt.Printf("Codec : %#v\n", codec)
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{ writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
@ -415,7 +422,7 @@ func generateSchema(data map[string]util.Float) (string, error) {
field := map[string]interface{}{ field := map[string]interface{}{
"name": key, "name": key,
"type": "double", "type": "double",
"default": 0.0, "default": -1.0,
} }
schema["fields"] = append(schema["fields"].([]map[string]interface{}), field) schema["fields"] = append(schema["fields"].([]map[string]interface{}), field)
fieldTracker[key] = struct{}{} fieldTracker[key] = struct{}{}
@ -453,3 +460,12 @@ func correctKey(key string) string {
return key return key
} }
// ReplaceKey rewrites underscore escape sequences in key back to punctuation:
// every "___" becomes ":" and, afterwards, every remaining "__" becomes ".".
// The triple-underscore pass must run first so that a "___" is not partially
// consumed as "__". A single "_" is left untouched.
// NOTE(review): presumably the inverse of the escaping done when keys are
// written to Avro — confirm against correctKey.
func ReplaceKey(key string) string {
	withColons := strings.ReplaceAll(key, "___", ":")
	return strings.ReplaceAll(withColons, "__", ".")
}

View File

@ -38,7 +38,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
metricName := "" metricName := ""
for _, selector_name := range val.Selector { for _, selector_name := range val.Selector {
metricName += selector_name + "_" metricName += selector_name + Delimiter
} }
metricName += val.MetricName metricName += val.MetricName

View File

@ -7,7 +7,7 @@ import (
) )
var LineProtocolMessages = make(chan AvroStruct) var LineProtocolMessages = make(chan AvroStruct)
var Delimiter = "ZZZZZ"
var AvroCounter = 0 var AvroCounter = 0
// CheckpointBufferMinutes should always be in minutes. // CheckpointBufferMinutes should always be in minutes.

View File

@ -122,7 +122,8 @@ func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead
return 0, err return 0, err
} }
files, err := findFiles(entries, from, false) extension := config.Keys.Checkpoints.FileFormat
files, err := findFiles(entries, from, extension, false)
if err != nil { if err != nil {
return 0, err return 0, err
} }

View File

@ -22,6 +22,7 @@ import (
"github.com/ClusterCockpit/cc-metric-store/internal/avro" "github.com/ClusterCockpit/cc-metric-store/internal/avro"
"github.com/ClusterCockpit/cc-metric-store/internal/config" "github.com/ClusterCockpit/cc-metric-store/internal/config"
"github.com/ClusterCockpit/cc-metric-store/internal/util" "github.com/ClusterCockpit/cc-metric-store/internal/util"
"github.com/linkedin/goavro/v2"
) )
// Whenever changed, update MarshalJSON as well! // Whenever changed, update MarshalJSON as well!
@ -91,6 +92,8 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute): case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute):
// This is the first tick until we collect the data for given minutes. // This is the first tick until we collect the data for given minutes.
avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, false) avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, false)
// log.Printf("Checkpointing %d avro files", count)
} }
ticks := func() <-chan time.Time { ticks := func() <-chan time.Time {
@ -107,6 +110,7 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
case <-ticks: case <-ticks:
// Regular ticks of 1 minute to write data. // Regular ticks of 1 minute to write data.
avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, false) avro.GetAvroStore().ToCheckpoint(config.Keys.Checkpoints.RootDir, false)
// log.Printf("Checkpointing %d avro files", count)
} }
} }
}() }()
@ -298,19 +302,7 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
return bw.Flush() return bw.Flush()
} }
// Metrics stored at the lowest 2 levels are not loaded (root and cluster)! func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
// This function can only be called once and before the very first write or read.
// Different host's data is loaded to memory in parallel.
func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
if _, err := os.Stat(dir); os.IsNotExist(err) {
// The directory does not exist, so create it using os.MkdirAll()
err := os.MkdirAll(dir, 0755) // 0755 sets the permissions for the directory
if err != nil {
log.Fatalf("Error creating directory: %#v\n", err)
}
fmt.Printf("%#v Directory created successfully.\n", dir)
}
var wg sync.WaitGroup var wg sync.WaitGroup
work := make(chan [2]string, NumWorkers) work := make(chan [2]string, NumWorkers)
n, errs := int32(0), int32(0) n, errs := int32(0), int32(0)
@ -321,7 +313,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
defer wg.Done() defer wg.Done()
for host := range work { for host := range work {
lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics)) lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m) nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
if err != nil { if err != nil {
log.Fatalf("error while loading checkpoints: %s", err.Error()) log.Fatalf("error while loading checkpoints: %s", err.Error())
atomic.AddInt32(&errs, 1) atomic.AddInt32(&errs, 1)
@ -378,6 +370,211 @@ done:
return int(n), nil return int(n), nil
} }
// FromCheckpointFiles loads checkpoint data found under dir into the store and
// returns whatever FromCheckpoint returns for the format that was selected.
//
// Metrics stored at the lowest 2 levels are not loaded (root and cluster)!
// This function can only be called once and before the very first write or read.
// Different host's data is loaded to memory in parallel.
//
// If dir does not exist it is created. The configured checkpoint file format
// (config.Keys.Checkpoints.FileFormat) is tried first; if no file with that
// extension exists anywhere below dir, the other known format ("json" <->
// "avro") is tried. When neither is present, (0, nil) is returned.
//
// Failures are returned to the caller instead of terminating the process, so
// the caller decides whether they are fatal.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
	if _, err := os.Stat(dir); os.IsNotExist(err) {
		// The directory does not exist, so create it (0755: rwx for owner, rx for others).
		if err := os.MkdirAll(dir, 0755); err != nil {
			return 0, fmt.Errorf("error creating directory %q: %w", dir, err)
		}
		fmt.Printf("%#v Directory created successfully.\n", dir)
	}

	// Preferred on-disk format, as configured.
	fileFormat := config.Keys.Checkpoints.FileFormat

	// Fallback format for each known format.
	oppositeFormat := map[string]string{
		"json": "avro",
		"avro": "json",
	}

	// First, attempt to load the configured format.
	found, err := checkFilesWithExtension(dir, fileFormat)
	if err != nil {
		return 0, fmt.Errorf("error checking files with extension: %w", err)
	}
	if found {
		log.Printf("Loading %s files because fileformat is %s\n", fileFormat, fileFormat)
		return m.FromCheckpoint(dir, from, fileFormat)
	}

	// Otherwise try the opposite format. An unrecognised fileFormat has no
	// opposite; skip the probe rather than searching for an empty extension.
	if altFormat, known := oppositeFormat[fileFormat]; known {
		found, err := checkFilesWithExtension(dir, altFormat)
		if err != nil {
			return 0, fmt.Errorf("error checking files with extension: %w", err)
		}
		if found {
			log.Printf("Loading %s files but fileformat is %s\n", altFormat, fileFormat)
			return m.FromCheckpoint(dir, from, altFormat)
		}
	}

	log.Println("No valid checkpoint files found in the directory.")
	return 0, nil
}
// checkFilesWithExtension reports whether the tree rooted at dir contains at
// least one non-directory entry whose name has the extension "."+extension.
// extension is given without the leading dot (e.g. "json", "avro"); an empty
// extension never matches.
//
// The walk stops at the first match, so a hit does not require visiting the
// rest of the tree. Any error hit before a match (including dir not existing)
// is returned together with false.
func checkFilesWithExtension(dir string, extension string) (bool, error) {
	if extension == "" {
		return false, nil
	}

	// errMatch is a private sentinel used only to abort the walk early; it is
	// never handed back to the caller.
	errMatch := errors.New("matching file found")
	wantExt := "." + extension

	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return fmt.Errorf("error accessing path %s: %v", path, err)
		}
		if !info.IsDir() && filepath.Ext(info.Name()) == wantExt {
			return errMatch
		}
		return nil
	})

	switch {
	case err == nil:
		return false, nil
	case errors.Is(err, errMatch):
		return true, nil
	default:
		return false, fmt.Errorf("error walking through directories: %s", err)
	}
}
// loadAvroFile reads every record of the Avro OCF checkpoint file f and
// attaches the decoded series to l, or to descendants of l, as buffers.
//
// The resolution is parsed from the base name of f, which must have the form
// "<resolution>_<rest>" with a positive integer resolution.
// Every field of every record must decode to a float64; values are collected
// per field name in record order. Each field name is passed through
// avro.ReplaceKey and then split on avro.Delimiter: all parts but the last
// select nested child levels below l (created on demand), and the last part is
// the metric name handed to createBuffer together with from and the
// resolution.
//
// Malformed input (file name without "_", non-numeric or non-positive
// resolution, unreadable OCF data, unexpected record or value type) is
// reported as an error rather than a panic.
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
	fileName := filepath.Base(f.Name())
	sep := strings.Index(fileName, "_")
	if sep < 0 {
		return fmt.Errorf("error while reading avro file (resolution parsing) : file name %q has no '_' separator", fileName)
	}
	resolution, err := strconv.ParseInt(fileName[:sep], 10, 64)
	if err != nil {
		return fmt.Errorf("error while reading avro file (resolution parsing) : %s", err)
	}
	if resolution <= 0 {
		return fmt.Errorf("error while reading avro file (resolution parsing) : resolution must be positive, got %d", resolution)
	}

	// Create a new OCF reader on top of a buffered reader.
	ocfReader, err := goavro.NewOCFReader(bufio.NewReader(f))
	if err != nil {
		return fmt.Errorf("error while creating OCF reader for avro file %q : %s", fileName, err)
	}

	metricsData := make(map[string]util.FloatArray)
	var recordCounter int64

	for ocfReader.Scan() {
		datum, err := ocfReader.Read()
		if err != nil {
			return fmt.Errorf("error while reading avro file : %s", err)
		}

		record, ok := datum.(map[string]interface{})
		if !ok {
			return fmt.Errorf("error while reading avro file : record has type %T, want map[string]interface{}", datum)
		}

		for key, value := range record {
			sample, ok := value.(float64)
			if !ok {
				return fmt.Errorf("error while reading avro file : field %q has type %T, want float64", key, value)
			}
			metricsData[key] = append(metricsData[key], util.ConvertToFloat(sample))
		}

		recordCounter++
	}
	// Scan returning false can also mean a read failure, not just end of file.
	if err := ocfReader.Err(); err != nil {
		return fmt.Errorf("error while reading avro file : %s", err)
	}

	// End of the interval covered by the file. Computed as count*resolution so
	// that resolutions above 60 no longer divide by zero (60/resolution == 0).
	to := from + recordCounter*resolution
	if to < from {
		return nil
	}

	for key, floatArray := range metricsData {
		// Split always yields at least one part; without a delimiter the only
		// part is the metric name itself and the buffer is attached to l.
		parts := strings.Split(avro.ReplaceKey(key), avro.Delimiter)

		lvl := l
		for _, sel := range parts[:len(parts)-1] {
			if lvl.children == nil {
				lvl.children = make(map[string]*Level)
			}

			child, ok := lvl.children[sel]
			if !ok {
				child = &Level{
					metrics:  make([]*buffer, len(m.Metrics)),
					children: nil,
				}
				lvl.children[sel] = child
			}
			lvl = child
		}

		leafMetricName := parts[len(parts)-1]
		if err := lvl.createBuffer(m, leafMetricName, floatArray, from, resolution); err != nil {
			return fmt.Errorf("error while creating buffers from avroReader : %s", err)
		}
	}
	return nil
}
// createBuffer wraps floatArray in a new archived buffer that starts at from
// with the given resolution as its frequency, closes it, and appends it to
// the buffer chain that l keeps for metricName.
//
// Metrics that are not present in m.Metrics are skipped silently and nil is
// returned. If l already holds a buffer for the metric, the new buffer is
// linked behind it and becomes the current one; an error is returned, and l
// is left untouched, when the existing buffer starts later than the new one.
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray util.FloatArray, from int64, resolution int64) error {
	fresh := &buffer{
		frequency: resolution,
		start:     from,
		data:      floatArray,
		archived:  true,
	}
	fresh.close()

	info, known := m.Metrics[metricName]
	if !known {
		// Unknown metrics are ignored rather than treated as an error.
		return nil
	}

	if current := l.metrics[info.Offset]; current != nil {
		// Buffers must be chained in ascending start order.
		if current.start > fresh.start {
			return errors.New("wooops")
		}
		fresh.prev = current
		current.next = fresh
	}

	l.metrics[info.Offset] = fresh
	return nil
}
// loadJsonFile decodes f as a single JSON CheckpointFile and hands it to
// loadFile so its contents are attached to l.
//
// A checkpoint whose To field is non-zero and smaller than from is skipped
// and nil is returned. Decoding errors and errors from loadFile are returned
// unchanged.
func (l *Level) loadJsonFile(m *MemoryStore, f *os.File, from int64) error {
	checkpoint := &CheckpointFile{}
	decoder := json.NewDecoder(bufio.NewReader(f))
	if err := decoder.Decode(checkpoint); err != nil {
		return err
	}

	// A zero To never counts as outdated.
	outdated := checkpoint.To != 0 && checkpoint.To < from
	if outdated {
		return nil
	}

	return l.loadFile(checkpoint, m)
}
func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error { func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
for name, metric := range cf.Metrics { for name, metric := range cf.Metrics {
n := len(metric.Data) n := len(metric.Data)
@ -433,7 +630,7 @@ func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
return nil return nil
} }
func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) { func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
direntries, err := os.ReadDir(dir) direntries, err := os.ReadDir(dir)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
@ -443,7 +640,7 @@ func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, err
return 0, err return 0, err
} }
jsonFiles := make([]fs.DirEntry, 0) allFiles := make([]fs.DirEntry, 0)
filesLoaded := 0 filesLoaded := 0
for _, e := range direntries { for _, e := range direntries {
if e.IsDir() { if e.IsDir() {
@ -452,25 +649,32 @@ func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, err
children: make(map[string]*Level), children: make(map[string]*Level),
} }
files, err := child.fromCheckpoint(path.Join(dir, e.Name()), from, m) files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
filesLoaded += files filesLoaded += files
if err != nil { if err != nil {
return filesLoaded, err return filesLoaded, err
} }
l.children[e.Name()] = child l.children[e.Name()] = child
} else if strings.HasSuffix(e.Name(), ".json") { } else if strings.HasSuffix(e.Name(), "."+extension) {
jsonFiles = append(jsonFiles, e) allFiles = append(allFiles, e)
} else { } else {
return filesLoaded, errors.New("unexpected file: " + dir + "/" + e.Name()) continue
} }
} }
files, err := findFiles(jsonFiles, from, true) files, err := findFiles(allFiles, from, extension, true)
if err != nil { if err != nil {
return filesLoaded, err return filesLoaded, err
} }
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
"json": l.loadJsonFile,
"avro": l.loadAvroFile,
}
loader := loaders[extension]
for _, filename := range files { for _, filename := range files {
f, err := os.Open(path.Join(dir, filename)) f, err := os.Open(path.Join(dir, filename))
if err != nil { if err != nil {
@ -478,17 +682,7 @@ func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, err
} }
defer f.Close() defer f.Close()
br := bufio.NewReader(f) if err = loader(m, f, from); err != nil {
cf := &CheckpointFile{}
if err = json.NewDecoder(br).Decode(cf); err != nil {
return filesLoaded, err
}
if cf.To != 0 && cf.To < from {
continue
}
if err = l.loadFile(cf, m); err != nil {
return filesLoaded, err return filesLoaded, err
} }
@ -501,10 +695,14 @@ func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, err
// This will probably get very slow over time! // This will probably get very slow over time!
// A solution could be some sort of an index file in which all other files // A solution could be some sort of an index file in which all other files
// and the timespan they contain is listed. // and the timespan they contain is listed.
func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) { func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
nums := map[string]int64{} nums := map[string]int64{}
for _, e := range direntries { for _, e := range direntries {
ts, err := strconv.ParseInt(strings.TrimSuffix(e.Name(), ".json"), 10, 64) if !strings.HasSuffix(e.Name(), "."+extension) {
continue
}
ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -49,6 +49,14 @@ func (f *Float) UnmarshalJSON(input []byte) error {
// Same as `[]Float`, but can be marshaled to JSON with less allocations. // Same as `[]Float`, but can be marshaled to JSON with less allocations.
type FloatArray []Float type FloatArray []Float
// ConvertToFloat converts input to a Float. An input of exactly -1.0 yields
// NaN; every other value is converted unchanged.
//
// NOTE(review): presumably -1.0 is the placeholder the Avro writer stores for
// missing samples - confirm, since a genuine -1.0 reading is also turned into
// NaN here.
func ConvertToFloat(input float64) Float {
	if input != -1.0 {
		return Float(input)
	}
	return NaN
}
func (fa FloatArray) MarshalJSON() ([]byte, error) { func (fa FloatArray) MarshalJSON() ([]byte, error) {
buf := make([]byte, 0, 2+len(fa)*8) buf := make([]byte, 0, 2+len(fa)*8)
buf = append(buf, '[') buf = append(buf, '[')