cc-backend/configs/cluster.json

238 lines
10 KiB
JSON
Raw Permalink Normal View History

{
"subClusters": [
{
"name": "a40",
"numberOfNodes": 38,
"processorType": "AMD Milan",
"socketsPerNode": 2,
"coresPerSocket": 64,
"threadsPerCore": 1,
"flopRateScalar": 432,
"flopRateSimd": 9216,
"memoryBandwidth": 400,
"topology": {
"node": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127],
"socket": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63],
[64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"memoryDomain": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"core": [
[0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30],[31],[32],[33],[34],[35],[36],[37],[38],[39],[40],[41],[42],[43],[44],[45],[46],[47],[48],[49],[50],[51],[52],[53],[54],[55],[56],[57],[58],[59],[60],[61],[62],[63],[64],[65],[66],[67],[68],[69],[70],[71],[73],[74],[75],[76],[77],[78],[79],[80],[81],[82],[83],[84],[85],[86],[87],[88],[89],[90],[91],[92],[93],[94],[95],[96],[97],[98],[99],[100],[101],[102],[103],[104],[105],[106],[107],[108],[109],[110],[111],[112],[113],[114],[115],[116],[117],[118],[119],[120],[121],[122],[123],[124],[125],[126],[127]
],
"accelerators": [
{
"id": "00000000:01:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:25:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:41:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:61:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:81:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:A1:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:C1:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:E1:00.0",
"type": "Nvidia GPU",
"model": "A40"
}
]
}
},
{
"name": "a100",
"numberOfNodes": 20,
"processorType": "AMD Milan",
"socketsPerNode": 2,
"coresPerSocket": 64,
"threadsPerCore": 1,
"flopRateScalar": 432,
"flopRateSimd": 9216,
"memoryBandwidth": 400,
"topology": {
"node": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127],
"socket": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63],
[64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"memoryDomain": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"core": [
[0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30],[31],[32],[33],[34],[35],[36],[37],[38],[39],[40],[41],[42],[43],[44],[45],[46],[47],[48],[49],[50],[51],[52],[53],[54],[55],[56],[57],[58],[59],[60],[61],[62],[63],[64],[65],[66],[67],[68],[69],[70],[71],[73],[74],[75],[76],[77],[78],[79],[80],[81],[82],[83],[84],[85],[86],[87],[88],[89],[90],[91],[92],[93],[94],[95],[96],[97],[98],[99],[100],[101],[102],[103],[104],[105],[106],[107],[108],[109],[110],[111],[112],[113],[114],[115],[116],[117],[118],[119],[120],[121],[122],[123],[124],[125],[126],[127]
],
"accelerators": [
{
"id": "00000000:0E:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:13:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:49:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:4F:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:90:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:96:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:CC:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:D1:00.0",
"type": "Nvidia GPU",
"model": "A100"
}
]
}
}
],
"metricConfig": [
{
"name": "cpu_load",
"scope": "node",
"unit": "load 1m",
"timestep": 60,
"aggregation": null,
"peak": 128,
"normal": 128,
"caution": 10,
"alert": 5
},
{
"name": "cpu_user",
"scope": "hwthread",
"unit": "cpu user",
"timestep": 60,
"aggregation": "avg",
"peak": 100,
"normal": 50,
"caution": 20,
"alert": 10
},
{
"name": "mem_used",
"scope": "node",
"unit": "GB",
"timestep": 60,
"aggregation": null,
"peak": 512,
"normal": 128,
"caution": 200,
"alert": 240
},
{
"name": "flops_any",
"scope": "hwthread",
"unit": "GF/s",
"timestep": 60,
"aggregation": "sum",
"peak": 9216,
"normal": 1000,
"caution": 200,
"alert": 50
},
{
"name": "mem_bw",
"scope": "socket",
"unit": "GB/s",
"timestep": 60,
"aggregation": "sum",
"peak": 350,
"normal": 100,
"caution": 50,
"alert": 10
},
{
"name": "clock",
"scope": "hwthread",
"unit": "MHz",
"timestep": 60,
"aggregation": "avg",
"peak": 3000,
"normal": 2400,
"caution": 1800,
"alert": 1200
},
{
"name": "core_power",
"scope": "hwthread",
"unit": "W",
"timestep": 60,
"aggregation": "sum",
"peak": 500,
"normal": 250,
"caution": 100,
"alert": 50
},
{
"name": "cpu_power",
"scope": "socket",
"unit": "W",
"timestep": 60,
"aggregation": "sum",
"peak": 500,
"normal": 250,
"caution": 100,
"alert": 50
},
{
"name": "ipc",
"scope": "hwthread",
"unit": "IPC",
"timestep": 60,
"aggregation": "avg",
"peak": 4,
"normal": 2,
"caution": 1,
"alert": 0.5
}
]
}