MD-Bench/static_analysis/jan/analyses/gromacs-icx-avx512-dp-iaca.out

155 lines
15 KiB
Plaintext
Raw Normal View History

2023-02-13 14:15:08 +01:00
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
Analyzed File - gromacs-icx-avx512-dp.o
Binary Format - 64Bit
Architecture - SKX
Analysis Type - Throughput
Throughput Analysis Report
--------------------------
Block Throughput: 49.26 Cycles Throughput Bottleneck: Backend
Loop Count: 22
Port Binding In Cycles Per Iteration:
--------------------------------------------------------------------------------------------------
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
--------------------------------------------------------------------------------------------------
| Cycles | 44.0 0.0 | 13.5 | 5.5 5.5 | 5.5 5.5 | 0.0 | 44.0 | 13.5 | 0.0 |
--------------------------------------------------------------------------------------------------
DV - Divider pipe (on port 0)
D - Data fetch pipe (on ports 2 and 3)
F - Macro Fusion with the previous instruction occurred
* - instruction micro-ops not bound to a port
^ - Micro Fusion occurred
# - ESP Tracking sync uop was issued
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
X - instruction not supported, was not accounted in Analysis
| Num Of | Ports pressure in cycles | |
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
-----------------------------------------------------------------------------------------
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | movsxd rcx, dword ptr [r10+rbx*4]
| 1 | | 1.0 | | | | | | | lea rdx, ptr [rcx+rcx*2]
| 1 | | | | | | | 1.0 | | shl rdx, 0x6
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm28, zmmword ptr [rsi+rdx*1]
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm29, zmmword ptr [rsi+rdx*1+0x40]
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm30, zmmword ptr [rsi+rdx*1+0x80]
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm3, zmmword ptr [rsp+0x10]
| 1 | | | | | | 1.0 | | | vsubpd zmm3, zmm3, zmm28
| 1 | 1.0 | | | | | | | | vsubpd zmm31, zmm24, zmm30
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm16, zmmword ptr [rsp+0x150]
| 1 | | | | | | 1.0 | | | vsubpd zmm16, zmm16, zmm29
| 1 | 1.0 | | | | | | | | vmulpd zmm17, zmm31, zmm31
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm17, zmm16, zmm16
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm17, zmm3, zmm3
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm18, zmm17
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm21, zmm18
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm18, zmm19
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm18, zmm19
| 1 | | | | | | 1.0 | | | vaddpd zmm20, zmm19, zmm1
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm22, zmm18
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm18, zmm20
| 1 | 1.0 | | | | | | | | vsubpd zmm20, zmm25, zmm30
| 1 | | 1.0 | | | | | | | lea edx, ptr [rcx+rcx*1]
| 1 | | | | | | | 1.0 | | cmp r11, rdx
| 1 | | | | | | | 1.0 | | setnz dl
| 1 | | | | | | | 1.0 | | setz al
| 1 | | 1.0 | | | | | | | add ecx, ecx
| 1 | | 1.0 | | | | | | | inc ecx
| 1 | | 0.5 | | | | | 0.5 | | cmp r11, rcx
| 1 | | | | | | | 1.0 | | setz cl
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm19, zmm18
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm19, zmmword ptr [rsp+0x210]
| 1 | 1.0 | | | | | | | | vsubpd zmm19, zmm19, zmm28
| 1 | | | | | | | 1.0 | | setnz dil
| 1* | | | | | | | | | mov ebp, edi
| 1 | | | | | | | 1.0 | | shl bpl, 0x4
| 1 | | 1.0 | | | | | | | sub bpl, al
| 1 | | 1.0 | | | | | | | add bpl, 0xef
| 1 | | | | | | 1.0 | | | kmovd k1, ebp
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm17, zmm0, 0x1
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm17, zmmword ptr [rsp+0x110]
| 1 | 1.0 | | | | | | | | vsubpd zmm17, zmm17, zmm29
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx+rdx*1]
| 1* | | | | | | | | | mov ebp, edi
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm18, zmm2
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14{k1}, zmm3, zmm18
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm20, zmm20
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm3, zmm17, zmm17
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm19
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm11{k1}, zmm16, zmm18
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm16, zmm3
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm7{k1}, zmm31, zmm18
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm21, zmm16
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm16, zmm18
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm16, zmm18
| 1 | | | | | | 1.0 | | | vaddpd zmm31, zmm18, zmm1
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm22, zmm16
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm16, zmm31
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm31, zmmword ptr [rsp+0x1d0]
| 1 | 1.0 | | | | | | | | vsubpd zmm31, zmm31, zmm28
| 1 | | | | | | | 1.0 | | shl bpl, 0x5
| 1 | | 1.0 | | | | | | | or bpl, al
| 1 | | 0.5 | | | | | 0.5 | | or bpl, 0xdd
| 1 | | | | | | 1.0 | | | kmovd k1, ebp
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm3, zmm0, 0x1
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm3, zmmword ptr [rsp+0xd0]
| 1 | 1.0 | | | | | | | | vsubpd zmm3, zmm3, zmm29
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm18, zmm16
| 1 | | | | | | 1.0 | | | vsubpd zmm18, zmm26, zmm30
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm2
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm15{k1}, zmm19, zmm16
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm18, zmm18
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm19, zmm3, zmm3
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm19, zmm31, zmm31
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm10{k1}, zmm17, zmm16
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm17, zmm19
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6{k1}, zmm20, zmm16
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm21, zmm17
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm17, zmm16
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm17, zmm16
| 1 | | | | | | 1.0 | | | vaddpd zmm20, zmm16, zmm1
| 1 | 1.0 | | | | | | | | vmulpd zmm17, zmm22, zmm17
| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm17, zmm20
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm17
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx*4]
| 1 | | | | | | | 1.0 | | shl dil, 0x6
| 1 | | 0.5 | | | | | 0.5 | | or dil, al
| 1 | | 0.5 | | | | | 0.5 | | or dil, 0xbb
| 1 | | | | | | 1.0 | | | kmovd k1, edi
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm19, zmm0, 0x1
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm17, zmmword ptr [rsp+0x190]
| 1 | 1.0 | | | | | | | | vsubpd zmm17, zmm17, zmm28
| 1 | 1.0 | | | | | | | | vsubpd zmm19, zmm23, zmm29
| 1 | | | | | | 1.0 | | | vsubpd zmm20, zmm27, zmm30
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm2
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm13{k1}, zmm31, zmm16
| 1 | 1.0 | | | | | | | | vmulpd zmm28, zmm20, zmm20
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm28, zmm19, zmm19
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm28, zmm17, zmm17
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm9{k1}, zmm3, zmm16
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm3, zmm28
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm5{k1}, zmm18, zmm16
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm21, zmm3
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm3, zmm16
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm3, zmm16
| 1 | | | | | | 1.0 | | | vaddpd zmm18, zmm16, zmm1
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm22, zmm3
| 1 | | | | | | 1.0 | | | vmulpd zmm3, zmm3, zmm18
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm16, zmm3
| 1 | | | | | | | 1.0 | | shl dl, 0x3
| 1 | | | | | | | 1.0 | | shl cl, 0x7
| 1 | | 1.0 | | | | | | | or cl, dl
| 1 | | 1.0 | | | | | | | add cl, 0xf7
| 1 | | | | | | 1.0 | | | kmovd k1, ecx
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm28, zmm0, 0x1
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm3, zmm2
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm12{k1}, zmm17, zmm3
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm8{k1}, zmm19, zmm3
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm4{k1}, zmm20, zmm3
| 1 | | 0.5 | | | | | 0.5 | | inc rbx
| 1* | | | | | | | | | cmp r9, rbx
| 0*F | | | | | | | | | jnz 0xfffffffffffffd5a
Total Num Of Uops: 129
Analysis Notes:
Backend allocation was stalled due to unavailable allocation resources.