155 lines
15 KiB
Plaintext
155 lines
15 KiB
Plaintext
|
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||
|
Analyzed File - gromacs-icx-avx512-dp.o
|
||
|
Binary Format - 64Bit
|
||
|
Architecture - SKX
|
||
|
Analysis Type - Throughput
|
||
|
|
||
|
Throughput Analysis Report
|
||
|
--------------------------
|
||
|
Block Throughput: 49.26 Cycles Throughput Bottleneck: Backend
|
||
|
Loop Count: 22
|
||
|
Port Binding In Cycles Per Iteration:
|
||
|
--------------------------------------------------------------------------------------------------
|
||
|
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||
|
--------------------------------------------------------------------------------------------------
|
||
|
| Cycles | 44.0 0.0 | 13.5 | 5.5 5.5 | 5.5 5.5 | 0.0 | 44.0 | 13.5 | 0.0 |
|
||
|
--------------------------------------------------------------------------------------------------
|
||
|
|
||
|
DV - Divider pipe (on port 0)
|
||
|
D - Data fetch pipe (on ports 2 and 3)
|
||
|
F - Macro Fusion with the previous instruction occurred
|
||
|
* - instruction micro-ops not bound to a port
|
||
|
^ - Micro Fusion occurred
|
||
|
# - ESP Tracking sync uop was issued
|
||
|
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||
|
X - instruction not supported, was not accounted in Analysis
|
||
|
|
||
|
| Num Of | Ports pressure in cycles | |
|
||
|
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||
|
-----------------------------------------------------------------------------------------
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | movsxd rcx, dword ptr [r10+rbx*4]
|
||
|
| 1 | | 1.0 | | | | | | | lea rdx, ptr [rcx+rcx*2]
|
||
|
| 1 | | | | | | | 1.0 | | shl rdx, 0x6
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm28, zmmword ptr [rsi+rdx*1]
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm29, zmmword ptr [rsi+rdx*1+0x40]
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm30, zmmword ptr [rsi+rdx*1+0x80]
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm3, zmmword ptr [rsp+0x10]
|
||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm3, zmm3, zmm28
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm31, zmm24, zmm30
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm16, zmmword ptr [rsp+0x150]
|
||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm16, zmm16, zmm29
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm17, zmm31, zmm31
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm17, zmm16, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm17, zmm3, zmm3
|
||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm18, zmm17
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm21, zmm18
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm18, zmm19
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm18, zmm19
|
||
|
| 1 | | | | | | 1.0 | | | vaddpd zmm20, zmm19, zmm1
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm22, zmm18
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm18, zmm20
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm20, zmm25, zmm30
|
||
|
| 1 | | 1.0 | | | | | | | lea edx, ptr [rcx+rcx*1]
|
||
|
| 1 | | | | | | | 1.0 | | cmp r11, rdx
|
||
|
| 1 | | | | | | | 1.0 | | setnz dl
|
||
|
| 1 | | | | | | | 1.0 | | setz al
|
||
|
| 1 | | 1.0 | | | | | | | add ecx, ecx
|
||
|
| 1 | | 1.0 | | | | | | | inc ecx
|
||
|
| 1 | | 0.5 | | | | | 0.5 | | cmp r11, rcx
|
||
|
| 1 | | | | | | | 1.0 | | setz cl
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm19, zmm18
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm19, zmmword ptr [rsp+0x210]
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm19, zmm19, zmm28
|
||
|
| 1 | | | | | | | 1.0 | | setnz dil
|
||
|
| 1* | | | | | | | | | mov ebp, edi
|
||
|
| 1 | | | | | | | 1.0 | | shl bpl, 0x4
|
||
|
| 1 | | 1.0 | | | | | | | sub bpl, al
|
||
|
| 1 | | 1.0 | | | | | | | add bpl, 0xef
|
||
|
| 1 | | | | | | 1.0 | | | kmovd k1, ebp
|
||
|
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm17, zmm0, 0x1
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm17, zmmword ptr [rsp+0x110]
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm17, zmm17, zmm29
|
||
|
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx+rdx*1]
|
||
|
| 1* | | | | | | | | | mov ebp, edi
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm18, zmm2
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14{k1}, zmm3, zmm18
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm20, zmm20
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm3, zmm17, zmm17
|
||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm19
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm11{k1}, zmm16, zmm18
|
||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm16, zmm3
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm7{k1}, zmm31, zmm18
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm21, zmm16
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm16, zmm18
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm16, zmm18
|
||
|
| 1 | | | | | | 1.0 | | | vaddpd zmm31, zmm18, zmm1
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm22, zmm16
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm16, zmm31
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm31, zmmword ptr [rsp+0x1d0]
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm31, zmm31, zmm28
|
||
|
| 1 | | | | | | | 1.0 | | shl bpl, 0x5
|
||
|
| 1 | | 1.0 | | | | | | | or bpl, al
|
||
|
| 1 | | 0.5 | | | | | 0.5 | | or bpl, 0xdd
|
||
|
| 1 | | | | | | 1.0 | | | kmovd k1, ebp
|
||
|
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm3, zmm0, 0x1
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm3, zmmword ptr [rsp+0xd0]
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm3, zmm3, zmm29
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm18, zmm16
|
||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm18, zmm26, zmm30
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm2
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm15{k1}, zmm19, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm18, zmm18
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm19, zmm3, zmm3
|
||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm19, zmm31, zmm31
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm10{k1}, zmm17, zmm16
|
||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm17, zmm19
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6{k1}, zmm20, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm21, zmm17
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm17, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm17, zmm16
|
||
|
| 1 | | | | | | 1.0 | | | vaddpd zmm20, zmm16, zmm1
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm17, zmm22, zmm17
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm17, zmm20
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm17
|
||
|
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx*4]
|
||
|
| 1 | | | | | | | 1.0 | | shl dil, 0x6
|
||
|
| 1 | | 0.5 | | | | | 0.5 | | or dil, al
|
||
|
| 1 | | 0.5 | | | | | 0.5 | | or dil, 0xbb
|
||
|
| 1 | | | | | | 1.0 | | | kmovd k1, edi
|
||
|
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm19, zmm0, 0x1
|
||
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm17, zmmword ptr [rsp+0x190]
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm17, zmm17, zmm28
|
||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm19, zmm23, zmm29
|
||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm20, zmm27, zmm30
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm2
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm13{k1}, zmm31, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm28, zmm20, zmm20
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm28, zmm19, zmm19
|
||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm28, zmm17, zmm17
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm9{k1}, zmm3, zmm16
|
||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm3, zmm28
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm5{k1}, zmm18, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm21, zmm3
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm3, zmm16
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm3, zmm16
|
||
|
| 1 | | | | | | 1.0 | | | vaddpd zmm18, zmm16, zmm1
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm22, zmm3
|
||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm3, zmm3, zmm18
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm16, zmm3
|
||
|
| 1 | | | | | | | 1.0 | | shl dl, 0x3
|
||
|
| 1 | | | | | | | 1.0 | | shl cl, 0x7
|
||
|
| 1 | | 1.0 | | | | | | | or cl, dl
|
||
|
| 1 | | 1.0 | | | | | | | add cl, 0xf7
|
||
|
| 1 | | | | | | 1.0 | | | kmovd k1, ecx
|
||
|
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm28, zmm0, 0x1
|
||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm3, zmm2
|
||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm12{k1}, zmm17, zmm3
|
||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm8{k1}, zmm19, zmm3
|
||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm4{k1}, zmm20, zmm3
|
||
|
| 1 | | 0.5 | | | | | 0.5 | | inc rbx
|
||
|
| 1* | | | | | | | | | cmp r9, rbx
|
||
|
| 0*F | | | | | | | | | jnz 0xfffffffffffffd5a
|
||
|
Total Num Of Uops: 129
|
||
|
Analysis Notes:
|
||
|
Backend allocation was stalled due to unavailable allocation resources.
|