163 lines
16 KiB
Plaintext
163 lines
16 KiB
Plaintext
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
|
Analyzed File - gromacs-icx-avx512-sp.o
|
|
Binary Format - 64Bit
|
|
Architecture - SKX
|
|
Analysis Type - Throughput
|
|
|
|
Throughput Analysis Report
|
|
--------------------------
|
|
Block Throughput: 64.00 Cycles Throughput Bottleneck: Backend
|
|
Loop Count: 22
|
|
Port Binding In Cycles Per Iteration:
|
|
--------------------------------------------------------------------------------------------------
|
|
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
|
--------------------------------------------------------------------------------------------------
|
|
| Cycles | 50.0 0.0 | 7.0 | 9.5 8.1 | 9.5 7.9 | 3.0 | 50.0 | 7.0 | 0.0 |
|
|
--------------------------------------------------------------------------------------------------
|
|
|
|
DV - Divider pipe (on port 0)
|
|
D - Data fetch pipe (on ports 2 and 3)
|
|
F - Macro Fusion with the previous instruction occurred
|
|
* - instruction micro-ops not bound to a port
|
|
^ - Micro Fusion occurred
|
|
# - ESP Tracking sync uop was issued
|
|
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
|
X - instruction not supported, was not accounted in Analysis
|
|
|
|
| Num Of | Ports pressure in cycles | |
|
|
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
|
-----------------------------------------------------------------------------------------
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | movsxd rax, dword ptr [r11+rdx*4]
|
|
| 1* | | | | | | | | | mov rsi, rax
|
|
| 1 | | | | | | | 1.0 | | shl rsi, 0x5
|
|
| 1 | | 1.0 | | | | | | | lea rbx, ptr [rsi+rsi*2]
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm15, zmmword ptr [rdi+rbx*1]
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm16, zmmword ptr [rdi+rbx*1+0x20]
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm27, zmmword ptr [rdi+rbx*1+0x40]
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x80]
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm24, zmm1, zmm15
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x140]
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm25, zmm1, zmm16
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm26, zmm9, zmm27
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp]
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm21, zmm1, zmm15
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x100]
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm22, zmm1, zmm16
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm23, zmm10, zmm27
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x1c0]
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm17, zmm1, zmm15
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0xc0]
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm19, zmm1, zmm16
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm20, zmm11, zmm27
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x180]
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm18, zmm1, zmm15
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm16, zmm8, zmm16
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm15, zmm12, zmm27
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm27, zmm26, zmm26
|
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm27, zmm25, zmm25
|
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm27, zmm24, zmm24
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm28, zmm23, zmm23
|
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm28, zmm22, zmm22
|
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm28, zmm21, zmm21
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm29, zmm20, zmm20
|
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm29, zmm19, zmm19
|
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm29, zmm17, zmm17
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm30, zmm15, zmm15
|
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm30, zmm16, zmm16
|
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm31, zmm27
|
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm1, zmm28
|
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm2, zmm29
|
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm30, zmm18, zmm18
|
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm3, zmm30
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm4, zmm6, zmm31
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm4, zmm31, zmm4
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm4, zmm31, zmm4
|
|
| 1 | 1.0 | | | | | | | | vaddps zmm5, zmm4, zmm13
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm7, zmm31
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm5, zmm31, zmm5
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm6, zmm1
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm31, zmm1, zmm31
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm1, zmm31
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm4, zmm4, zmm5
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm31, zmm13
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm1, zmm7, zmm1
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm1, zmm1, zmm5
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm5, zmm6, zmm2
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm5, zmm2, zmm5
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm5, zmm2, zmm5
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm1, zmm31, zmm1
|
|
| 1 | 1.0 | | | | | | | | vaddps zmm31, zmm5, zmm13
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm2, zmm7, zmm2
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm2, zmm2, zmm31
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm6, zmm3
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm31, zmm3, zmm31
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm3, zmm31
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm2, zmm5, zmm2
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm31, zmm13
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm3, zmm7, zmm3
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm3, zmm3, zmm5
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm3, zmm31, zmm3
|
|
| 1* | | | | | | | | | xor esi, esi
|
|
| 1* | | | | | | | | | xor edi, edi
|
|
| 1 | | 1.0 | | | | | | | test eax, 0x7fffffff
|
|
| 1 | | | | | | | 1.0 | | setz sil
|
|
| 1 | | | | | | | 1.0 | | setnz dil
|
|
| 1 | | 1.0 | | | | | | | mov eax, 0xff
|
|
| 1 | | | | | | | 1.0 | | cmovz eax, r8d
|
|
| 1 | | 1.0 | | | | | | | mov ecx, 0xff
|
|
| 1 | | | | | | | 1.0 | | cmovz ecx, r9d
|
|
| 1 | | 1.0 | | | | | | | xor esi, 0xff
|
|
| 1 | | | | | | 1.0 | | | kmovd k1, esi
|
|
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm27, zmm0, 0x1
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm4, zmm4, zmm14
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm5{k1}{z}, zmm24, zmm4
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm24{k1}{z}, zmm25, zmm4
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm4{k1}{z}, zmm26, zmm4
|
|
| 1 | | 1.0 | | | | | | | lea esi, ptr [rdi+rdi*2]
|
|
| 1 | | | | | | | 1.0 | | or esi, 0xfc
|
|
| 1 | | | | | | 1.0 | | | kmovd k1, esi
|
|
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm28, zmm0, 0x1
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm1, zmm1, zmm14
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm21{k1}{z}, zmm21, zmm1
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm5, zmm21
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm21{k1}{z}, zmm22, zmm1
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm21, zmm24, zmm21
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm1{k1}{z}, zmm23, zmm1
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm1, zmm4, zmm1
|
|
| 1 | | | | | | 1.0 | | | kmovd k1, eax
|
|
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm29, zmm0, 0x1
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm2, zmm2, zmm14
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm4{k1}{z}, zmm17, zmm2
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm17{k1}{z}, zmm19, zmm2
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm2{k1}{z}, zmm20, zmm2
|
|
| 1 | | | | | | 1.0 | | | kmovd k1, ecx
|
|
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm30, zmm0, 0x1
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm3, zmm3, zmm14
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm18{k1}{z}, zmm18, zmm3
|
|
| 1 | 1.0 | | | | | | | | vaddps zmm4, zmm4, zmm18
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm4, zmm5, zmm4
|
|
| 1 | 1.0 | | | | | | | | vmulps zmm5{k1}{z}, zmm16, zmm3
|
|
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm17, zmm5
|
|
| 1 | 1.0 | | | | | | | | vaddps zmm5, zmm21, zmm5
|
|
| 1 | | | | | | 1.0 | | | vmulps zmm3{k1}{z}, zmm15, zmm3
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | mov rax, qword ptr [r15+0xb0]
|
|
| 1 | 1.0 | | | | | | | | vaddps zmm2, zmm2, zmm3
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm3, zmmword ptr [rax+rbx*1]
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm3, zmm3, zmm4
|
|
| 2 | | | 0.5 | 0.5 | 1.0 | | | | vmovups zmmword ptr [rax+rbx*1], zmm3
|
|
| 1 | 1.0 | | | | | | | | vaddps zmm1, zmm1, zmm2
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm2, zmmword ptr [rax+rbx*1+0x20]
|
|
| 1 | | | | | | 1.0 | | | vsubps zmm2, zmm2, zmm5
|
|
| 2 | | | 0.5 | 0.5 | 1.0 | | | | vmovups zmmword ptr [rax+rbx*1+0x20], zmm2
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm2, zmmword ptr [rax+rbx*1+0x40]
|
|
| 1 | 1.0 | | | | | | | | vsubps zmm1, zmm2, zmm1
|
|
| 2 | | | 0.5 | 0.5 | 1.0 | | | | vmovups zmmword ptr [rax+rbx*1+0x40], zmm1
|
|
| 1* | | | | | | | | | cmp r10, rdx
|
|
| 0*F | | | | | | | | | jz 0x34
|
|
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | mov rdi, qword ptr [r15+0xa0]
|
|
| 1 | | 1.0 | | | | | | | inc rdx
|
|
| 1 | | | | | | | 1.0 | | jmp 0xfffffffffffffcfc
|
|
Total Num Of Uops: 140
|
|
Analysis Notes:
|
|
Backend allocation was stalled due to unavailable allocation resources.
|