added asm files and analysis output
This commit is contained in:
198
static_analysis/jan/analyses/gromacs-icc-avx512-dp-iaca.out
Normal file
198
static_analysis/jan/analyses/gromacs-icc-avx512-dp-iaca.out
Normal file
@@ -0,0 +1,198 @@
|
||||
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||
Analyzed File - gromacs-icc-avx512-dp.o
|
||||
Binary Format - 64Bit
|
||||
Architecture - SKX
|
||||
Analysis Type - Throughput
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
Block Throughput: 62.00 Cycles Throughput Bottleneck: Backend
|
||||
Loop Count: 22
|
||||
Port Binding In Cycles Per Iteration:
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Cycles | 58.0 0.0 | 16.0 | 16.0 15.0 | 16.0 15.0 | 2.0 | 58.0 | 16.0 | 0.0 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
|
||||
DV - Divider pipe (on port 0)
|
||||
D - Data fetch pipe (on ports 2 and 3)
|
||||
F - Macro Fusion with the previous instruction occurred
|
||||
* - instruction micro-ops not bound to a port
|
||||
^ - Micro Fusion occurred
|
||||
# - ESP Tracking sync uop was issued
|
||||
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||
X - instruction not supported, was not accounted in Analysis
|
||||
|
||||
| Num Of | Ports pressure in cycles | |
|
||||
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------------
|
||||
| 1 | | | 1.0 1.0 | | | | | | mov edx, dword ptr [r10+rsi*4]
|
||||
| 1 | | | | | | | 1.0 | | inc rsi
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm20, zmmword ptr [rsp+0x380]
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm25, zmmword ptr [rsp+0x340]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm24, zmmword ptr [rsp+0x1c0]
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm23, zmmword ptr [rsp+0x2c0]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm16, zmmword ptr [rsp+0x3c0]
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm14, zmmword ptr [rsp+0x300]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm15, zmmword ptr [rsp+0x240]
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm12, zmmword ptr [rsp+0x180]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm21, zmmword ptr [rsp+0x200]
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm18, zmmword ptr [rsp+0x140]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm22, zmmword ptr [rsp+0x100]
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm17, zmmword ptr [rsp+0x280]
|
||||
| 1 | | 1.0 | | | | | | | lea r12d, ptr [rdx+rdx*2]
|
||||
| 1 | | | | | | | 1.0 | | shl r12d, 0x3
|
||||
| 1 | | 1.0 | | | | | | | lea r13d, ptr [rdx+rdx*1]
|
||||
| 1 | | | | | | | 1.0 | | movsxd r12, r12d
|
||||
| 1 | | 1.0 | | | | | | | cmp r13d, r11d
|
||||
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx+rdx*1+0x1]
|
||||
| 1 | | | | | | | 1.0 | | mov edx, 0x0
|
||||
| 1 | | | | | | | 1.0 | | setz dl
|
||||
| 1 | | 1.0 | | | | | | | cmp eax, r11d
|
||||
| 1 | | | | | | | 1.0 | | mov eax, 0x0
|
||||
| 1* | | | | | | | | | mov r13d, edx
|
||||
| 2 | 1.0 | | | 1.0 1.0 | | | | | vsubpd zmm29, zmm20, zmmword ptr [r8+r12*8+0x80]
|
||||
| 1 | | | | | | | 1.0 | | setz al
|
||||
| 2 | | | 1.0 1.0 | | | 1.0 | | | vsubpd zmm26, zmm25, zmmword ptr [r8+r12*8+0x80]
|
||||
| 2 | 1.0 | | | 1.0 1.0 | | | | | vsubpd zmm25, zmm24, zmmword ptr [r8+r12*8]
|
||||
| 2 | | | 1.0 1.0 | | | 1.0 | | | vsubpd zmm24, zmm23, zmmword ptr [r8+r12*8+0x40]
|
||||
| 2 | 1.0 | | | 1.0 1.0 | | | | | vsubpd zmm23, zmm16, zmmword ptr [r8+r12*8+0x80]
|
||||
| 2 | | | 1.0 1.0 | | | 1.0 | | | vsubpd zmm20, zmm14, zmmword ptr [r8+r12*8+0x80]
|
||||
| 2 | 1.0 | | | 1.0 1.0 | | | | | vsubpd zmm27, zmm12, zmmword ptr [r8+r12*8+0x40]
|
||||
| 2 | | | 1.0 1.0 | | | 1.0 | | | vsubpd zmm28, zmm15, zmmword ptr [r8+r12*8]
|
||||
| 2 | 1.0 | | | 1.0 1.0 | | | | | vsubpd zmm30, zmm21, zmmword ptr [r8+r12*8+0x40]
|
||||
| 2 | | | 1.0 1.0 | | | 1.0 | | | vsubpd zmm21, zmm18, zmmword ptr [r8+r12*8+0x40]
|
||||
| 2 | 1.0 | | | 1.0 1.0 | | | | | vsubpd zmm31, zmm22, zmmword ptr [r8+r12*8]
|
||||
| 2 | | | 1.0 1.0 | | | 1.0 | | | vsubpd zmm22, zmm17, zmmword ptr [r8+r12*8]
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm13, zmm29, zmm29
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm15, zmm26, zmm26
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm23, zmm23
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm14, zmm20, zmm20
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm16, zmmword ptr [rsp+0xc0]
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm13, zmm30, zmm30
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm15, zmm27, zmm27
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm12, zmm24, zmm24
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14, zmm21, zmm21
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm13, zmm31, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm15, zmm28, zmm28
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm12, zmm25, zmm25
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14, zmm22, zmm22
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm19, zmm13
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm18, zmm15
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm17, zmm12
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k1, zmm13, zmm16, 0x11
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k6, zmm15, zmm16, 0x11
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k7, zmm12, zmm16, 0x11
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k0, zmm14, zmm16, 0x11
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm15, zmm14
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm16, zmmword ptr [rsp+0x40]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm12, zmmword ptr [rsp+0x80]
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm13, zmm19, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm13, zmm19, zmm13
|
||||
| 1 | | 1.0 | | | | | | | neg r13d
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm14, zmm19, zmm13
|
||||
| 1* | | | | | | | | | mov r12d, eax
|
||||
| 1 | | | | | | 1.0 | | | vfmsub213pd zmm13, zmm19, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm19, zmm12
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm13, zmm13, zmm19
|
||||
| 1 | | 1.0 | | | | | | | add r13d, 0xff
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm14, zmm14, zmm13
|
||||
| 1 | | | | | | | 1.0 | | nop
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm13, zmmword ptr [rsp+0x400]
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm10, zmm14
|
||||
| 1 | | | | | | | 1.0 | | shl r12d, 0x4
|
||||
| 1 | | 1.0 | | | | | | | sub r13d, r12d
|
||||
| 1 | | | | | | 1.0 | | | kmovb k5, r13d
|
||||
| 1 | 1.0 | | | | | | | | kmovw r13d, k1
|
||||
| 1 | 1.0 | | | | | | | | kmovb r12d, k5
|
||||
| 1 | | | | | | 1.0 | | | kmovb k5, r12d
|
||||
| 1 | | | | | | 1.0 | | | kmovb k1, r13d
|
||||
| 1* | | | | | | | | | mov r13d, eax
|
||||
| 1 | 1.0 | | | | | | | | kandb k5, k5, k1
|
||||
| 1 | 1.0 | | | | | | | | kmovb r12d, k5
|
||||
| 1 | | | | | | 1.0 | | | kmovw k5, r12d
|
||||
| 1 | | 1.0 | | | | | | | lea r12d, ptr [rdx+rdx*1]
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm9{k5}, zmm19, zmm29
|
||||
| 1 | | | | | | | 1.0 | | neg r12d
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm13{k5}, zmm19, zmm31
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm31, zmmword ptr [rsp+0x440]
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm29, zmm18, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm31{k5}, zmm19, zmm30
|
||||
| 2^ | | | 1.0 | | 1.0 | | | | vmovups zmmword ptr [rsp+0x400], zmm13
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm30, zmm18, zmm29
|
||||
| 2^ | | | | 1.0 | 1.0 | | | | vmovups zmmword ptr [rsp+0x440], zmm31
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm13, zmm18, zmm30
|
||||
| 1 | 1.0 | | | | | | | | vfmsub213pd zmm30, zmm18, zmm1
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm18, zmm12
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm14, zmm30, zmm18
|
||||
| 1 | | 1.0 | | | | | | | add r12d, 0xff
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm13, zmm14
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm29, zmm10, zmm19
|
||||
| 1 | | | | | | | 1.0 | | shl r13d, 0x5
|
||||
| 1 | | 1.0 | | | | | | | sub r12d, r13d
|
||||
| 1 | | | | | | 1.0 | | | kmovb k1, r12d
|
||||
| 1 | 1.0 | | | | | | | | kmovw r12d, k6
|
||||
| 1 | 1.0 | | | | | | | | kmovb r13d, k1
|
||||
| 1 | | | | | | 1.0 | | | kmovb k1, r13d
|
||||
| 1 | | | | | | 1.0 | | | kmovb k6, r12d
|
||||
| 1* | | | | | | | | | mov r12d, eax
|
||||
| 1 | 1.0 | | | | | | | | kandb k1, k1, k6
|
||||
| 1 | 1.0 | | | | | | | | kmovb r13d, k1
|
||||
| 1 | | | | | | 1.0 | | | kmovw k1, r13d
|
||||
| 1 | | 1.0 | | | | | | | lea r13d, ptr [rdx*4]
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6{k1}, zmm29, zmm26
|
||||
| 1 | | | | | | | 1.0 | | neg r13d
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm7{k1}, zmm29, zmm27
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm8{k1}, zmm29, zmm28
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm26, zmm17, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm28, zmm17, zmm12
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm15, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm15, zmm12
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm27, zmm17, zmm26
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm15, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm13, zmm17, zmm27
|
||||
| 1 | | | | | | 1.0 | | | vfmsub213pd zmm27, zmm17, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm14, zmm27, zmm28
|
||||
| 1 | | | | | | | 1.0 | | add r13d, 0xff
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm13, zmm14
|
||||
| 1 | | | | | | | 1.0 | | shl edx, 0x3
|
||||
| 1 | | | | | | | 1.0 | | shl r12d, 0x6
|
||||
| 1 | | 1.0 | | | | | | | neg edx
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm10, zmm17
|
||||
| 1 | | 1.0 | | | | | | | sub r13d, r12d
|
||||
| 1 | | | | | | 1.0 | | | kmovb k6, r13d
|
||||
| 1 | | 1.0 | | | | | | | add edx, 0xff
|
||||
| 1 | | | | | | | 1.0 | | shl eax, 0x7
|
||||
| 1 | | 1.0 | | | | | | | sub edx, eax
|
||||
| 1 | 1.0 | | | | | | | | kmovb eax, k6
|
||||
| 1 | | | | | | 1.0 | | | kmovb k6, eax
|
||||
| 1 | 1.0 | | | | | | | | kmovw eax, k7
|
||||
| 1 | | | | | | 1.0 | | | kmovb k7, eax
|
||||
| 1 | 1.0 | | | | | | | | kandb k7, k6, k7
|
||||
| 1 | | | | | | 1.0 | | | kmovb k6, edx
|
||||
| 1 | 1.0 | | | | | | | | kmovb edx, k7
|
||||
| 1 | | | | | | 1.0 | | | kmovw k7, edx
|
||||
| 1 | 1.0 | | | | | | | | kmovw edx, k0
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm11{k7}, zmm18, zmm23
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm4{k7}, zmm18, zmm24
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm5{k7}, zmm18, zmm25
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm23, zmm15, zmm19
|
||||
| 1 | | | | | | 1.0 | | | vfmsub213pd zmm19, zmm15, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm15, zmm19, zmm12
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm24, zmm23, zmm15
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm25, zmm10, zmm24
|
||||
| 1 | 1.0 | | | | | | | | kmovb eax, k6
|
||||
| 1 | | | | | | 1.0 | | | kmovb k6, eax
|
||||
| 1 | | | | | | 1.0 | | | kmovb k0, edx
|
||||
| 1 | 1.0 | | | | | | | | kandb k0, k6, k0
|
||||
| 1 | 1.0 | | | | | | | | kmovb r12d, k0
|
||||
| 1 | | | | | | 1.0 | | | kmovw k6, r12d
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm3{k6}, zmm25, zmm22
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm2{k6}, zmm25, zmm21
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm0{k6}, zmm25, zmm20
|
||||
| 1* | | | | | | | | | cmp rsi, rdi
|
||||
| 0*F | | | | | | | | | jl 0xfffffffffffffc6f
|
||||
Total Num Of Uops: 187
|
||||
Analysis Notes:
|
||||
Backend allocation was stalled due to unavailable allocation resources.
|
152
static_analysis/jan/analyses/gromacs-icc-avx512-sp-iaca.out
Normal file
152
static_analysis/jan/analyses/gromacs-icc-avx512-sp-iaca.out
Normal file
@@ -0,0 +1,152 @@
|
||||
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||
Analyzed File - gromacs-icc-avx512-sp.o
|
||||
Binary Format - 64Bit
|
||||
Architecture - SKX
|
||||
Analysis Type - Throughput
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
Block Throughput: 51.00 Cycles Throughput Bottleneck: Backend
|
||||
Loop Count: 22
|
||||
Port Binding In Cycles Per Iteration:
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Cycles | 47.5 0.0 | 9.0 | 11.0 11.0 | 11.0 8.0 | 3.0 | 47.5 | 9.0 | 0.0 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
|
||||
DV - Divider pipe (on port 0)
|
||||
D - Data fetch pipe (on ports 2 and 3)
|
||||
F - Macro Fusion with the previous instruction occurred
|
||||
* - instruction micro-ops not bound to a port
|
||||
^ - Micro Fusion occurred
|
||||
# - ESP Tracking sync uop was issued
|
||||
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||
X - instruction not supported, was not accounted in Analysis
|
||||
|
||||
| Num Of | Ports pressure in cycles | |
|
||||
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------------
|
||||
| 1 | | | 1.0 1.0 | | | | | | mov edi, dword ptr [rcx+rax*4]
|
||||
| 1* | | | | | | | | | mov r12d, r13d
|
||||
| 1 | | | | | | | 1.0 | | movsxd rdi, edi
|
||||
| 1 | | 1.0 | | | | | | | inc rax
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm10, zmmword ptr [rsp+0x140]
|
||||
| 1 | | | | | | | 1.0 | | test edi, 0x7fffffff
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm11, zmmword ptr [rsp+0x100]
|
||||
| 1 | | | | 1.0 1.0 | | | | | vmovups zmm9, zmmword ptr [rsp+0xc0]
|
||||
| 1 | | | | | | | 1.0 | | setz r12b
|
||||
| 1 | | 1.0 | | | | | | | lea r14, ptr [rdi+rdi*2]
|
||||
| 1 | | | | | | | 1.0 | | shl r14, 0x5
|
||||
| 1* | | | | | | | | | mov r8d, r12d
|
||||
| 1 | | 1.0 | | | | | | | neg r8d
|
||||
| 1* | | | | | | | | | mov r11d, r12d
|
||||
| 1 | | 1.0 | | | | | | | add r8d, 0xff
|
||||
| 1 | | | | | | 1.0 | | | kmovw k0, r8d
|
||||
| 1 | | 1.0 | | | | | | | lea r9d, ptr [r12+r12*2]
|
||||
| 2 | 1.0 | | 1.0 1.0 | | | | | | vsubps zmm3, zmm13, zmmword ptr [r14+rbx*1+0x40]
|
||||
| 2 | 0.5 | | | 1.0 1.0 | | 0.5 | | | vsubps zmm4, zmm10, zmmword ptr [r14+rbx*1]
|
||||
| 2 | 0.5 | | 1.0 1.0 | | | 0.5 | | | vsubps zmm10, zmm11, zmmword ptr [r14+rbx*1+0x40]
|
||||
| 2 | 0.5 | | | 1.0 1.0 | | 0.5 | | | vsubps zmm5, zmm17, zmmword ptr [r14+rbx*1+0x20]
|
||||
| 2 | 0.5 | | 1.0 1.0 | | | 0.5 | | | vsubps zmm27, zmm19, zmmword ptr [r14+rbx*1]
|
||||
| 2 | 0.5 | | | 1.0 1.0 | | 0.5 | | | vsubps zmm8, zmm15, zmmword ptr [r14+rbx*1+0x20]
|
||||
| 2 | 0.5 | | 1.0 1.0 | | | 0.5 | | | vsubps zmm11, zmm0, zmmword ptr [r14+rbx*1+0x40]
|
||||
| 2 | 0.5 | | | 1.0 1.0 | | 0.5 | | | vsubps zmm7, zmm9, zmmword ptr [r14+rbx*1]
|
||||
| 2 | 0.5 | | 1.0 1.0 | | | 0.5 | | | vsubps zmm9, zmm14, zmmword ptr [r14+rbx*1+0x20]
|
||||
| 2 | 0.5 | | | 1.0 1.0 | | 0.5 | | | vsubps zmm29, zmm12, zmmword ptr [r14+rbx*1+0x40]
|
||||
| 2 | 0.5 | | 1.0 1.0 | | | 0.5 | | | vsubps zmm28, zmm16, zmmword ptr [r14+rbx*1+0x20]
|
||||
| 2 | 0.5 | | | 1.0 1.0 | | 0.5 | | | vsubps zmm25, zmm18, zmmword ptr [r14+rbx*1]
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm2, zmm3, zmm3
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm30, zmm10, zmm10
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm1, zmm11, zmm11
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm26, zmm29, zmm29
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm2, zmm5, zmm5
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm30, zmm8, zmm8
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm1, zmm9, zmm9
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm26, zmm28, zmm28
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm2, zmm27, zmm27
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm30, zmm4, zmm4
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm1, zmm7, zmm7
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm26, zmm25, zmm25
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm31, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k7, zmm30, zmm24, 0x11
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm6, zmm30
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k3, zmm2, zmm24, 0x11
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm2, zmm1
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k5, zmm26, zmm24, 0x11
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm26, zmm26
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm30, zmm31, zmm23
|
||||
| 1 | 1.0 | | | | | | | | kandw k2, k0, k3
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k3, zmm1, zmm24, 0x11
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm1, zmm31, zmm30
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm30, zmm31, zmm1
|
||||
| 1 | | | | | | | 1.0 | | neg r9d
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmsub213ps zmm1, zmm31, zmm20
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm31, zmm31, zmm22
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm31, zmm1, zmm31
|
||||
| 1 | | | | | | | 1.0 | | add r9d, 0xff
|
||||
| 1 | | | | | | 1.0 | | | kmovw k4, r9d
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm30, zmm30, zmm31
|
||||
| 1 | 1.0 | | | | | | | | kandw k1, k4, k5
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm1, zmm21, zmm30
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm30, zmm26, zmm23
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm31, zmm26, zmm30
|
||||
| 1 | | 1.0 | | | | | | | lea r10d, ptr [r12*8]
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm30, zmm26, zmm31
|
||||
| 1 | | | | | | | 1.0 | | neg r10d
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmsub213ps zmm31, zmm26, zmm20
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm26, zmm26, zmm22
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm31, zmm31, zmm26
|
||||
| 1 | | 1.0 | | | | | | | add r10d, r12d
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm30, zmm30, zmm31
|
||||
| 1 | | | | | | | 1.0 | | add r10d, 0xff
|
||||
| 1 | | | | | | 1.0 | | | kmovw k6, r10d
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm26, zmm21, zmm30
|
||||
| 1 | 1.0 | | | | | | | | kandw k4, k6, k7
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm25{k1}{z}, zmm25, zmm26
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm31{k1}{z}, zmm28, zmm26
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm28, zmm6, zmm23
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm30{k1}{z}, zmm29, zmm26
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm29, zmm2, zmm23
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm25{k2}, zmm27, zmm1
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm31{k2}, zmm5, zmm1
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm27, zmm6, zmm28
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm30{k2}, zmm3, zmm1
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm1, zmm2, zmm29
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm5, zmm6, zmm27
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmsub213ps zmm27, zmm6, zmm20
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm6, zmm6, zmm22
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm3, zmm2, zmm1
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmsub213ps zmm1, zmm2, zmm20
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm2, zmm2, zmm22
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm26, zmm27, zmm6
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm1, zmm1, zmm2
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm5, zmm5, zmm26
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm3, zmm3, zmm1
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm6, zmm21, zmm5
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulps zmm27, zmm21, zmm3
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm25{k4}, zmm4, zmm6
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm4, zmmword ptr [r14+rsi*1]
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm31{k4}, zmm8, zmm6
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm30{k4}, zmm10, zmm6
|
||||
| 1 | | | | | | | 1.0 | | shl r11d, 0x4
|
||||
| 1 | | 1.0 | | | | | | | sub r12d, r11d
|
||||
| 1 | | 1.0 | | | | | | | add r12d, 0xff
|
||||
| 1 | | | | | | 1.0 | | | kmovw k0, r12d
|
||||
| 1 | 1.0 | | | | | | | | kandw k5, k0, k3
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm25{k5}, zmm7, zmm27
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm31{k5}, zmm9, zmm27
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231ps zmm30{k5}, zmm11, zmm27
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vsubps zmm7, zmm4, zmm25
|
||||
| 2 | | | | 1.0 | 1.0 | | | | vmovups zmmword ptr [r14+rsi*1], zmm7
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm8, zmmword ptr [r14+rsi*1+0x20]
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vsubps zmm4, zmm8, zmm31
|
||||
| 2 | | | | 1.0 | 1.0 | | | | vmovups zmmword ptr [r14+rsi*1+0x20], zmm4
|
||||
| 1 | | | 1.0 1.0 | | | | | | vmovups zmm1, zmmword ptr [r14+rsi*1+0x40]
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vsubps zmm2, zmm1, zmm30
|
||||
| 2 | | | | 1.0 | 1.0 | | | | vmovups zmmword ptr [r14+rsi*1+0x40], zmm2
|
||||
| 1* | | | | | | | | | cmp rax, rdx
|
||||
| 0*F | | | | | | | | | jb 0xfffffffffffffd30
|
||||
Total Num Of Uops: 142
|
||||
Analysis Notes:
|
||||
Backend allocation was stalled due to unavailable allocation resources.
|
154
static_analysis/jan/analyses/gromacs-icx-avx512-dp-iaca.out
Normal file
154
static_analysis/jan/analyses/gromacs-icx-avx512-dp-iaca.out
Normal file
@@ -0,0 +1,154 @@
|
||||
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||
Analyzed File - gromacs-icx-avx512-dp.o
|
||||
Binary Format - 64Bit
|
||||
Architecture - SKX
|
||||
Analysis Type - Throughput
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
Block Throughput: 49.26 Cycles Throughput Bottleneck: Backend
|
||||
Loop Count: 22
|
||||
Port Binding In Cycles Per Iteration:
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Cycles | 44.0 0.0 | 13.5 | 5.5 5.5 | 5.5 5.5 | 0.0 | 44.0 | 13.5 | 0.0 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
|
||||
DV - Divider pipe (on port 0)
|
||||
D - Data fetch pipe (on ports 2 and 3)
|
||||
F - Macro Fusion with the previous instruction occurred
|
||||
* - instruction micro-ops not bound to a port
|
||||
^ - Micro Fusion occurred
|
||||
# - ESP Tracking sync uop was issued
|
||||
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||
X - instruction not supported, was not accounted in Analysis
|
||||
|
||||
| Num Of | Ports pressure in cycles | |
|
||||
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------------
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | movsxd rcx, dword ptr [r10+rbx*4]
|
||||
| 1 | | 1.0 | | | | | | | lea rdx, ptr [rcx+rcx*2]
|
||||
| 1 | | | | | | | 1.0 | | shl rdx, 0x6
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm28, zmmword ptr [rsi+rdx*1]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm29, zmmword ptr [rsi+rdx*1+0x40]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm30, zmmword ptr [rsi+rdx*1+0x80]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm3, zmmword ptr [rsp+0x10]
|
||||
| 1 | | | | | | 1.0 | | | vsubpd zmm3, zmm3, zmm28
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm31, zmm24, zmm30
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm16, zmmword ptr [rsp+0x150]
|
||||
| 1 | | | | | | 1.0 | | | vsubpd zmm16, zmm16, zmm29
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm17, zmm31, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm17, zmm16, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm17, zmm3, zmm3
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm18, zmm17
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm21, zmm18
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm19, zmm18, zmm19
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm18, zmm19
|
||||
| 1 | | | | | | 1.0 | | | vaddpd zmm20, zmm19, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm22, zmm18
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm18, zmm20
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm20, zmm25, zmm30
|
||||
| 1 | | 1.0 | | | | | | | lea edx, ptr [rcx+rcx*1]
|
||||
| 1 | | | | | | | 1.0 | | cmp r11, rdx
|
||||
| 1 | | | | | | | 1.0 | | setnz dl
|
||||
| 1 | | | | | | | 1.0 | | setz al
|
||||
| 1 | | 1.0 | | | | | | | add ecx, ecx
|
||||
| 1 | | 1.0 | | | | | | | inc ecx
|
||||
| 1 | | 0.5 | | | | | 0.5 | | cmp r11, rcx
|
||||
| 1 | | | | | | | 1.0 | | setz cl
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm19, zmm18
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm19, zmmword ptr [rsp+0x210]
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm19, zmm19, zmm28
|
||||
| 1 | | | | | | | 1.0 | | setnz dil
|
||||
| 1* | | | | | | | | | mov ebp, edi
|
||||
| 1 | | | | | | | 1.0 | | shl bpl, 0x4
|
||||
| 1 | | 1.0 | | | | | | | sub bpl, al
|
||||
| 1 | | 1.0 | | | | | | | add bpl, 0xef
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, ebp
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm17, zmm0, 0x1
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm17, zmmword ptr [rsp+0x110]
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm17, zmm17, zmm29
|
||||
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx+rdx*1]
|
||||
| 1* | | | | | | | | | mov ebp, edi
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm18, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14{k1}, zmm3, zmm18
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm20, zmm20
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm3, zmm17, zmm17
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm3, zmm19, zmm19
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm11{k1}, zmm16, zmm18
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm16, zmm3
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm7{k1}, zmm31, zmm18
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm21, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm16, zmm18
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm18, zmm16, zmm18
|
||||
| 1 | | | | | | 1.0 | | | vaddpd zmm31, zmm18, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm22, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm16, zmm31
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm31, zmmword ptr [rsp+0x1d0]
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm31, zmm31, zmm28
|
||||
| 1 | | | | | | | 1.0 | | shl bpl, 0x5
|
||||
| 1 | | 1.0 | | | | | | | or bpl, al
|
||||
| 1 | | 0.5 | | | | | 0.5 | | or bpl, 0xdd
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, ebp
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm3, zmm0, 0x1
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm3, zmmword ptr [rsp+0xd0]
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm3, zmm3, zmm29
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm18, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vsubpd zmm18, zmm26, zmm30
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm15{k1}, zmm19, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm19, zmm18, zmm18
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm19, zmm3, zmm3
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm19, zmm31, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm10{k1}, zmm17, zmm16
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm17, zmm19
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm6{k1}, zmm20, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm21, zmm17
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm17, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm17, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vaddpd zmm20, zmm16, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm17, zmm22, zmm17
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm17, zmm17, zmm20
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm17
|
||||
| 1 | | 1.0 | | | | | | | lea eax, ptr [rdx*4]
|
||||
| 1 | | | | | | | 1.0 | | shl dil, 0x6
|
||||
| 1 | | 0.5 | | | | | 0.5 | | or dil, al
|
||||
| 1 | | 0.5 | | | | | 0.5 | | or dil, 0xbb
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, edi
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm19, zmm0, 0x1
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovupd zmm17, zmmword ptr [rsp+0x190]
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm17, zmm17, zmm28
|
||||
| 1 | 1.0 | | | | | | | | vsubpd zmm19, zmm23, zmm29
|
||||
| 1 | | | | | | 1.0 | | | vsubpd zmm20, zmm27, zmm30
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm16, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm13{k1}, zmm31, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm28, zmm20, zmm20
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm28, zmm19, zmm19
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm28, zmm17, zmm17
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm9{k1}, zmm3, zmm16
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm3, zmm28
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm5{k1}, zmm18, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm21, zmm3
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm3, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm3, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vaddpd zmm18, zmm16, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm22, zmm3
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm3, zmm3, zmm18
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm16, zmm3
|
||||
| 1 | | | | | | | 1.0 | | shl dl, 0x3
|
||||
| 1 | | | | | | | 1.0 | | shl cl, 0x7
|
||||
| 1 | | 1.0 | | | | | | | or cl, dl
|
||||
| 1 | | 1.0 | | | | | | | add cl, 0xf7
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, ecx
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k1{k1}, zmm28, zmm0, 0x1
|
||||
| 1 | 1.0 | | | | | | | | vmulpd zmm3, zmm3, zmm2
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm12{k1}, zmm17, zmm3
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm8{k1}, zmm19, zmm3
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm4{k1}, zmm20, zmm3
|
||||
| 1 | | 0.5 | | | | | 0.5 | | inc rbx
|
||||
| 1* | | | | | | | | | cmp r9, rbx
|
||||
| 0*F | | | | | | | | | jnz 0xfffffffffffffd5a
|
||||
Total Num Of Uops: 129
|
||||
Analysis Notes:
|
||||
Backend allocation was stalled due to unavailable allocation resources.
|
288
static_analysis/jan/analyses/gromacs-icx-avx512-dp-mca.out
Normal file
288
static_analysis/jan/analyses/gromacs-icx-avx512-dp-mca.out
Normal file
@@ -0,0 +1,288 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 12200
|
||||
Total Cycles: 4745
|
||||
Total uOps: 14000
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.95
|
||||
IPC: 2.57
|
||||
Block RThroughput: 34.0
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 5 0.50 * movslq (%r10,%rbx,4), %rcx
|
||||
1 1 0.50 leaq (%rcx,%rcx,2), %rdx
|
||||
1 1 0.50 shlq $6, %rdx
|
||||
2 8 0.50 * vmovupd (%rsi,%rdx), %zmm28
|
||||
2 8 0.50 * vmovupd 64(%rsi,%rdx), %zmm29
|
||||
2 8 0.50 * vmovupd 128(%rsi,%rdx), %zmm30
|
||||
2 8 0.50 * vmovupd 16(%rsp), %zmm3
|
||||
1 4 0.50 vsubpd %zmm28, %zmm3, %zmm3
|
||||
1 4 0.50 vsubpd %zmm30, %zmm24, %zmm31
|
||||
2 8 0.50 * vmovupd 336(%rsp), %zmm16
|
||||
1 4 0.50 vsubpd %zmm29, %zmm16, %zmm16
|
||||
1 4 0.50 vmulpd %zmm31, %zmm31, %zmm17
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm16, %zmm17
|
||||
1 4 0.50 vfmadd231pd %zmm3, %zmm3, %zmm17
|
||||
3 4 2.00 vrcp14pd %zmm17, %zmm18
|
||||
1 4 0.50 vmulpd %zmm18, %zmm21, %zmm19
|
||||
1 4 0.50 vmulpd %zmm19, %zmm18, %zmm19
|
||||
1 4 0.50 vmulpd %zmm19, %zmm18, %zmm19
|
||||
1 4 0.50 vaddpd %zmm1, %zmm19, %zmm20
|
||||
1 4 0.50 vmulpd %zmm18, %zmm22, %zmm18
|
||||
1 4 0.50 vmulpd %zmm20, %zmm18, %zmm18
|
||||
1 4 0.50 vsubpd %zmm30, %zmm25, %zmm20
|
||||
1 1 0.50 leal (%rcx,%rcx), %edx
|
||||
1 1 0.25 cmpq %rdx, %r11
|
||||
1 1 0.50 setne %dl
|
||||
1 1 0.50 sete %al
|
||||
1 1 0.25 addl %ecx, %ecx
|
||||
1 1 0.25 incl %ecx
|
||||
1 1 0.25 cmpq %rcx, %r11
|
||||
1 1 0.50 sete %cl
|
||||
1 4 0.50 vmulpd %zmm18, %zmm19, %zmm18
|
||||
2 8 0.50 * vmovupd 528(%rsp), %zmm19
|
||||
1 4 0.50 vsubpd %zmm28, %zmm19, %zmm19
|
||||
1 1 0.50 setne %dil
|
||||
1 1 0.25 movl %edi, %ebp
|
||||
1 1 0.50 shlb $4, %bpl
|
||||
1 1 0.25 subb %al, %bpl
|
||||
1 1 0.25 addb $-17, %bpl
|
||||
1 1 1.00 kmovd %ebp, %k1
|
||||
1 4 1.00 vcmpltpd %zmm0, %zmm17, %k1 {%k1}
|
||||
2 8 0.50 * vmovupd 272(%rsp), %zmm17
|
||||
1 4 0.50 vsubpd %zmm29, %zmm17, %zmm17
|
||||
1 1 0.50 leal (%rdx,%rdx), %eax
|
||||
1 1 0.25 movl %edi, %ebp
|
||||
1 4 0.50 vmulpd %zmm2, %zmm18, %zmm18
|
||||
1 4 0.50 vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1}
|
||||
1 4 0.50 vmulpd %zmm20, %zmm20, %zmm3
|
||||
1 4 0.50 vfmadd231pd %zmm17, %zmm17, %zmm3
|
||||
1 4 0.50 vfmadd231pd %zmm19, %zmm19, %zmm3
|
||||
1 4 0.50 vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1}
|
||||
3 4 2.00 vrcp14pd %zmm3, %zmm16
|
||||
1 4 0.50 vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1}
|
||||
1 4 0.50 vmulpd %zmm16, %zmm21, %zmm18
|
||||
1 4 0.50 vmulpd %zmm18, %zmm16, %zmm18
|
||||
1 4 0.50 vmulpd %zmm18, %zmm16, %zmm18
|
||||
1 4 0.50 vaddpd %zmm1, %zmm18, %zmm31
|
||||
1 4 0.50 vmulpd %zmm16, %zmm22, %zmm16
|
||||
1 4 0.50 vmulpd %zmm31, %zmm16, %zmm16
|
||||
2 8 0.50 * vmovupd 464(%rsp), %zmm31
|
||||
1 4 0.50 vsubpd %zmm28, %zmm31, %zmm31
|
||||
1 1 0.50 shlb $5, %bpl
|
||||
1 1 0.25 orb %al, %bpl
|
||||
1 1 0.25 orb $-35, %bpl
|
||||
1 1 1.00 kmovd %ebp, %k1
|
||||
1 4 1.00 vcmpltpd %zmm0, %zmm3, %k1 {%k1}
|
||||
2 8 0.50 * vmovupd 208(%rsp), %zmm3
|
||||
1 4 0.50 vsubpd %zmm29, %zmm3, %zmm3
|
||||
1 4 0.50 vmulpd %zmm16, %zmm18, %zmm16
|
||||
1 4 0.50 vsubpd %zmm30, %zmm26, %zmm18
|
||||
1 4 0.50 vmulpd %zmm2, %zmm16, %zmm16
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1}
|
||||
1 4 0.50 vmulpd %zmm18, %zmm18, %zmm19
|
||||
1 4 0.50 vfmadd231pd %zmm3, %zmm3, %zmm19
|
||||
1 4 0.50 vfmadd231pd %zmm31, %zmm31, %zmm19
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1}
|
||||
3 4 2.00 vrcp14pd %zmm19, %zmm17
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1}
|
||||
1 4 0.50 vmulpd %zmm17, %zmm21, %zmm16
|
||||
1 4 0.50 vmulpd %zmm16, %zmm17, %zmm16
|
||||
1 4 0.50 vmulpd %zmm16, %zmm17, %zmm16
|
||||
1 4 0.50 vaddpd %zmm1, %zmm16, %zmm20
|
||||
1 4 0.50 vmulpd %zmm17, %zmm22, %zmm17
|
||||
1 4 0.50 vmulpd %zmm20, %zmm17, %zmm17
|
||||
1 4 0.50 vmulpd %zmm17, %zmm16, %zmm16
|
||||
1 1 0.50 leal (,%rdx,4), %eax
|
||||
1 1 0.50 shlb $6, %dil
|
||||
1 1 0.25 orb %al, %dil
|
||||
1 1 0.25 orb $-69, %dil
|
||||
1 1 1.00 kmovd %edi, %k1
|
||||
1 4 1.00 vcmpltpd %zmm0, %zmm19, %k1 {%k1}
|
||||
2 8 0.50 * vmovupd 400(%rsp), %zmm17
|
||||
1 4 0.50 vsubpd %zmm28, %zmm17, %zmm17
|
||||
1 4 0.50 vsubpd %zmm29, %zmm23, %zmm19
|
||||
1 4 0.50 vsubpd %zmm30, %zmm27, %zmm20
|
||||
1 4 0.50 vmulpd %zmm2, %zmm16, %zmm16
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1}
|
||||
1 4 0.50 vmulpd %zmm20, %zmm20, %zmm28
|
||||
1 4 0.50 vfmadd231pd %zmm19, %zmm19, %zmm28
|
||||
1 4 0.50 vfmadd231pd %zmm17, %zmm17, %zmm28
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1}
|
||||
3 4 2.00 vrcp14pd %zmm28, %zmm3
|
||||
1 4 0.50 vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1}
|
||||
1 4 0.50 vmulpd %zmm3, %zmm21, %zmm16
|
||||
1 4 0.50 vmulpd %zmm16, %zmm3, %zmm16
|
||||
1 4 0.50 vmulpd %zmm16, %zmm3, %zmm16
|
||||
1 4 0.50 vaddpd %zmm1, %zmm16, %zmm18
|
||||
1 4 0.50 vmulpd %zmm3, %zmm22, %zmm3
|
||||
1 4 0.50 vmulpd %zmm18, %zmm3, %zmm3
|
||||
1 4 0.50 vmulpd %zmm3, %zmm16, %zmm3
|
||||
1 1 0.50 shlb $3, %dl
|
||||
1 1 0.50 shlb $7, %cl
|
||||
1 1 0.25 orb %dl, %cl
|
||||
1 1 0.25 addb $-9, %cl
|
||||
1 1 1.00 kmovd %ecx, %k1
|
||||
1 4 1.00 vcmpltpd %zmm0, %zmm28, %k1 {%k1}
|
||||
1 4 0.50 vmulpd %zmm2, %zmm3, %zmm3
|
||||
1 4 0.50 vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1}
|
||||
1 4 0.50 vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1}
|
||||
1 4 0.50 vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1}
|
||||
1 1 0.25 incq %rbx
|
||||
1 1 0.25 cmpq %rbx, %r9
|
||||
1 1 0.50 jne .LBB5_12
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - SKXDivider
|
||||
[1] - SKXFPDivider
|
||||
[2] - SKXPort0
|
||||
[3] - SKXPort1
|
||||
[4] - SKXPort2
|
||||
[5] - SKXPort3
|
||||
[6] - SKXPort4
|
||||
[7] - SKXPort5
|
||||
[8] - SKXPort6
|
||||
[9] - SKXPort7
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
- - 45.53 20.45 5.50 5.50 - 44.64 18.38 -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
- - - - 0.50 0.50 - - - - movslq (%r10,%rbx,4), %rcx
|
||||
- - - 0.99 - - - 0.01 - - leaq (%rcx,%rcx,2), %rdx
|
||||
- - 0.01 - - - - - 0.99 - shlq $6, %rdx
|
||||
- - 0.01 0.99 0.49 0.51 - - - - vmovupd (%rsi,%rdx), %zmm28
|
||||
- - 0.01 0.91 0.51 0.49 - 0.08 - - vmovupd 64(%rsi,%rdx), %zmm29
|
||||
- - 0.01 0.56 0.49 0.51 - 0.43 - - vmovupd 128(%rsi,%rdx), %zmm30
|
||||
- - - 0.99 0.50 0.50 - 0.01 - - vmovupd 16(%rsp), %zmm3
|
||||
- - 0.95 - - - - 0.05 - - vsubpd %zmm28, %zmm3, %zmm3
|
||||
- - 0.48 - - - - 0.52 - - vsubpd %zmm30, %zmm24, %zmm31
|
||||
- - - 1.00 0.50 0.50 - - - - vmovupd 336(%rsp), %zmm16
|
||||
- - 0.49 - - - - 0.51 - - vsubpd %zmm29, %zmm16, %zmm16
|
||||
- - 0.48 - - - - 0.52 - - vmulpd %zmm31, %zmm31, %zmm17
|
||||
- - 0.49 - - - - 0.51 - - vfmadd231pd %zmm16, %zmm16, %zmm17
|
||||
- - 0.04 - - - - 0.96 - - vfmadd231pd %zmm3, %zmm3, %zmm17
|
||||
- - 2.00 - - - - 1.00 - - vrcp14pd %zmm17, %zmm18
|
||||
- - 1.00 - - - - - - - vmulpd %zmm18, %zmm21, %zmm19
|
||||
- - 0.51 - - - - 0.49 - - vmulpd %zmm19, %zmm18, %zmm19
|
||||
- - 0.49 - - - - 0.51 - - vmulpd %zmm19, %zmm18, %zmm19
|
||||
- - 0.07 - - - - 0.93 - - vaddpd %zmm1, %zmm19, %zmm20
|
||||
- - - - - - - 1.00 - - vmulpd %zmm18, %zmm22, %zmm18
|
||||
- - 0.95 - - - - 0.05 - - vmulpd %zmm20, %zmm18, %zmm18
|
||||
- - 0.92 - - - - 0.08 - - vsubpd %zmm30, %zmm25, %zmm20
|
||||
- - - 0.94 - - - 0.06 - - leal (%rcx,%rcx), %edx
|
||||
- - - - - - - - 1.00 - cmpq %rdx, %r11
|
||||
- - - - - - - - 1.00 - setne %dl
|
||||
- - 0.44 - - - - - 0.56 - sete %al
|
||||
- - - 0.07 - - - 0.02 0.91 - addl %ecx, %ecx
|
||||
- - - 0.53 - - - 0.46 0.01 - incl %ecx
|
||||
- - - 0.51 - - - 0.46 0.03 - cmpq %rcx, %r11
|
||||
- - 0.02 - - - - - 0.98 - sete %cl
|
||||
- - 0.94 - - - - 0.06 - - vmulpd %zmm18, %zmm19, %zmm18
|
||||
- - 0.01 0.99 0.51 0.49 - - - - vmovupd 528(%rsp), %zmm19
|
||||
- - 0.47 - - - - 0.53 - - vsubpd %zmm28, %zmm19, %zmm19
|
||||
- - 0.04 - - - - - 0.96 - setne %dil
|
||||
- - - 0.95 - - - 0.02 0.03 - movl %edi, %ebp
|
||||
- - 0.01 - - - - - 0.99 - shlb $4, %bpl
|
||||
- - - 0.96 - - - - 0.04 - subb %al, %bpl
|
||||
- - - 0.06 - - - - 0.94 - addb $-17, %bpl
|
||||
- - - - - - - 1.00 - - kmovd %ebp, %k1
|
||||
- - - - - - - 1.00 - - vcmpltpd %zmm0, %zmm17, %k1 {%k1}
|
||||
- - 0.02 0.97 0.50 0.50 - 0.01 - - vmovupd 272(%rsp), %zmm17
|
||||
- - 0.96 - - - - 0.04 - - vsubpd %zmm29, %zmm17, %zmm17
|
||||
- - - 1.00 - - - - - - leal (%rdx,%rdx), %eax
|
||||
- - - 0.05 - - - - 0.95 - movl %edi, %ebp
|
||||
- - 0.51 - - - - 0.49 - - vmulpd %zmm2, %zmm18, %zmm18
|
||||
- - 0.53 - - - - 0.47 - - vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1}
|
||||
- - 0.45 - - - - 0.55 - - vmulpd %zmm20, %zmm20, %zmm3
|
||||
- - 0.94 - - - - 0.06 - - vfmadd231pd %zmm17, %zmm17, %zmm3
|
||||
- - 0.03 - - - - 0.97 - - vfmadd231pd %zmm19, %zmm19, %zmm3
|
||||
- - 0.47 - - - - 0.53 - - vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1}
|
||||
- - 2.00 - - - - 1.00 - - vrcp14pd %zmm3, %zmm16
|
||||
- - 0.53 - - - - 0.47 - - vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1}
|
||||
- - 0.99 - - - - 0.01 - - vmulpd %zmm16, %zmm21, %zmm18
|
||||
- - 0.99 - - - - 0.01 - - vmulpd %zmm18, %zmm16, %zmm18
|
||||
- - 0.97 - - - - 0.03 - - vmulpd %zmm18, %zmm16, %zmm18
|
||||
- - 0.52 - - - - 0.48 - - vaddpd %zmm1, %zmm18, %zmm31
|
||||
- - 0.01 - - - - 0.99 - - vmulpd %zmm16, %zmm22, %zmm16
|
||||
- - 0.52 - - - - 0.48 - - vmulpd %zmm31, %zmm16, %zmm16
|
||||
- - - 0.99 0.50 0.50 - 0.01 - - vmovupd 464(%rsp), %zmm31
|
||||
- - 0.03 - - - - 0.97 - - vsubpd %zmm28, %zmm31, %zmm31
|
||||
- - 0.01 - - - - - 0.99 - shlb $5, %bpl
|
||||
- - - 0.94 - - - - 0.06 - orb %al, %bpl
|
||||
- - - 0.04 - - - - 0.96 - orb $-35, %bpl
|
||||
- - - - - - - 1.00 - - kmovd %ebp, %k1
|
||||
- - - - - - - 1.00 - - vcmpltpd %zmm0, %zmm3, %k1 {%k1}
|
||||
- - - 0.99 0.50 0.50 - 0.01 - - vmovupd 208(%rsp), %zmm3
|
||||
- - 0.95 - - - - 0.05 - - vsubpd %zmm29, %zmm3, %zmm3
|
||||
- - 0.51 - - - - 0.49 - - vmulpd %zmm16, %zmm18, %zmm16
|
||||
- - 0.01 - - - - 0.99 - - vsubpd %zmm30, %zmm26, %zmm18
|
||||
- - 0.52 - - - - 0.48 - - vmulpd %zmm2, %zmm16, %zmm16
|
||||
- - 1.00 - - - - - - - vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1}
|
||||
- - 0.03 - - - - 0.97 - - vmulpd %zmm18, %zmm18, %zmm19
|
||||
- - 0.06 - - - - 0.94 - - vfmadd231pd %zmm3, %zmm3, %zmm19
|
||||
- - 0.50 - - - - 0.50 - - vfmadd231pd %zmm31, %zmm31, %zmm19
|
||||
- - - - - - - 1.00 - - vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1}
|
||||
- - 2.00 - - - - 1.00 - - vrcp14pd %zmm19, %zmm17
|
||||
- - 1.00 - - - - - - - vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1}
|
||||
- - 0.07 - - - - 0.93 - - vmulpd %zmm17, %zmm21, %zmm16
|
||||
- - 0.50 - - - - 0.50 - - vmulpd %zmm16, %zmm17, %zmm16
|
||||
- - 0.09 - - - - 0.91 - - vmulpd %zmm16, %zmm17, %zmm16
|
||||
- - 0.07 - - - - 0.93 - - vaddpd %zmm1, %zmm16, %zmm20
|
||||
- - 0.93 - - - - 0.07 - - vmulpd %zmm17, %zmm22, %zmm17
|
||||
- - 0.94 - - - - 0.06 - - vmulpd %zmm20, %zmm17, %zmm17
|
||||
- - 0.51 - - - - 0.49 - - vmulpd %zmm17, %zmm16, %zmm16
|
||||
- - - 1.00 - - - - - - leal (,%rdx,4), %eax
|
||||
- - - - - - - - 1.00 - shlb $6, %dil
|
||||
- - - 0.02 - - - - 0.98 - orb %al, %dil
|
||||
- - - 0.48 - - - - 0.52 - orb $-69, %dil
|
||||
- - - - - - - 1.00 - - kmovd %edi, %k1
|
||||
- - - - - - - 1.00 - - vcmpltpd %zmm0, %zmm19, %k1 {%k1}
|
||||
- - - 1.00 0.50 0.50 - - - - vmovupd 400(%rsp), %zmm17
|
||||
- - 0.49 - - - - 0.51 - - vsubpd %zmm28, %zmm17, %zmm17
|
||||
- - 0.49 - - - - 0.51 - - vsubpd %zmm29, %zmm23, %zmm19
|
||||
- - 0.02 - - - - 0.98 - - vsubpd %zmm30, %zmm27, %zmm20
|
||||
- - 0.98 - - - - 0.02 - - vmulpd %zmm2, %zmm16, %zmm16
|
||||
- - 0.50 - - - - 0.50 - - vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1}
|
||||
- - 0.94 - - - - 0.06 - - vmulpd %zmm20, %zmm20, %zmm28
|
||||
- - 0.04 - - - - 0.96 - - vfmadd231pd %zmm19, %zmm19, %zmm28
|
||||
- - 0.07 - - - - 0.93 - - vfmadd231pd %zmm17, %zmm17, %zmm28
|
||||
- - 0.50 - - - - 0.50 - - vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1}
|
||||
- - 2.00 - - - - 1.00 - - vrcp14pd %zmm28, %zmm3
|
||||
- - 0.50 - - - - 0.50 - - vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1}
|
||||
- - 1.00 - - - - - - - vmulpd %zmm3, %zmm21, %zmm16
|
||||
- - 0.55 - - - - 0.45 - - vmulpd %zmm16, %zmm3, %zmm16
|
||||
- - 0.99 - - - - 0.01 - - vmulpd %zmm16, %zmm3, %zmm16
|
||||
- - 0.99 - - - - 0.01 - - vaddpd %zmm1, %zmm16, %zmm18
|
||||
- - - - - - - 1.00 - - vmulpd %zmm3, %zmm22, %zmm3
|
||||
- - 0.52 - - - - 0.48 - - vmulpd %zmm18, %zmm3, %zmm3
|
||||
- - 0.99 - - - - 0.01 - - vmulpd %zmm3, %zmm16, %zmm3
|
||||
- - - - - - - - 1.00 - shlb $3, %dl
|
||||
- - - - - - - - 1.00 - shlb $7, %cl
|
||||
- - - 1.00 - - - - - - orb %dl, %cl
|
||||
- - - 0.52 - - - - 0.48 - addb $-9, %cl
|
||||
- - - - - - - 1.00 - - kmovd %ecx, %k1
|
||||
- - - - - - - 1.00 - - vcmpltpd %zmm0, %zmm28, %k1 {%k1}
|
||||
- - 0.98 - - - - 0.02 - - vmulpd %zmm2, %zmm3, %zmm3
|
||||
- - 0.97 - - - - 0.03 - - vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1}
|
||||
- - 0.03 - - - - 0.97 - - vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1}
|
||||
- - 0.97 - - - - 0.03 - - vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1}
|
||||
- - - 0.48 - - - - 0.52 - incq %rbx
|
||||
- - - 0.52 - - - - 0.48 - cmpq %rbx, %r9
|
||||
- - - - - - - - 1.00 - jne .LBB5_12
|
167
static_analysis/jan/analyses/gromacs-icx-avx512-dp-osaca.out
Normal file
167
static_analysis/jan/analyses/gromacs-icx-avx512-dp-osaca.out
Normal file
@@ -0,0 +1,167 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: gromacs-icx-avx512-dp.s
|
||||
Architecture: CSX
|
||||
Timestamp: 2023-02-10 16:30:53
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
2241 | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
|
||||
2242 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
2243 | | | | | | | | || | | .LBB5_12: # Parent Loop BB5_7 Depth=1
|
||||
2244 | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||
2245 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%r10,%rbx,4), %rcx
|
||||
2246 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rcx,%rcx,2), %rdx
|
||||
2247 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $6, %rdx
|
||||
2248 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd (%rsi,%rdx), %zmm28 # AlignMOV convert to UnAlignMOV
|
||||
2249 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 64(%rsi,%rdx), %zmm29 # AlignMOV convert to UnAlignMOV
|
||||
2250 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovupd 128(%rsi,%rdx), %zmm30 # AlignMOV convert to UnAlignMOV
|
||||
2251 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 16(%rsp), %zmm3 # 64-byte Reload
|
||||
2252 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm3, %zmm3
|
||||
2253 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm30, %zmm24, %zmm31
|
||||
2254 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 336(%rsp), %zmm16 # 64-byte Reload
|
||||
2255 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm16, %zmm16
|
||||
2256 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm17
|
||||
2257 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm16, %zmm16, %zmm17 # zmm17 = (zmm16 * zmm16) + zmm17
|
||||
2258 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm3, %zmm3, %zmm17 # zmm17 = (zmm3 * zmm3) + zmm17
|
||||
2259 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm17, %zmm18
|
||||
2260 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm18, %zmm21, %zmm19
|
||||
2261 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm19
|
||||
2262 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm19
|
||||
2263 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddpd %zmm1, %zmm19, %zmm20
|
||||
2264 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm22, %zmm18
|
||||
2265 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm18, %zmm18
|
||||
2266 | 1.00 | | | | | 0.00 | | || | | vsubpd %zmm30, %zmm25, %zmm20
|
||||
2267 | | 1.00 | | | | 0.00 | | || | | leal (%rcx,%rcx), %edx
|
||||
2268 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rdx, %r11
|
||||
2269 | 0.00 | | | | | | 1.00 | || | | setne %dl
|
||||
2270 | 0.00 | | | | | | 1.00 | || | | sete %al
|
||||
2271 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | addl %ecx, %ecx
|
||||
2272 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | incl %ecx
|
||||
2273 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rcx, %r11
|
||||
2274 | 0.00 | | | | | | 1.00 | || | | sete %cl
|
||||
2275 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm18, %zmm19, %zmm18
|
||||
2276 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 528(%rsp), %zmm19 # 64-byte Reload
|
||||
2277 | 1.00 | | | | | 0.00 | | || | | vsubpd %zmm28, %zmm19, %zmm19
|
||||
2278 | 0.00 | | | | | | 1.00 | || | | setne %dil
|
||||
2279 | 0.00 | 0.75 | | | | 0.00 | 0.25 | || | | movl %edi, %ebp
|
||||
2280 | 0.00 | | | | | | 1.00 | || | 1.0 | shlb $4, %bpl
|
||||
2281 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | 1.0 | subb %al, %bpl
|
||||
2282 | 0.00 | 0.25 | | | | 0.00 | 0.75 | || | 1.0 | addb $-17, %bpl
|
||||
2283 | 1.00 | | | | | | | || | | kmovd %ebp, %k1
|
||||
2284 | | | | | | 1.00 | | || | | vcmpltpd %zmm0, %zmm17, %k1 {%k1}
|
||||
2285 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 272(%rsp), %zmm17 # 64-byte Reload
|
||||
2286 | 0.25 | | | | | 0.75 | | || | | vsubpd %zmm29, %zmm17, %zmm17
|
||||
2287 | | 1.00 | | | | 0.00 | | || | | leal (%rdx,%rdx), %eax
|
||||
2288 | 0.00 | 0.75 | | | | 0.00 | 0.25 | || | | movl %edi, %ebp
|
||||
2289 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm2, %zmm18, %zmm18
|
||||
2290 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14
|
||||
2291 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm20, %zmm3
|
||||
2292 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm17, %zmm17, %zmm3 # zmm3 = (zmm17 * zmm17) + zmm3
|
||||
2293 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm19, %zmm19, %zmm3 # zmm3 = (zmm19 * zmm19) + zmm3
|
||||
2294 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11
|
||||
2295 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm3, %zmm16
|
||||
2296 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7
|
||||
2297 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm21, %zmm18
|
||||
2298 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm16, %zmm18
|
||||
2299 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm16, %zmm18
|
||||
2300 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm1, %zmm18, %zmm31
|
||||
2301 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm22, %zmm16
|
||||
2302 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm31, %zmm16, %zmm16
|
||||
2303 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 464(%rsp), %zmm31 # 64-byte Reload
|
||||
2304 | 0.75 | | | | | 0.25 | | || | | vsubpd %zmm28, %zmm31, %zmm31
|
||||
2305 | 0.00 | | | | | | 1.00 | || | 1.0 | shlb $5, %bpl
|
||||
2306 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | 1.0 | orb %al, %bpl
|
||||
2307 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | 1.0 | orb $-35, %bpl
|
||||
2308 | 1.00 | | | | | | | || | | kmovd %ebp, %k1
|
||||
2309 | | | | | | 1.00 | | || | | vcmpltpd %zmm0, %zmm3, %k1 {%k1}
|
||||
2310 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 208(%rsp), %zmm3 # 64-byte Reload
|
||||
2311 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm3, %zmm3
|
||||
2312 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm18, %zmm16
|
||||
2313 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm26, %zmm18
|
||||
2314 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm2, %zmm16, %zmm16
|
||||
2315 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15
|
||||
2316 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm19
|
||||
2317 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm3, %zmm19 # zmm19 = (zmm3 * zmm3) + zmm19
|
||||
2318 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm31, %zmm31, %zmm19 # zmm19 = (zmm31 * zmm31) + zmm19
|
||||
2319 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10
|
||||
2320 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm19, %zmm17
|
||||
2321 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6
|
||||
2322 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm17, %zmm21, %zmm16
|
||||
2323 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm17, %zmm16
|
||||
2324 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm17, %zmm16
|
||||
2325 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm1, %zmm16, %zmm20
|
||||
2326 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm17, %zmm22, %zmm17
|
||||
2327 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm17, %zmm17
|
||||
2328 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm17, %zmm16, %zmm16
|
||||
2329 | | 1.00 | | | | 0.00 | | || | | leal (,%rdx,4), %eax
|
||||
2330 | 0.00 | | | | | | 1.00 | || | | shlb $6, %dil
|
||||
2331 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | orb %al, %dil
|
||||
2332 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | orb $-69, %dil
|
||||
2333 | 1.00 | | | | | | | || | | kmovd %edi, %k1
|
||||
2334 | | | | | | 1.00 | | || | | vcmpltpd %zmm0, %zmm19, %k1 {%k1}
|
||||
2335 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 400(%rsp), %zmm17 # 64-byte Reload
|
||||
2336 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm17, %zmm17
|
||||
2337 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm23, %zmm19
|
||||
2338 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm27, %zmm20
|
||||
2339 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm2, %zmm16, %zmm16
|
||||
2340 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13
|
||||
2341 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm20, %zmm28
|
||||
2342 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm19, %zmm19, %zmm28 # zmm28 = (zmm19 * zmm19) + zmm28
|
||||
2343 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm17, %zmm17, %zmm28 # zmm28 = (zmm17 * zmm17) + zmm28
|
||||
2344 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9
|
||||
2345 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm28, %zmm3
|
||||
2346 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5
|
||||
2347 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm3, %zmm21, %zmm16
|
||||
2348 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm16, %zmm3, %zmm16
|
||||
2349 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm16, %zmm3, %zmm16
|
||||
2350 | 0.00 | | | | | 1.00 | | || | | vaddpd %zmm1, %zmm16, %zmm18
|
||||
2351 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm22, %zmm3
|
||||
2352 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm18, %zmm3, %zmm3
|
||||
2353 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm16, %zmm3
|
||||
2354 | 0.00 | | | | | | 1.00 | || | | shlb $3, %dl
|
||||
2355 | 0.00 | | | | | | 1.00 | || | | shlb $7, %cl
|
||||
2356 | 0.00 | 0.25 | | | | 0.00 | 0.75 | || | | orb %dl, %cl
|
||||
2357 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | addb $-9, %cl
|
||||
2358 | 1.00 | | | | | | | || | | kmovd %ecx, %k1
|
||||
2359 | | | | | | 1.00 | | || | | vcmpltpd %zmm0, %zmm28, %k1 {%k1}
|
||||
2360 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm2, %zmm3, %zmm3
|
||||
2361 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12
|
||||
2362 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8
|
||||
2363 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4
|
||||
2364 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | incq %rbx
|
||||
2365 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rbx, %r9
|
||||
2366 | | | | | | | | || | | * jne .LBB5_12
|
||||
2367 | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
44.0 15.0 5.50 5.50 5.50 5.50 44.0 15.0 66.0 6.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
2280 | 6.0 | shlb $4, %bpl | [2280, 2281, 2282, 2305, 2306, 2307]
|
||||
2363 | 4.0 | vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4| [2363]
|
||||
2362 | 4.0 | vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8| [2362]
|
||||
2361 | 4.0 | vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12| [2361]
|
||||
2346 | 4.0 | vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5| [2346]
|
||||
2344 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9| [2344]
|
||||
2340 | 4.0 | vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13| [2340]
|
||||
2321 | 4.0 | vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6| [2321]
|
||||
2319 | 4.0 | vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10| [2319]
|
||||
2315 | 4.0 | vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15| [2315]
|
||||
2296 | 4.0 | vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7| [2296]
|
||||
2294 | 4.0 | vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11| [2294]
|
||||
2290 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14| [2290]
|
||||
2330 | 3.0 | shlb $6, %dil | [2330, 2331, 2332]
|
||||
2364 | 1.0 | incq %rbx | [2364]
|
||||
|
162
static_analysis/jan/analyses/gromacs-icx-avx512-sp-iaca.out
Normal file
162
static_analysis/jan/analyses/gromacs-icx-avx512-sp-iaca.out
Normal file
@@ -0,0 +1,162 @@
|
||||
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||
Analyzed File - gromacs-icx-avx512-sp.o
|
||||
Binary Format - 64Bit
|
||||
Architecture - SKX
|
||||
Analysis Type - Throughput
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
Block Throughput: 64.00 Cycles Throughput Bottleneck: Backend
|
||||
Loop Count: 22
|
||||
Port Binding In Cycles Per Iteration:
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Cycles | 50.0 0.0 | 7.0 | 9.5 8.1 | 9.5 7.9 | 3.0 | 50.0 | 7.0 | 0.0 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
|
||||
DV - Divider pipe (on port 0)
|
||||
D - Data fetch pipe (on ports 2 and 3)
|
||||
F - Macro Fusion with the previous instruction occurred
|
||||
* - instruction micro-ops not bound to a port
|
||||
^ - Micro Fusion occurred
|
||||
# - ESP Tracking sync uop was issued
|
||||
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||
X - instruction not supported, was not accounted in Analysis
|
||||
|
||||
| Num Of | Ports pressure in cycles | |
|
||||
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------------
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | movsxd rax, dword ptr [r11+rdx*4]
|
||||
| 1* | | | | | | | | | mov rsi, rax
|
||||
| 1 | | | | | | | 1.0 | | shl rsi, 0x5
|
||||
| 1 | | 1.0 | | | | | | | lea rbx, ptr [rsi+rsi*2]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm15, zmmword ptr [rdi+rbx*1]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm16, zmmword ptr [rdi+rbx*1+0x20]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm27, zmmword ptr [rdi+rbx*1+0x40]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x80]
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm24, zmm1, zmm15
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x140]
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm25, zmm1, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm26, zmm9, zmm27
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp]
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm21, zmm1, zmm15
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x100]
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm22, zmm1, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm23, zmm10, zmm27
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x1c0]
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm17, zmm1, zmm15
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0xc0]
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm19, zmm1, zmm16
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm20, zmm11, zmm27
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm1, zmmword ptr [rsp+0x180]
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm18, zmm1, zmm15
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm16, zmm8, zmm16
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm15, zmm12, zmm27
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm27, zmm26, zmm26
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm27, zmm25, zmm25
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm27, zmm24, zmm24
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm28, zmm23, zmm23
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm28, zmm22, zmm22
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm28, zmm21, zmm21
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm29, zmm20, zmm20
|
||||
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm29, zmm19, zmm19
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm29, zmm17, zmm17
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm30, zmm15, zmm15
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm30, zmm16, zmm16
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm31, zmm27
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm1, zmm28
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm2, zmm29
|
||||
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm30, zmm18, zmm18
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm3, zmm30
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm4, zmm6, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm4, zmm31, zmm4
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm4, zmm31, zmm4
|
||||
| 1 | 1.0 | | | | | | | | vaddps zmm5, zmm4, zmm13
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm7, zmm31
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm5, zmm31, zmm5
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm6, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm31, zmm1, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm1, zmm31
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm4, zmm4, zmm5
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm31, zmm13
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm1, zmm7, zmm1
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm1, zmm1, zmm5
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm5, zmm6, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm5, zmm2, zmm5
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm5, zmm2, zmm5
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm1, zmm31, zmm1
|
||||
| 1 | 1.0 | | | | | | | | vaddps zmm31, zmm5, zmm13
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm2, zmm7, zmm2
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm2, zmm2, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm6, zmm3
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm31, zmm3, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm31, zmm3, zmm31
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm2, zmm5, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm31, zmm13
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm3, zmm7, zmm3
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm3, zmm3, zmm5
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm3, zmm31, zmm3
|
||||
| 1* | | | | | | | | | xor esi, esi
|
||||
| 1* | | | | | | | | | xor edi, edi
|
||||
| 1 | | 1.0 | | | | | | | test eax, 0x7fffffff
|
||||
| 1 | | | | | | | 1.0 | | setz sil
|
||||
| 1 | | | | | | | 1.0 | | setnz dil
|
||||
| 1 | | 1.0 | | | | | | | mov eax, 0xff
|
||||
| 1 | | | | | | | 1.0 | | cmovz eax, r8d
|
||||
| 1 | | 1.0 | | | | | | | mov ecx, 0xff
|
||||
| 1 | | | | | | | 1.0 | | cmovz ecx, r9d
|
||||
| 1 | | 1.0 | | | | | | | xor esi, 0xff
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, esi
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm27, zmm0, 0x1
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm4, zmm4, zmm14
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm5{k1}{z}, zmm24, zmm4
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm24{k1}{z}, zmm25, zmm4
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm4{k1}{z}, zmm26, zmm4
|
||||
| 1 | | 1.0 | | | | | | | lea esi, ptr [rdi+rdi*2]
|
||||
| 1 | | | | | | | 1.0 | | or esi, 0xfc
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, esi
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm28, zmm0, 0x1
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm1, zmm1, zmm14
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm21{k1}{z}, zmm21, zmm1
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm5, zmm21
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm21{k1}{z}, zmm22, zmm1
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm21, zmm24, zmm21
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm1{k1}{z}, zmm23, zmm1
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm1, zmm4, zmm1
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, eax
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm29, zmm0, 0x1
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm2, zmm2, zmm14
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm4{k1}{z}, zmm17, zmm2
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm17{k1}{z}, zmm19, zmm2
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm2{k1}{z}, zmm20, zmm2
|
||||
| 1 | | | | | | 1.0 | | | kmovd k1, ecx
|
||||
| 1 | | | | | | 1.0 | | | vcmpps k1{k1}, zmm30, zmm0, 0x1
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm3, zmm3, zmm14
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm18{k1}{z}, zmm18, zmm3
|
||||
| 1 | 1.0 | | | | | | | | vaddps zmm4, zmm4, zmm18
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm4, zmm5, zmm4
|
||||
| 1 | 1.0 | | | | | | | | vmulps zmm5{k1}{z}, zmm16, zmm3
|
||||
| 1 | | | | | | 1.0 | | | vaddps zmm5, zmm17, zmm5
|
||||
| 1 | 1.0 | | | | | | | | vaddps zmm5, zmm21, zmm5
|
||||
| 1 | | | | | | 1.0 | | | vmulps zmm3{k1}{z}, zmm15, zmm3
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | mov rax, qword ptr [r15+0xb0]
|
||||
| 1 | 1.0 | | | | | | | | vaddps zmm2, zmm2, zmm3
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm3, zmmword ptr [rax+rbx*1]
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm3, zmm3, zmm4
|
||||
| 2 | | | 0.5 | 0.5 | 1.0 | | | | vmovups zmmword ptr [rax+rbx*1], zmm3
|
||||
| 1 | 1.0 | | | | | | | | vaddps zmm1, zmm1, zmm2
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm2, zmmword ptr [rax+rbx*1+0x20]
|
||||
| 1 | | | | | | 1.0 | | | vsubps zmm2, zmm2, zmm5
|
||||
| 2 | | | 0.5 | 0.5 | 1.0 | | | | vmovups zmmword ptr [rax+rbx*1+0x20], zmm2
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups zmm2, zmmword ptr [rax+rbx*1+0x40]
|
||||
| 1 | 1.0 | | | | | | | | vsubps zmm1, zmm2, zmm1
|
||||
| 2 | | | 0.5 | 0.5 | 1.0 | | | | vmovups zmmword ptr [rax+rbx*1+0x40], zmm1
|
||||
| 1* | | | | | | | | | cmp r10, rdx
|
||||
| 0*F | | | | | | | | | jz 0x34
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | mov rdi, qword ptr [r15+0xa0]
|
||||
| 1 | | 1.0 | | | | | | | inc rdx
|
||||
| 1 | | | | | | | 1.0 | | jmp 0xfffffffffffffcfc
|
||||
Total Num Of Uops: 140
|
||||
Analysis Notes:
|
||||
Backend allocation was stalled due to unavailable allocation resources.
|
304
static_analysis/jan/analyses/gromacs-icx-avx512-sp-mca.out
Normal file
304
static_analysis/jan/analyses/gromacs-icx-avx512-sp-mca.out
Normal file
@@ -0,0 +1,304 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 13000
|
||||
Total Cycles: 5640
|
||||
Total uOps: 15400
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.73
|
||||
IPC: 2.30
|
||||
Block RThroughput: 40.0
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 5 0.50 * movslq (%r11,%rdx,4), %rax
|
||||
1 1 0.25 movq %rax, %rsi
|
||||
1 1 0.50 shlq $5, %rsi
|
||||
1 1 0.50 leaq (%rsi,%rsi,2), %rbx
|
||||
2 8 0.50 * vmovups (%rdi,%rbx), %zmm15
|
||||
2 8 0.50 * vmovups 32(%rdi,%rbx), %zmm16
|
||||
2 8 0.50 * vmovups 64(%rdi,%rbx), %zmm27
|
||||
2 8 0.50 * vmovups 128(%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm15, %zmm1, %zmm24
|
||||
2 8 0.50 * vmovups 320(%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm16, %zmm1, %zmm25
|
||||
1 4 0.50 vsubps %zmm27, %zmm9, %zmm26
|
||||
2 8 0.50 * vmovups (%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm15, %zmm1, %zmm21
|
||||
2 8 0.50 * vmovups 256(%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm16, %zmm1, %zmm22
|
||||
1 4 0.50 vsubps %zmm27, %zmm10, %zmm23
|
||||
2 8 0.50 * vmovups 448(%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm15, %zmm1, %zmm17
|
||||
2 8 0.50 * vmovups 192(%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm16, %zmm1, %zmm19
|
||||
1 4 0.50 vsubps %zmm27, %zmm11, %zmm20
|
||||
2 8 0.50 * vmovups 384(%rsp), %zmm1
|
||||
1 4 0.50 vsubps %zmm15, %zmm1, %zmm18
|
||||
1 4 0.50 vsubps %zmm16, %zmm8, %zmm16
|
||||
1 4 0.50 vsubps %zmm27, %zmm12, %zmm15
|
||||
1 4 0.50 vmulps %zmm26, %zmm26, %zmm27
|
||||
1 4 0.50 vfmadd231ps %zmm25, %zmm25, %zmm27
|
||||
1 4 0.50 vfmadd231ps %zmm24, %zmm24, %zmm27
|
||||
1 4 0.50 vmulps %zmm23, %zmm23, %zmm28
|
||||
1 4 0.50 vfmadd231ps %zmm22, %zmm22, %zmm28
|
||||
1 4 0.50 vfmadd231ps %zmm21, %zmm21, %zmm28
|
||||
1 4 0.50 vmulps %zmm20, %zmm20, %zmm29
|
||||
1 4 0.50 vfmadd231ps %zmm19, %zmm19, %zmm29
|
||||
1 4 0.50 vfmadd231ps %zmm17, %zmm17, %zmm29
|
||||
1 4 0.50 vmulps %zmm15, %zmm15, %zmm30
|
||||
1 4 0.50 vfmadd231ps %zmm16, %zmm16, %zmm30
|
||||
3 4 2.00 vrcp14ps %zmm27, %zmm31
|
||||
3 4 2.00 vrcp14ps %zmm28, %zmm1
|
||||
3 4 2.00 vrcp14ps %zmm29, %zmm2
|
||||
1 4 0.50 vfmadd231ps %zmm18, %zmm18, %zmm30
|
||||
3 4 2.00 vrcp14ps %zmm30, %zmm3
|
||||
1 4 0.50 vmulps %zmm31, %zmm6, %zmm4
|
||||
1 4 0.50 vmulps %zmm4, %zmm31, %zmm4
|
||||
1 4 0.50 vmulps %zmm4, %zmm31, %zmm4
|
||||
1 4 0.50 vaddps %zmm13, %zmm4, %zmm5
|
||||
1 4 0.50 vmulps %zmm31, %zmm7, %zmm31
|
||||
1 4 0.50 vmulps %zmm5, %zmm31, %zmm5
|
||||
1 4 0.50 vmulps %zmm1, %zmm6, %zmm31
|
||||
1 4 0.50 vmulps %zmm31, %zmm1, %zmm31
|
||||
1 4 0.50 vmulps %zmm31, %zmm1, %zmm31
|
||||
1 4 0.50 vmulps %zmm5, %zmm4, %zmm4
|
||||
1 4 0.50 vaddps %zmm13, %zmm31, %zmm5
|
||||
1 4 0.50 vmulps %zmm1, %zmm7, %zmm1
|
||||
1 4 0.50 vmulps %zmm5, %zmm1, %zmm1
|
||||
1 4 0.50 vmulps %zmm2, %zmm6, %zmm5
|
||||
1 4 0.50 vmulps %zmm5, %zmm2, %zmm5
|
||||
1 4 0.50 vmulps %zmm5, %zmm2, %zmm5
|
||||
1 4 0.50 vmulps %zmm1, %zmm31, %zmm1
|
||||
1 4 0.50 vaddps %zmm13, %zmm5, %zmm31
|
||||
1 4 0.50 vmulps %zmm2, %zmm7, %zmm2
|
||||
1 4 0.50 vmulps %zmm31, %zmm2, %zmm2
|
||||
1 4 0.50 vmulps %zmm3, %zmm6, %zmm31
|
||||
1 4 0.50 vmulps %zmm31, %zmm3, %zmm31
|
||||
1 4 0.50 vmulps %zmm31, %zmm3, %zmm31
|
||||
1 4 0.50 vmulps %zmm2, %zmm5, %zmm2
|
||||
1 4 0.50 vaddps %zmm13, %zmm31, %zmm5
|
||||
1 4 0.50 vmulps %zmm3, %zmm7, %zmm3
|
||||
1 4 0.50 vmulps %zmm5, %zmm3, %zmm3
|
||||
1 4 0.50 vmulps %zmm3, %zmm31, %zmm3
|
||||
1 0 0.17 xorl %esi, %esi
|
||||
1 0 0.17 xorl %edi, %edi
|
||||
1 1 0.25 testl $2147483647, %eax
|
||||
1 1 0.50 sete %sil
|
||||
1 1 0.50 setne %dil
|
||||
1 1 0.25 movl $255, %eax
|
||||
1 1 0.50 cmovel %r8d, %eax
|
||||
1 1 0.25 movl $255, %ecx
|
||||
1 1 0.50 cmovel %r9d, %ecx
|
||||
1 1 0.25 xorl $255, %esi
|
||||
1 1 1.00 kmovd %esi, %k1
|
||||
1 4 1.00 vcmpltps %zmm0, %zmm27, %k1 {%k1}
|
||||
1 4 0.50 vmulps %zmm14, %zmm4, %zmm4
|
||||
1 4 0.50 vmulps %zmm4, %zmm24, %zmm5 {%k1} {z}
|
||||
1 4 0.50 vmulps %zmm4, %zmm25, %zmm24 {%k1} {z}
|
||||
1 4 0.50 vmulps %zmm4, %zmm26, %zmm4 {%k1} {z}
|
||||
1 1 0.50 leal (%rdi,%rdi,2), %esi
|
||||
1 1 0.25 orl $252, %esi
|
||||
1 1 1.00 kmovd %esi, %k1
|
||||
1 4 1.00 vcmpltps %zmm0, %zmm28, %k1 {%k1}
|
||||
1 4 0.50 vmulps %zmm14, %zmm1, %zmm1
|
||||
1 4 0.50 vmulps %zmm1, %zmm21, %zmm21 {%k1} {z}
|
||||
1 4 0.50 vaddps %zmm21, %zmm5, %zmm5
|
||||
1 4 0.50 vmulps %zmm1, %zmm22, %zmm21 {%k1} {z}
|
||||
1 4 0.50 vaddps %zmm21, %zmm24, %zmm21
|
||||
1 4 0.50 vmulps %zmm1, %zmm23, %zmm1 {%k1} {z}
|
||||
1 4 0.50 vaddps %zmm1, %zmm4, %zmm1
|
||||
1 1 1.00 kmovd %eax, %k1
|
||||
1 4 1.00 vcmpltps %zmm0, %zmm29, %k1 {%k1}
|
||||
1 4 0.50 vmulps %zmm14, %zmm2, %zmm2
|
||||
1 4 0.50 vmulps %zmm2, %zmm17, %zmm4 {%k1} {z}
|
||||
1 4 0.50 vmulps %zmm2, %zmm19, %zmm17 {%k1} {z}
|
||||
1 4 0.50 vmulps %zmm2, %zmm20, %zmm2 {%k1} {z}
|
||||
1 1 1.00 kmovd %ecx, %k1
|
||||
1 4 1.00 vcmpltps %zmm0, %zmm30, %k1 {%k1}
|
||||
1 4 0.50 vmulps %zmm14, %zmm3, %zmm3
|
||||
1 4 0.50 vmulps %zmm3, %zmm18, %zmm18 {%k1} {z}
|
||||
1 4 0.50 vaddps %zmm18, %zmm4, %zmm4
|
||||
1 4 0.50 vaddps %zmm4, %zmm5, %zmm4
|
||||
1 4 0.50 vmulps %zmm3, %zmm16, %zmm5 {%k1} {z}
|
||||
1 4 0.50 vaddps %zmm5, %zmm17, %zmm5
|
||||
1 4 0.50 vaddps %zmm5, %zmm21, %zmm5
|
||||
1 4 0.50 vmulps %zmm3, %zmm15, %zmm3 {%k1} {z}
|
||||
1 5 0.50 * movq 176(%r15), %rax
|
||||
1 4 0.50 vaddps %zmm3, %zmm2, %zmm2
|
||||
2 8 0.50 * vmovups (%rax,%rbx), %zmm3
|
||||
1 4 0.50 vsubps %zmm4, %zmm3, %zmm3
|
||||
2 1 1.00 * vmovups %zmm3, (%rax,%rbx)
|
||||
1 4 0.50 vaddps %zmm2, %zmm1, %zmm1
|
||||
2 8 0.50 * vmovups 32(%rax,%rbx), %zmm2
|
||||
1 4 0.50 vsubps %zmm5, %zmm2, %zmm2
|
||||
2 1 1.00 * vmovups %zmm2, 32(%rax,%rbx)
|
||||
2 8 0.50 * vmovups 64(%rax,%rbx), %zmm2
|
||||
1 4 0.50 vsubps %zmm1, %zmm2, %zmm1
|
||||
2 1 1.00 * vmovups %zmm1, 64(%rax,%rbx)
|
||||
1 1 0.25 cmpq %rdx, %r10
|
||||
1 1 0.50 je .LBB4_18
|
||||
1 5 0.50 * movq 160(%r15), %rdi
|
||||
1 1 0.25 incq %rdx
|
||||
1 1 0.50 jmp .LBB4_8
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - SKXDivider
|
||||
[1] - SKXFPDivider
|
||||
[2] - SKXPort0
|
||||
[3] - SKXPort1
|
||||
[4] - SKXPort2
|
||||
[5] - SKXPort3
|
||||
[6] - SKXPort4
|
||||
[7] - SKXPort5
|
||||
[8] - SKXPort6
|
||||
[9] - SKXPort7
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
- - 52.01 14.97 8.49 8.51 3.00 52.02 11.00 2.00
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
- - - - 0.49 0.51 - - - - movslq (%r11,%rdx,4), %rax
|
||||
- - - - - - - - 1.00 - movq %rax, %rsi
|
||||
- - - - - - - - 1.00 - shlq $5, %rsi
|
||||
- - - 1.00 - - - - - - leaq (%rsi,%rsi,2), %rbx
|
||||
- - 0.01 0.99 0.50 0.50 - - - - vmovups (%rdi,%rbx), %zmm15
|
||||
- - - - 0.50 0.50 - 1.00 - - vmovups 32(%rdi,%rbx), %zmm16
|
||||
- - - 1.00 0.50 0.50 - - - - vmovups 64(%rdi,%rbx), %zmm27
|
||||
- - - 0.99 0.51 0.49 - 0.01 - - vmovups 128(%rsp), %zmm1
|
||||
- - 0.01 - - - - 0.99 - - vsubps %zmm15, %zmm1, %zmm24
|
||||
- - - 1.00 0.49 0.51 - - - - vmovups 320(%rsp), %zmm1
|
||||
- - 0.99 - - - - 0.01 - - vsubps %zmm16, %zmm1, %zmm25
|
||||
- - 0.01 - - - - 0.99 - - vsubps %zmm27, %zmm9, %zmm26
|
||||
- - 0.01 0.99 0.51 0.49 - - - - vmovups (%rsp), %zmm1
|
||||
- - - - - - - 1.00 - - vsubps %zmm15, %zmm1, %zmm21
|
||||
- - - - 0.49 0.51 - 1.00 - - vmovups 256(%rsp), %zmm1
|
||||
- - 0.01 - - - - 0.99 - - vsubps %zmm16, %zmm1, %zmm22
|
||||
- - 0.99 - - - - 0.01 - - vsubps %zmm27, %zmm10, %zmm23
|
||||
- - - 1.00 0.51 0.49 - - - - vmovups 448(%rsp), %zmm1
|
||||
- - 0.01 - - - - 0.99 - - vsubps %zmm15, %zmm1, %zmm17
|
||||
- - 0.01 - 0.49 0.51 - 0.99 - - vmovups 192(%rsp), %zmm1
|
||||
- - - - - - - 1.00 - - vsubps %zmm16, %zmm1, %zmm19
|
||||
- - 0.01 - - - - 0.99 - - vsubps %zmm27, %zmm11, %zmm20
|
||||
- - 0.99 - 0.50 0.50 - 0.01 - - vmovups 384(%rsp), %zmm1
|
||||
- - - - - - - 1.00 - - vsubps %zmm15, %zmm1, %zmm18
|
||||
- - 0.01 - - - - 0.99 - - vsubps %zmm16, %zmm8, %zmm16
|
||||
- - - - - - - 1.00 - - vsubps %zmm27, %zmm12, %zmm15
|
||||
- - 1.00 - - - - - - - vmulps %zmm26, %zmm26, %zmm27
|
||||
- - 1.00 - - - - - - - vfmadd231ps %zmm25, %zmm25, %zmm27
|
||||
- - 0.99 - - - - 0.01 - - vfmadd231ps %zmm24, %zmm24, %zmm27
|
||||
- - - - - - - 1.00 - - vmulps %zmm23, %zmm23, %zmm28
|
||||
- - - - - - - 1.00 - - vfmadd231ps %zmm22, %zmm22, %zmm28
|
||||
- - 0.01 - - - - 0.99 - - vfmadd231ps %zmm21, %zmm21, %zmm28
|
||||
- - 0.01 - - - - 0.99 - - vmulps %zmm20, %zmm20, %zmm29
|
||||
- - 0.01 - - - - 0.99 - - vfmadd231ps %zmm19, %zmm19, %zmm29
|
||||
- - - - - - - 1.00 - - vfmadd231ps %zmm17, %zmm17, %zmm29
|
||||
- - 0.01 - - - - 0.99 - - vmulps %zmm15, %zmm15, %zmm30
|
||||
- - 0.01 - - - - 0.99 - - vfmadd231ps %zmm16, %zmm16, %zmm30
|
||||
- - 2.00 - - - - 1.00 - - vrcp14ps %zmm27, %zmm31
|
||||
- - 2.00 - - - - 1.00 - - vrcp14ps %zmm28, %zmm1
|
||||
- - 2.00 - - - - 1.00 - - vrcp14ps %zmm29, %zmm2
|
||||
- - 1.00 - - - - - - - vfmadd231ps %zmm18, %zmm18, %zmm30
|
||||
- - 2.00 - - - - 1.00 - - vrcp14ps %zmm30, %zmm3
|
||||
- - - - - - - 1.00 - - vmulps %zmm31, %zmm6, %zmm4
|
||||
- - 1.00 - - - - - - - vmulps %zmm4, %zmm31, %zmm4
|
||||
- - 1.00 - - - - - - - vmulps %zmm4, %zmm31, %zmm4
|
||||
- - 1.00 - - - - - - - vaddps %zmm13, %zmm4, %zmm5
|
||||
- - - - - - - 1.00 - - vmulps %zmm31, %zmm7, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm5, %zmm31, %zmm5
|
||||
- - 1.00 - - - - - - - vmulps %zmm1, %zmm6, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm31, %zmm1, %zmm31
|
||||
- - 0.99 - - - - 0.01 - - vmulps %zmm31, %zmm1, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm5, %zmm4, %zmm4
|
||||
- - - - - - - 1.00 - - vaddps %zmm13, %zmm31, %zmm5
|
||||
- - - - - - - 1.00 - - vmulps %zmm1, %zmm7, %zmm1
|
||||
- - - - - - - 1.00 - - vmulps %zmm5, %zmm1, %zmm1
|
||||
- - - - - - - 1.00 - - vmulps %zmm2, %zmm6, %zmm5
|
||||
- - - - - - - 1.00 - - vmulps %zmm5, %zmm2, %zmm5
|
||||
- - - - - - - 1.00 - - vmulps %zmm5, %zmm2, %zmm5
|
||||
- - 0.99 - - - - 0.01 - - vmulps %zmm1, %zmm31, %zmm1
|
||||
- - - - - - - 1.00 - - vaddps %zmm13, %zmm5, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm2, %zmm7, %zmm2
|
||||
- - - - - - - 1.00 - - vmulps %zmm31, %zmm2, %zmm2
|
||||
- - - - - - - 1.00 - - vmulps %zmm3, %zmm6, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm31, %zmm3, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm31, %zmm3, %zmm31
|
||||
- - 1.00 - - - - - - - vmulps %zmm2, %zmm5, %zmm2
|
||||
- - 1.00 - - - - - - - vaddps %zmm13, %zmm31, %zmm5
|
||||
- - 1.00 - - - - - - - vmulps %zmm3, %zmm7, %zmm3
|
||||
- - 1.00 - - - - - - - vmulps %zmm5, %zmm3, %zmm3
|
||||
- - 1.00 - - - - - - - vmulps %zmm3, %zmm31, %zmm3
|
||||
- - - - - - - - - - xorl %esi, %esi
|
||||
- - - - - - - - - - xorl %edi, %edi
|
||||
- - - - - - - - 1.00 - testl $2147483647, %eax
|
||||
- - - - - - - - 1.00 - sete %sil
|
||||
- - - - - - - - 1.00 - setne %dil
|
||||
- - - 1.00 - - - - - - movl $255, %eax
|
||||
- - - - - - - - 1.00 - cmovel %r8d, %eax
|
||||
- - - 1.00 - - - - - - movl $255, %ecx
|
||||
- - - - - - - - 1.00 - cmovel %r9d, %ecx
|
||||
- - - 1.00 - - - - - - xorl $255, %esi
|
||||
- - - - - - - 1.00 - - kmovd %esi, %k1
|
||||
- - - - - - - 1.00 - - vcmpltps %zmm0, %zmm27, %k1 {%k1}
|
||||
- - - - - - - 1.00 - - vmulps %zmm14, %zmm4, %zmm4
|
||||
- - 1.00 - - - - - - - vmulps %zmm4, %zmm24, %zmm5 {%k1} {z}
|
||||
- - - - - - - 1.00 - - vmulps %zmm4, %zmm25, %zmm24 {%k1} {z}
|
||||
- - 1.00 - - - - - - - vmulps %zmm4, %zmm26, %zmm4 {%k1} {z}
|
||||
- - - 1.00 - - - - - - leal (%rdi,%rdi,2), %esi
|
||||
- - - - - - - - 1.00 - orl $252, %esi
|
||||
- - - - - - - 1.00 - - kmovd %esi, %k1
|
||||
- - - - - - - 1.00 - - vcmpltps %zmm0, %zmm28, %k1 {%k1}
|
||||
- - 0.99 - - - - 0.01 - - vmulps %zmm14, %zmm1, %zmm1
|
||||
- - 0.99 - - - - 0.01 - - vmulps %zmm1, %zmm21, %zmm21 {%k1} {z}
|
||||
- - 1.00 - - - - - - - vaddps %zmm21, %zmm5, %zmm5
|
||||
- - 0.01 - - - - 0.99 - - vmulps %zmm1, %zmm22, %zmm21 {%k1} {z}
|
||||
- - - - - - - 1.00 - - vaddps %zmm21, %zmm24, %zmm21
|
||||
- - 0.99 - - - - 0.01 - - vmulps %zmm1, %zmm23, %zmm1 {%k1} {z}
|
||||
- - 1.00 - - - - - - - vaddps %zmm1, %zmm4, %zmm1
|
||||
- - - - - - - 1.00 - - kmovd %eax, %k1
|
||||
- - - - - - - 1.00 - - vcmpltps %zmm0, %zmm29, %k1 {%k1}
|
||||
- - - - - - - 1.00 - - vmulps %zmm14, %zmm2, %zmm2
|
||||
- - 1.00 - - - - - - - vmulps %zmm2, %zmm17, %zmm4 {%k1} {z}
|
||||
- - - - - - - 1.00 - - vmulps %zmm2, %zmm19, %zmm17 {%k1} {z}
|
||||
- - 1.00 - - - - - - - vmulps %zmm2, %zmm20, %zmm2 {%k1} {z}
|
||||
- - - - - - - 1.00 - - kmovd %ecx, %k1
|
||||
- - - - - - - 1.00 - - vcmpltps %zmm0, %zmm30, %k1 {%k1}
|
||||
- - 1.00 - - - - - - - vmulps %zmm14, %zmm3, %zmm3
|
||||
- - - - - - - 1.00 - - vmulps %zmm3, %zmm18, %zmm18 {%k1} {z}
|
||||
- - 0.99 - - - - 0.01 - - vaddps %zmm18, %zmm4, %zmm4
|
||||
- - 1.00 - - - - - - - vaddps %zmm4, %zmm5, %zmm4
|
||||
- - - - - - - 1.00 - - vmulps %zmm3, %zmm16, %zmm5 {%k1} {z}
|
||||
- - 1.00 - - - - - - - vaddps %zmm5, %zmm17, %zmm5
|
||||
- - 0.99 - - - - 0.01 - - vaddps %zmm5, %zmm21, %zmm5
|
||||
- - 1.00 - - - - - - - vmulps %zmm3, %zmm15, %zmm3 {%k1} {z}
|
||||
- - - - 1.00 - - - - - movq 176(%r15), %rax
|
||||
- - 0.99 - - - - 0.01 - - vaddps %zmm3, %zmm2, %zmm2
|
||||
- - - 1.00 0.50 0.50 - - - - vmovups (%rax,%rbx), %zmm3
|
||||
- - 0.99 - - - - 0.01 - - vsubps %zmm4, %zmm3, %zmm3
|
||||
- - - - - - 1.00 - - 1.00 vmovups %zmm3, (%rax,%rbx)
|
||||
- - 1.00 - - - - - - - vaddps %zmm2, %zmm1, %zmm1
|
||||
- - - 1.00 0.50 0.50 - - - - vmovups 32(%rax,%rbx), %zmm2
|
||||
- - 1.00 - - - - - - - vsubps %zmm5, %zmm2, %zmm2
|
||||
- - - - - - 1.00 - - 1.00 vmovups %zmm2, 32(%rax,%rbx)
|
||||
- - - 1.00 0.50 0.50 - - - - vmovups 64(%rax,%rbx), %zmm2
|
||||
- - 0.99 - - - - 0.01 - - vsubps %zmm1, %zmm2, %zmm1
|
||||
- - - - - 1.00 1.00 - - - vmovups %zmm1, 64(%rax,%rbx)
|
||||
- - - - - - - - 1.00 - cmpq %rdx, %r10
|
||||
- - - - - - - - 1.00 - je .LBB4_18
|
||||
- - - - 0.50 0.50 - - - - movq 160(%r15), %rdi
|
||||
- - - 1.00 - - - - - - incq %rdx
|
||||
- - - - - - - - 1.00 - jmp .LBB4_8
|
161
static_analysis/jan/analyses/gromacs-icx-avx512-sp-osaca.out
Normal file
161
static_analysis/jan/analyses/gromacs-icx-avx512-sp-osaca.out
Normal file
@@ -0,0 +1,161 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: gromacs-icx-avx512-sp.s
|
||||
Architecture: CSX
|
||||
Timestamp: 2023-02-10 16:31:04
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
1662 | | | | | | | | || | | # pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
|
||||
1663 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
1664 | | | | | | | | || | | .LBB4_8: # =>This Inner Loop Header: Depth=1
|
||||
1665 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%r11,%rdx,4), %rax
|
||||
1666 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || 1.0 | | movq %rax, %rsi
|
||||
1667 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $5, %rsi
|
||||
1668 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rsi,%rsi,2), %rbx
|
||||
1669 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%rdi,%rbx), %zmm15 # AlignMOV convert to UnAlignMOV
|
||||
1670 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 32(%rdi,%rbx), %zmm16 # AlignMOV convert to UnAlignMOV
|
||||
1671 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovups 64(%rdi,%rbx), %zmm27 # AlignMOV convert to UnAlignMOV
|
||||
1672 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 128(%rsp), %zmm1 # 64-byte Reload
|
||||
1673 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm15, %zmm1, %zmm24
|
||||
1674 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 320(%rsp), %zmm1 # 64-byte Reload
|
||||
1675 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm16, %zmm1, %zmm25
|
||||
1676 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubps %zmm27, %zmm9, %zmm26
|
||||
1677 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%rsp), %zmm1 # 64-byte Reload
|
||||
1678 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm15, %zmm1, %zmm21
|
||||
1679 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 256(%rsp), %zmm1 # 64-byte Reload
|
||||
1680 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm16, %zmm1, %zmm22
|
||||
1681 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm27, %zmm10, %zmm23
|
||||
1682 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 448(%rsp), %zmm1 # 64-byte Reload
|
||||
1683 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm15, %zmm1, %zmm17
|
||||
1684 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 192(%rsp), %zmm1 # 64-byte Reload
|
||||
1685 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm16, %zmm1, %zmm19
|
||||
1686 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm27, %zmm11, %zmm20
|
||||
1687 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 384(%rsp), %zmm1 # 64-byte Reload
|
||||
1688 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm15, %zmm1, %zmm18
|
||||
1689 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm16, %zmm8, %zmm16
|
||||
1690 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm27, %zmm12, %zmm15
|
||||
1691 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm26, %zmm26, %zmm27
|
||||
1692 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm25, %zmm25, %zmm27 # zmm27 = (zmm25 * zmm25) + zmm27
|
||||
1693 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm24, %zmm24, %zmm27 # zmm27 = (zmm24 * zmm24) + zmm27
|
||||
1694 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm23, %zmm23, %zmm28
|
||||
1695 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm22, %zmm22, %zmm28 # zmm28 = (zmm22 * zmm22) + zmm28
|
||||
1696 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm21, %zmm21, %zmm28 # zmm28 = (zmm21 * zmm21) + zmm28
|
||||
1697 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm20, %zmm20, %zmm29
|
||||
1698 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm19, %zmm19, %zmm29 # zmm29 = (zmm19 * zmm19) + zmm29
|
||||
1699 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm17, %zmm17, %zmm29 # zmm29 = (zmm17 * zmm17) + zmm29
|
||||
1700 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm15, %zmm15, %zmm30
|
||||
1701 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm16, %zmm16, %zmm30 # zmm30 = (zmm16 * zmm16) + zmm30
|
||||
1702 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14ps %zmm27, %zmm31
|
||||
1703 | 2.50 | | | | | 0.50 | | || | | vrcp14ps %zmm28, %zmm1
|
||||
1704 | 2.50 | | | | | 0.50 | | || | | vrcp14ps %zmm29, %zmm2
|
||||
1705 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm18, %zmm18, %zmm30 # zmm30 = (zmm18 * zmm18) + zmm30
|
||||
1706 | 2.50 | | | | | 0.50 | | || | | vrcp14ps %zmm30, %zmm3
|
||||
1707 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm31, %zmm6, %zmm4
|
||||
1708 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm4, %zmm31, %zmm4
|
||||
1709 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm4, %zmm31, %zmm4
|
||||
1710 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddps %zmm13, %zmm4, %zmm5
|
||||
1711 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm31, %zmm7, %zmm31
|
||||
1712 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm5, %zmm31, %zmm5
|
||||
1713 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm1, %zmm6, %zmm31
|
||||
1714 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm31, %zmm1, %zmm31
|
||||
1715 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm31, %zmm1, %zmm31
|
||||
1716 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm5, %zmm4, %zmm4
|
||||
1717 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm13, %zmm31, %zmm5
|
||||
1718 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm1, %zmm7, %zmm1
|
||||
1719 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm5, %zmm1, %zmm1
|
||||
1720 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm2, %zmm6, %zmm5
|
||||
1721 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm5, %zmm2, %zmm5
|
||||
1722 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm5, %zmm2, %zmm5
|
||||
1723 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm1, %zmm31, %zmm1
|
||||
1724 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm13, %zmm5, %zmm31
|
||||
1725 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm2, %zmm7, %zmm2
|
||||
1726 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm31, %zmm2, %zmm2
|
||||
1727 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm3, %zmm6, %zmm31
|
||||
1728 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm31, %zmm3, %zmm31
|
||||
1729 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm31, %zmm3, %zmm31
|
||||
1730 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm2, %zmm5, %zmm2
|
||||
1731 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm13, %zmm31, %zmm5
|
||||
1732 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm3, %zmm7, %zmm3
|
||||
1733 | 1.00 | | | | | 0.00 | | || | | vmulps %zmm5, %zmm3, %zmm3
|
||||
1734 | 1.00 | | | | | 0.00 | | || | | vmulps %zmm3, %zmm31, %zmm3
|
||||
1735 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | xorl %esi, %esi
|
||||
1736 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | xorl %edi, %edi
|
||||
1737 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | testl $2147483647, %eax # imm = 0x7FFFFFFF
|
||||
1738 | 0.00 | | | | | | 1.00 | || | | sete %sil
|
||||
1739 | 0.00 | | | | | | 1.00 | || | | setne %dil
|
||||
1740 | 0.00 | 0.75 | | | | 0.00 | 0.25 | || | | movl $255, %eax
|
||||
1741 | 0.00 | | | | | | 1.00 | || | | cmovel %r8d, %eax
|
||||
1742 | 0.00 | 0.75 | | | | 0.00 | 0.25 | || | | movl $255, %ecx
|
||||
1743 | 0.00 | | | | | | 1.00 | || | | cmovel %r9d, %ecx
|
||||
1744 | 0.00 | 0.25 | | | | 0.00 | 0.75 | || | | xorl $255, %esi
|
||||
1745 | 1.00 | | | | | | | || | | kmovd %esi, %k1
|
||||
1746 | | | | | | 1.00 | | || | | vcmpltps %zmm0, %zmm27, %k1 {%k1}
|
||||
1747 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm14, %zmm4, %zmm4
|
||||
1748 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm4, %zmm24, %zmm5 {%k1} {z}
|
||||
1749 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm4, %zmm25, %zmm24 {%k1} {z}
|
||||
1750 | 0.25 | | | | | 0.75 | | || | | vmulps %zmm4, %zmm26, %zmm4 {%k1} {z}
|
||||
1751 | | 1.00 | | | | 0.00 | | || | | leal (%rdi,%rdi,2), %esi
|
||||
1752 | 0.00 | 0.75 | | | | 0.00 | 0.25 | || | | orl $252, %esi
|
||||
1753 | 1.00 | | | | | | | || | | kmovd %esi, %k1
|
||||
1754 | | | | | | 1.00 | | || | | vcmpltps %zmm0, %zmm28, %k1 {%k1}
|
||||
1755 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm14, %zmm1, %zmm1
|
||||
1756 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm1, %zmm21, %zmm21 {%k1} {z}
|
||||
1757 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddps %zmm21, %zmm5, %zmm5
|
||||
1758 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm1, %zmm22, %zmm21 {%k1} {z}
|
||||
1759 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm21, %zmm24, %zmm21
|
||||
1760 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm1, %zmm23, %zmm1 {%k1} {z}
|
||||
1761 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm1, %zmm4, %zmm1
|
||||
1762 | 1.00 | | | | | | | || | | kmovd %eax, %k1
|
||||
1763 | | | | | | 1.00 | | || | | vcmpltps %zmm0, %zmm29, %k1 {%k1}
|
||||
1764 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm14, %zmm2, %zmm2
|
||||
1765 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm2, %zmm17, %zmm4 {%k1} {z}
|
||||
1766 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm2, %zmm19, %zmm17 {%k1} {z}
|
||||
1767 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm2, %zmm20, %zmm2 {%k1} {z}
|
||||
1768 | 1.00 | | | | | | | || | | kmovd %ecx, %k1
|
||||
1769 | | | | | | 1.00 | | || | | vcmpltps %zmm0, %zmm30, %k1 {%k1}
|
||||
1770 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm14, %zmm3, %zmm3
|
||||
1771 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm3, %zmm18, %zmm18 {%k1} {z}
|
||||
1772 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm18, %zmm4, %zmm4
|
||||
1773 | 0.25 | | | | | 0.75 | | || 4.0 | | vaddps %zmm4, %zmm5, %zmm4
|
||||
1774 | 0.00 | | | | | 1.00 | | || | | vmulps %zmm3, %zmm16, %zmm5 {%k1} {z}
|
||||
1775 | 0.00 | | | | | 1.00 | | || | | vaddps %zmm5, %zmm17, %zmm5
|
||||
1776 | 0.00 | | | | | 1.00 | | || | | vaddps %zmm5, %zmm21, %zmm5
|
||||
1777 | 0.00 | | | | | 1.00 | | || | | vmulps %zmm3, %zmm15, %zmm3 {%k1} {z}
|
||||
1778 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movq 176(%r15), %rax
|
||||
1779 | 0.00 | | | | | 1.00 | | || | | vaddps %zmm3, %zmm2, %zmm2
|
||||
1780 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%rax,%rbx), %zmm3 # AlignMOV convert to UnAlignMOV
|
||||
1781 | 0.00 | | | | | 1.00 | | || 4.0 | | vsubps %zmm4, %zmm3, %zmm3
|
||||
1782 | | | 0.50 | 0.50 | 1.00 | | | || 0.0 | | vmovups %zmm3, (%rax,%rbx) # AlignMOV convert to UnAlignMOV
|
||||
1783 | 0.00 | | | | | 1.00 | | || | | vaddps %zmm2, %zmm1, %zmm1
|
||||
1784 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 32(%rax,%rbx), %zmm2 # AlignMOV convert to UnAlignMOV
|
||||
1785 | 0.00 | | | | | 1.00 | | || | | vsubps %zmm5, %zmm2, %zmm2
|
||||
1786 | | | 0.50 | 0.50 | 1.00 | | | || | | vmovups %zmm2, 32(%rax,%rbx) # AlignMOV convert to UnAlignMOV
|
||||
1787 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups 64(%rax,%rbx), %zmm2 # AlignMOV convert to UnAlignMOV
|
||||
1788 | 0.00 | | | | | 1.00 | | || | | vsubps %zmm1, %zmm2, %zmm1
|
||||
1789 | | | 0.50 | 0.50 | 1.00 | | | || | | vmovups %zmm1, 64(%rax,%rbx) # AlignMOV convert to UnAlignMOV
|
||||
1790 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rdx, %r10
|
||||
1791 | | | | | | | | || | | * je .LBB4_18
|
||||
1792 | | | | | | | | || | | # %bb.9: # in Loop: Header=BB4_8 Depth=1
|
||||
1793 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movq 160(%r15), %rdi
|
||||
1794 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | 1.0 | incq %rdx
|
||||
1795 | 0.00 | | | | | | 1.00 | || | | jmp .LBB4_8
|
||||
1796 | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
50.0 9.00 9.50 8.00 9.50 8.00 3.00 50.0 9.00 79.0 1.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
1794 | 1.0 | incq %rdx | [1794]
|
||||
|
88
static_analysis/jan/analyses/lammps-icc-avx2-iaca.out
Normal file
88
static_analysis/jan/analyses/lammps-icc-avx2-iaca.out
Normal file
@@ -0,0 +1,88 @@
|
||||
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||
Analyzed File - lammps-icc-avx2.o
|
||||
Binary Format - 64Bit
|
||||
Architecture - SKX
|
||||
Analysis Type - Throughput
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
Block Throughput: 25.58 Cycles Throughput Bottleneck: Backend
|
||||
Loop Count: 22
|
||||
Port Binding In Cycles Per Iteration:
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Cycles | 13.7 8.0 | 13.6 | 5.5 5.5 | 5.5 5.5 | 0.0 | 13.7 | 7.0 | 0.0 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
|
||||
DV - Divider pipe (on port 0)
|
||||
D - Data fetch pipe (on ports 2 and 3)
|
||||
F - Macro Fusion with the previous instruction occurred
|
||||
* - instruction micro-ops not bound to a port
|
||||
^ - Micro Fusion occurred
|
||||
# - ESP Tracking sync uop was issued
|
||||
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||
X - instruction not supported, was not accounted in Analysis
|
||||
|
||||
| Num Of | Ports pressure in cycles | |
|
||||
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------------
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovdqu xmm0, xmmword ptr [rbx+rdx*4]
|
||||
| 1 | 1.0 | | | | | | | | vmovq rcx, xmm0
|
||||
| 1 | | | | | | 1.0 | | | vpunpckhqdq xmm2, xmm0, xmm0
|
||||
| 1 | 1.0 | | | | | | | | vmovq r15, xmm2
|
||||
| 1* | | | | | | | | | mov r8d, ecx
|
||||
| 1 | | | | | | | 1.0 | | shr rcx, 0x20
|
||||
| 1 | | | | | | 1.0 | | | lea r14d, ptr [rcx+rcx*2]
|
||||
| 1 | | | | | | 1.0 | | | lea r8d, ptr [r8+r8*2]
|
||||
| 1 | | | | | | | 1.0 | | movsxd rcx, r8d
|
||||
| 1 | | | | | | | 1.0 | | movsxd r8, r14d
|
||||
| 1* | | | | | | | | | mov r14d, r15d
|
||||
| 1 | | | | | | | 1.0 | | shr r15, 0x20
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups xmm7, xmmword ptr [r11+rcx*8]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovups xmm6, xmmword ptr [r11+r8*8]
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovq xmm14, qword ptr [r11+rcx*8+0x10]
|
||||
| 1 | | 0.3 | | | | 0.7 | | | lea r14d, ptr [r14+r14*2]
|
||||
| 1 | | | | | | | 1.0 | | movsxd r14, r14d
|
||||
| 1 | | 0.7 | | | | 0.3 | | | lea r15d, ptr [r15+r15*2]
|
||||
| 1 | | | | | | | 1.0 | | movsxd r15, r15d
|
||||
| 2 | | | 0.5 0.5 | 0.5 0.5 | | 1.0 | | | vmovhpd xmm15, xmm14, qword ptr [r11+r8*8+0x10]
|
||||
| 2 | | 1.0 | 0.5 0.5 | 0.5 0.5 | | | | | vinsertf128 ymm1, ymm7, xmmword ptr [r11+r14*8], 0x1
|
||||
| 1 | | | 0.5 0.5 | 0.5 0.5 | | | | | vmovq xmm0, qword ptr [r11+r14*8+0x10]
|
||||
| 2 | | 0.3 | 0.5 0.5 | 0.5 0.5 | | 0.7 | | | vinsertf128 ymm6, ymm6, xmmword ptr [r11+r15*8], 0x1
|
||||
| 2 | | | 0.5 0.5 | 0.5 0.5 | | 1.0 | | | vmovhpd xmm2, xmm0, qword ptr [r11+r15*8+0x10]
|
||||
| 1 | | | | | | 1.0 | | | vunpcklpd ymm14, ymm1, ymm6
|
||||
| 1 | | | | | | 1.0 | | | vunpckhpd ymm1, ymm1, ymm6
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vsubpd ymm6, ymm10, ymm14
|
||||
| 1 | | | | | | 1.0 | | | vinsertf128 ymm7, ymm15, xmm2, 0x1
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vsubpd ymm2, ymm9, ymm1
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vsubpd ymm0, ymm8, ymm7
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vmulpd ymm14, ymm2, ymm2
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vfmadd231pd ymm14, ymm6, ymm6
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vfmadd231pd ymm14, ymm0, ymm0
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vcmppd ymm1, ymm14, ymm5, 0x1
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vpcmpeqd ymm7, ymm7, ymm7
|
||||
| 2 | 1.0 | | | | | 1.0 | | | vptest ymm1, ymm7
|
||||
| 1 | 1.0 8.0 | | | | | | | | vdivpd ymm7, ymm4, ymm14
|
||||
| 2^ | | 1.0 | 0.5 0.5 | 0.5 0.5 | | | | | vmulpd ymm14, ymm7, ymmword ptr [rsp+0x60]
|
||||
| 1 | | 1.0 | | | | | | | vmulpd ymm14, ymm7, ymm14
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vmulpd ymm15, ymm7, ymm14
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vfmsub213pd ymm14, ymm7, ymm3
|
||||
| 2^ | 0.7 | 0.3 | 0.5 0.5 | 0.5 0.5 | | | | | vmulpd ymm7, ymm7, ymmword ptr [rsp+0x40]
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vmulpd ymm15, ymm15, ymm7
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vmulpd ymm7, ymm15, ymm14
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vmulpd ymm6, ymm6, ymm7
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vmulpd ymm2, ymm2, ymm7
|
||||
| 1 | | | | | | 1.0 | | | vandpd ymm6, ymm1, ymm6
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vaddpd ymm13, ymm13, ymm6
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vmulpd ymm6, ymm0, ymm7
|
||||
| 1 | | | | | | 1.0 | | | vandpd ymm0, ymm1, ymm2
|
||||
| 1 | | | | | | 1.0 | | | vandpd ymm1, ymm1, ymm6
|
||||
| 1 | 0.3 | 0.7 | | | | | | | vaddpd ymm12, ymm12, ymm0
|
||||
| 1 | 0.7 | 0.3 | | | | | | | vaddpd ymm11, ymm11, ymm1
|
||||
| 1 | | | | | | | 1.0 | | add rdx, 0x4
|
||||
| 1* | | | | | | | | | cmp rdx, rsi
|
||||
| 0*F | | | | | | | | | jb 0xffffffffffffff02
|
||||
Total Num Of Uops: 62
|
||||
Analysis Notes:
|
||||
Backend allocation was stalled due to unavailable allocation resources.
|
156
static_analysis/jan/analyses/lammps-icc-avx2-mca-csx.out
Normal file
156
static_analysis/jan/analyses/lammps-icc-avx2-mca-csx.out
Normal file
@@ -0,0 +1,156 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 5600
|
||||
Total Cycles: 2352
|
||||
Total uOps: 6300
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.68
|
||||
IPC: 2.38
|
||||
Block RThroughput: 10.5
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 6 0.50 * vmovdqu (%rbx,%rdx,4), %xmm0
|
||||
1 2 1.00 vmovq %xmm0, %rcx
|
||||
1 1 1.00 vpunpckhqdq %xmm0, %xmm0, %xmm2
|
||||
1 2 1.00 vmovq %xmm2, %r15
|
||||
1 1 0.25 movl %ecx, %r8d
|
||||
1 1 0.50 shrq $32, %rcx
|
||||
1 1 0.50 leal (%rcx,%rcx,2), %r14d
|
||||
1 1 0.50 leal (%r8,%r8,2), %r8d
|
||||
1 1 0.25 movslq %r8d, %rcx
|
||||
1 1 0.25 movslq %r14d, %r8
|
||||
1 1 0.25 movl %r15d, %r14d
|
||||
1 1 0.50 shrq $32, %r15
|
||||
1 6 0.50 * vmovups (%r11,%rcx,8), %xmm7
|
||||
1 6 0.50 * vmovups (%r11,%r8,8), %xmm6
|
||||
1 5 0.50 * vmovq 16(%r11,%rcx,8), %xmm14
|
||||
1 1 0.50 leal (%r14,%r14,2), %r14d
|
||||
1 1 0.25 movslq %r14d, %r14
|
||||
1 1 0.50 leal (%r15,%r15,2), %r15d
|
||||
1 1 0.25 movslq %r15d, %r15
|
||||
2 6 1.00 * vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15
|
||||
2 7 0.50 * vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1
|
||||
1 5 0.50 * vmovq 16(%r11,%r14,8), %xmm0
|
||||
2 7 0.50 * vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6
|
||||
2 6 1.00 * vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2
|
||||
1 1 1.00 vunpcklpd %ymm6, %ymm1, %ymm14
|
||||
1 1 1.00 vunpckhpd %ymm6, %ymm1, %ymm1
|
||||
1 4 0.50 vsubpd %ymm14, %ymm10, %ymm6
|
||||
1 3 1.00 vinsertf128 $1, %xmm2, %ymm15, %ymm7
|
||||
1 4 0.50 vsubpd %ymm1, %ymm9, %ymm2
|
||||
1 4 0.50 vsubpd %ymm7, %ymm8, %ymm0
|
||||
1 4 0.50 vmulpd %ymm2, %ymm2, %ymm14
|
||||
1 4 0.50 vfmadd231pd %ymm6, %ymm6, %ymm14
|
||||
1 4 0.50 vfmadd231pd %ymm0, %ymm0, %ymm14
|
||||
1 4 0.50 vcmpltpd %ymm5, %ymm14, %ymm1
|
||||
1 1 0.50 vpcmpeqd %ymm7, %ymm7, %ymm7
|
||||
2 3 1.00 vptest %ymm7, %ymm1
|
||||
1 14 5.00 vdivpd %ymm14, %ymm4, %ymm7
|
||||
2 11 0.50 * vmulpd 96(%rsp), %ymm7, %ymm14
|
||||
1 4 0.50 vmulpd %ymm14, %ymm7, %ymm14
|
||||
1 4 0.50 vmulpd %ymm14, %ymm7, %ymm15
|
||||
1 4 0.50 vfmsub213pd %ymm3, %ymm7, %ymm14
|
||||
2 11 0.50 * vmulpd 64(%rsp), %ymm7, %ymm7
|
||||
1 4 0.50 vmulpd %ymm7, %ymm15, %ymm15
|
||||
1 4 0.50 vmulpd %ymm14, %ymm15, %ymm7
|
||||
1 4 0.50 vmulpd %ymm7, %ymm6, %ymm6
|
||||
1 4 0.50 vmulpd %ymm7, %ymm2, %ymm2
|
||||
1 1 0.33 vandpd %ymm6, %ymm1, %ymm6
|
||||
1 4 0.50 vaddpd %ymm6, %ymm13, %ymm13
|
||||
1 4 0.50 vmulpd %ymm7, %ymm0, %ymm6
|
||||
1 1 0.33 vandpd %ymm2, %ymm1, %ymm0
|
||||
1 1 0.33 vandpd %ymm6, %ymm1, %ymm1
|
||||
1 4 0.50 vaddpd %ymm0, %ymm12, %ymm12
|
||||
1 4 0.50 vaddpd %ymm1, %ymm11, %ymm11
|
||||
1 1 0.25 addq $4, %rdx
|
||||
1 1 0.25 cmpq %rsi, %rdx
|
||||
1 1 0.50 jb ..B1.22
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - SKXDivider
|
||||
[1] - SKXFPDivider
|
||||
[2] - SKXPort0
|
||||
[3] - SKXPort1
|
||||
[4] - SKXPort2
|
||||
[5] - SKXPort3
|
||||
[6] - SKXPort4
|
||||
[7] - SKXPort5
|
||||
[8] - SKXPort6
|
||||
[9] - SKXPort7
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
- 5.00 16.00 14.12 5.50 5.50 - 13.47 8.41 -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
- - - - 0.50 0.50 - - - - vmovdqu (%rbx,%rdx,4), %xmm0
|
||||
- - 1.00 - - - - - - - vmovq %xmm0, %rcx
|
||||
- - - - - - - 1.00 - - vpunpckhqdq %xmm0, %xmm0, %xmm2
|
||||
- - 1.00 - - - - - - - vmovq %xmm2, %r15
|
||||
- - - - - - - - 1.00 - movl %ecx, %r8d
|
||||
- - 0.06 - - - - - 0.94 - shrq $32, %rcx
|
||||
- - - 0.02 - - - 0.98 - - leal (%rcx,%rcx,2), %r14d
|
||||
- - - 0.02 - - - 0.98 - - leal (%r8,%r8,2), %r8d
|
||||
- - 0.47 0.02 - - - - 0.51 - movslq %r8d, %rcx
|
||||
- - 0.46 0.02 - - - 0.01 0.51 - movslq %r14d, %r8
|
||||
- - 0.03 0.01 - - - 0.45 0.51 - movl %r15d, %r14d
|
||||
- - 0.51 - - - - - 0.49 - shrq $32, %r15
|
||||
- - - - 0.49 0.51 - - - - vmovups (%r11,%rcx,8), %xmm7
|
||||
- - - - 0.49 0.51 - - - - vmovups (%r11,%r8,8), %xmm6
|
||||
- - - - 0.52 0.48 - - - - vmovq 16(%r11,%rcx,8), %xmm14
|
||||
- - - 0.02 - - - 0.98 - - leal (%r14,%r14,2), %r14d
|
||||
- - 0.01 0.01 - - - 0.01 0.97 - movslq %r14d, %r14
|
||||
- - - 0.03 - - - 0.97 - - leal (%r15,%r15,2), %r15d
|
||||
- - 0.04 - - - - - 0.96 - movslq %r15d, %r15
|
||||
- - - - 0.07 0.93 - 1.00 - - vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15
|
||||
- - 0.03 0.46 0.49 0.51 - 0.51 - - vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1
|
||||
- - - - 0.51 0.49 - - - - vmovq 16(%r11,%r14,8), %xmm0
|
||||
- - 0.47 0.02 0.93 0.07 - 0.51 - - vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6
|
||||
- - - - 0.50 0.50 - 1.00 - - vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2
|
||||
- - - - - - - 1.00 - - vunpcklpd %ymm6, %ymm1, %ymm14
|
||||
- - - - - - - 1.00 - - vunpckhpd %ymm6, %ymm1, %ymm1
|
||||
- - 0.01 0.99 - - - - - - vsubpd %ymm14, %ymm10, %ymm6
|
||||
- - - - - - - 1.00 - - vinsertf128 $1, %xmm2, %ymm15, %ymm7
|
||||
- - 0.96 0.04 - - - - - - vsubpd %ymm1, %ymm9, %ymm2
|
||||
- - 0.49 0.51 - - - - - - vsubpd %ymm7, %ymm8, %ymm0
|
||||
- - 0.48 0.52 - - - - - - vmulpd %ymm2, %ymm2, %ymm14
|
||||
- - 0.03 0.97 - - - - - - vfmadd231pd %ymm6, %ymm6, %ymm14
|
||||
- - 0.94 0.06 - - - - - - vfmadd231pd %ymm0, %ymm0, %ymm14
|
||||
- - 0.47 0.53 - - - - - - vcmpltpd %ymm5, %ymm14, %ymm1
|
||||
- - 0.96 0.04 - - - - - - vpcmpeqd %ymm7, %ymm7, %ymm7
|
||||
- - 1.00 - - - - 1.00 - - vptest %ymm7, %ymm1
|
||||
- 5.00 1.00 - - - - - - - vdivpd %ymm14, %ymm4, %ymm7
|
||||
- - 0.93 0.07 0.49 0.51 - - - - vmulpd 96(%rsp), %ymm7, %ymm14
|
||||
- - 0.05 0.95 - - - - - - vmulpd %ymm14, %ymm7, %ymm14
|
||||
- - 0.02 0.98 - - - - - - vmulpd %ymm14, %ymm7, %ymm15
|
||||
- - 0.98 0.02 - - - - - - vfmsub213pd %ymm3, %ymm7, %ymm14
|
||||
- - 0.07 0.93 0.51 0.49 - - - - vmulpd 64(%rsp), %ymm7, %ymm7
|
||||
- - 0.01 0.99 - - - - - - vmulpd %ymm7, %ymm15, %ymm15
|
||||
- - 0.01 0.99 - - - - - - vmulpd %ymm14, %ymm15, %ymm7
|
||||
- - 0.03 0.97 - - - - - - vmulpd %ymm7, %ymm6, %ymm6
|
||||
- - 0.97 0.03 - - - - - - vmulpd %ymm7, %ymm2, %ymm2
|
||||
- - 0.03 0.90 - - - 0.07 - - vandpd %ymm6, %ymm1, %ymm6
|
||||
- - 0.06 0.94 - - - - - - vaddpd %ymm6, %ymm13, %ymm13
|
||||
- - 0.03 0.97 - - - - - - vmulpd %ymm7, %ymm0, %ymm6
|
||||
- - 0.46 0.08 - - - 0.46 - - vandpd %ymm2, %ymm1, %ymm0
|
||||
- - 0.47 0.01 - - - 0.52 - - vandpd %ymm6, %ymm1, %ymm1
|
||||
- - 0.48 0.52 - - - - - - vaddpd %ymm0, %ymm12, %ymm12
|
||||
- - 0.52 0.48 - - - - - - vaddpd %ymm1, %ymm11, %ymm11
|
||||
- - 0.01 - - - - - 0.99 - addq $4, %rdx
|
||||
- - - - - - - 0.02 0.98 - cmpq %rsi, %rdx
|
||||
- - 0.45 - - - - - 0.55 - jb ..B1.22
|
158
static_analysis/jan/analyses/lammps-icc-avx2-mca-icx.out
Normal file
158
static_analysis/jan/analyses/lammps-icc-avx2-mca-icx.out
Normal file
@@ -0,0 +1,158 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 5600
|
||||
Total Cycles: 2306
|
||||
Total uOps: 6300
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.73
|
||||
IPC: 2.43
|
||||
Block RThroughput: 10.5
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 6 0.50 * vmovdqu (%rbx,%rdx,4), %xmm0
|
||||
1 2 1.00 vmovq %xmm0, %rcx
|
||||
1 1 0.50 vpunpckhqdq %xmm0, %xmm0, %xmm2
|
||||
1 2 1.00 vmovq %xmm2, %r15
|
||||
1 1 0.25 movl %ecx, %r8d
|
||||
1 1 0.50 shrq $32, %rcx
|
||||
1 1 0.50 leal (%rcx,%rcx,2), %r14d
|
||||
1 1 0.50 leal (%r8,%r8,2), %r8d
|
||||
1 1 0.25 movslq %r8d, %rcx
|
||||
1 1 0.25 movslq %r14d, %r8
|
||||
1 1 0.25 movl %r15d, %r14d
|
||||
1 1 0.50 shrq $32, %r15
|
||||
1 6 0.50 * vmovups (%r11,%rcx,8), %xmm7
|
||||
1 6 0.50 * vmovups (%r11,%r8,8), %xmm6
|
||||
1 5 0.50 * vmovq 16(%r11,%rcx,8), %xmm14
|
||||
1 1 0.50 leal (%r14,%r14,2), %r14d
|
||||
1 1 0.25 movslq %r14d, %r14
|
||||
1 1 0.50 leal (%r15,%r15,2), %r15d
|
||||
1 1 0.25 movslq %r15d, %r15
|
||||
2 6 1.00 * vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15
|
||||
2 7 0.50 * vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1
|
||||
1 5 0.50 * vmovq 16(%r11,%r14,8), %xmm0
|
||||
2 7 0.50 * vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6
|
||||
2 6 1.00 * vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2
|
||||
1 1 1.00 vunpcklpd %ymm6, %ymm1, %ymm14
|
||||
1 1 1.00 vunpckhpd %ymm6, %ymm1, %ymm1
|
||||
1 4 0.50 vsubpd %ymm14, %ymm10, %ymm6
|
||||
1 3 1.00 vinsertf128 $1, %xmm2, %ymm15, %ymm7
|
||||
1 4 0.50 vsubpd %ymm1, %ymm9, %ymm2
|
||||
1 4 0.50 vsubpd %ymm7, %ymm8, %ymm0
|
||||
1 4 0.50 vmulpd %ymm2, %ymm2, %ymm14
|
||||
1 4 0.50 vfmadd231pd %ymm6, %ymm6, %ymm14
|
||||
1 4 0.50 vfmadd231pd %ymm0, %ymm0, %ymm14
|
||||
1 4 0.50 vcmpltpd %ymm5, %ymm14, %ymm1
|
||||
1 1 0.50 vpcmpeqd %ymm7, %ymm7, %ymm7
|
||||
2 3 1.00 vptest %ymm7, %ymm1
|
||||
1 14 5.00 vdivpd %ymm14, %ymm4, %ymm7
|
||||
2 11 0.50 * vmulpd 96(%rsp), %ymm7, %ymm14
|
||||
1 4 0.50 vmulpd %ymm14, %ymm7, %ymm14
|
||||
1 4 0.50 vmulpd %ymm14, %ymm7, %ymm15
|
||||
1 4 0.50 vfmsub213pd %ymm3, %ymm7, %ymm14
|
||||
2 11 0.50 * vmulpd 64(%rsp), %ymm7, %ymm7
|
||||
1 4 0.50 vmulpd %ymm7, %ymm15, %ymm15
|
||||
1 4 0.50 vmulpd %ymm14, %ymm15, %ymm7
|
||||
1 4 0.50 vmulpd %ymm7, %ymm6, %ymm6
|
||||
1 4 0.50 vmulpd %ymm7, %ymm2, %ymm2
|
||||
1 1 0.33 vandpd %ymm6, %ymm1, %ymm6
|
||||
1 4 0.50 vaddpd %ymm6, %ymm13, %ymm13
|
||||
1 4 0.50 vmulpd %ymm7, %ymm0, %ymm6
|
||||
1 1 0.33 vandpd %ymm2, %ymm1, %ymm0
|
||||
1 1 0.33 vandpd %ymm6, %ymm1, %ymm1
|
||||
1 4 0.50 vaddpd %ymm0, %ymm12, %ymm12
|
||||
1 4 0.50 vaddpd %ymm1, %ymm11, %ymm11
|
||||
1 1 0.25 addq $4, %rdx
|
||||
1 1 0.25 cmpq %rsi, %rdx
|
||||
1 1 0.50 jb ..B1.22
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - ICXDivider
|
||||
[1] - ICXFPDivider
|
||||
[2] - ICXPort0
|
||||
[3] - ICXPort1
|
||||
[4] - ICXPort2
|
||||
[5] - ICXPort3
|
||||
[6] - ICXPort4
|
||||
[7] - ICXPort5
|
||||
[8] - ICXPort6
|
||||
[9] - ICXPort7
|
||||
[10] - ICXPort8
|
||||
[11] - ICXPort9
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
- 5.00 15.12 15.03 5.50 5.50 - 13.45 8.40 - - -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
- - - - 0.50 0.50 - - - - - - vmovdqu (%rbx,%rdx,4), %xmm0
|
||||
- - 1.00 - - - - - - - - - vmovq %xmm0, %rcx
|
||||
- - - 0.46 - - - 0.54 - - - - vpunpckhqdq %xmm0, %xmm0, %xmm2
|
||||
- - 1.00 - - - - - - - - - vmovq %xmm2, %r15
|
||||
- - - - - - - - 1.00 - - - movl %ecx, %r8d
|
||||
- - 0.96 - - - - - 0.04 - - - shrq $32, %rcx
|
||||
- - - 0.01 - - - 0.99 - - - - leal (%rcx,%rcx,2), %r14d
|
||||
- - - 0.03 - - - 0.97 - - - - leal (%r8,%r8,2), %r8d
|
||||
- - 0.48 0.01 - - - - 0.51 - - - movslq %r8d, %rcx
|
||||
- - 0.02 0.02 - - - 0.01 0.95 - - - movslq %r14d, %r8
|
||||
- - 0.02 - - - - - 0.98 - - - movl %r15d, %r14d
|
||||
- - 0.52 - - - - - 0.48 - - - shrq $32, %r15
|
||||
- - - - 0.49 0.51 - - - - - - vmovups (%r11,%rcx,8), %xmm7
|
||||
- - - - 0.49 0.51 - - - - - - vmovups (%r11,%r8,8), %xmm6
|
||||
- - - - 0.52 0.48 - - - - - - vmovq 16(%r11,%rcx,8), %xmm14
|
||||
- - - 0.47 - - - 0.53 - - - - leal (%r14,%r14,2), %r14d
|
||||
- - 0.01 0.01 - - - 0.01 0.97 - - - movslq %r14d, %r14
|
||||
- - - 0.04 - - - 0.96 - - - - leal (%r15,%r15,2), %r15d
|
||||
- - 0.48 - - - - 0.01 0.51 - - - movslq %r15d, %r15
|
||||
- - - - 0.51 0.49 - 1.00 - - - - vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15
|
||||
- - 0.02 0.01 0.95 0.05 - 0.97 - - - - vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1
|
||||
- - - - 0.05 0.95 - - - - - - vmovq 16(%r11,%r14,8), %xmm0
|
||||
- - 0.02 0.49 0.49 0.51 - 0.49 - - - - vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6
|
||||
- - - - 0.50 0.50 - 1.00 - - - - vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2
|
||||
- - - - - - - 1.00 - - - - vunpcklpd %ymm6, %ymm1, %ymm14
|
||||
- - - - - - - 1.00 - - - - vunpckhpd %ymm6, %ymm1, %ymm1
|
||||
- - 0.47 0.53 - - - - - - - - vsubpd %ymm14, %ymm10, %ymm6
|
||||
- - - - - - - 1.00 - - - - vinsertf128 $1, %xmm2, %ymm15, %ymm7
|
||||
- - 0.50 0.50 - - - - - - - - vsubpd %ymm1, %ymm9, %ymm2
|
||||
- - 0.94 0.06 - - - - - - - - vsubpd %ymm7, %ymm8, %ymm0
|
||||
- - 0.06 0.94 - - - - - - - - vmulpd %ymm2, %ymm2, %ymm14
|
||||
- - 0.04 0.96 - - - - - - - - vfmadd231pd %ymm6, %ymm6, %ymm14
|
||||
- - 0.95 0.05 - - - - - - - - vfmadd231pd %ymm0, %ymm0, %ymm14
|
||||
- - 0.02 0.98 - - - - - - - - vcmpltpd %ymm5, %ymm14, %ymm1
|
||||
- - 0.05 0.95 - - - - - - - - vpcmpeqd %ymm7, %ymm7, %ymm7
|
||||
- - 1.00 - - - - 1.00 - - - - vptest %ymm7, %ymm1
|
||||
- 5.00 1.00 - - - - - - - - - vdivpd %ymm14, %ymm4, %ymm7
|
||||
- - 0.51 0.49 0.49 0.51 - - - - - - vmulpd 96(%rsp), %ymm7, %ymm14
|
||||
- - 0.04 0.96 - - - - - - - - vmulpd %ymm14, %ymm7, %ymm14
|
||||
- - 0.01 0.99 - - - - - - - - vmulpd %ymm14, %ymm7, %ymm15
|
||||
- - 0.99 0.01 - - - - - - - - vfmsub213pd %ymm3, %ymm7, %ymm14
|
||||
- - 0.49 0.51 0.51 0.49 - - - - - - vmulpd 64(%rsp), %ymm7, %ymm7
|
||||
- - 0.01 0.99 - - - - - - - - vmulpd %ymm7, %ymm15, %ymm15
|
||||
- - 0.01 0.99 - - - - - - - - vmulpd %ymm14, %ymm15, %ymm7
|
||||
- - 0.48 0.52 - - - - - - - - vmulpd %ymm7, %ymm6, %ymm6
|
||||
- - 0.52 0.48 - - - - - - - - vmulpd %ymm7, %ymm2, %ymm2
|
||||
- - 0.46 0.02 - - - 0.52 - - - - vandpd %ymm6, %ymm1, %ymm6
|
||||
- - 0.49 0.51 - - - - - - - - vaddpd %ymm6, %ymm13, %ymm13
|
||||
- - 0.48 0.52 - - - - - - - - vmulpd %ymm7, %ymm0, %ymm6
|
||||
- - 0.02 0.52 - - - 0.46 - - - - vandpd %ymm2, %ymm1, %ymm0
|
||||
- - 0.02 - - - - 0.98 - - - - vandpd %ymm6, %ymm1, %ymm1
|
||||
- - 0.49 0.51 - - - - - - - - vaddpd %ymm0, %ymm12, %ymm12
|
||||
- - 0.51 0.49 - - - - - - - - vaddpd %ymm1, %ymm11, %ymm11
|
||||
- - 0.01 - - - - - 0.99 - - - addq $4, %rdx
|
||||
- - 0.01 - - - - 0.01 0.98 - - - cmpq %rsi, %rdx
|
||||
- - 0.01 - - - - - 0.99 - - - jb ..B1.22
|
97
static_analysis/jan/analyses/lammps-icc-avx2-osaca-csx.out
Normal file
97
static_analysis/jan/analyses/lammps-icc-avx2-osaca-csx.out
Normal file
@@ -0,0 +1,97 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: lammps-icc-avx2.s
|
||||
Architecture: CSX
|
||||
Timestamp: 2023-02-10 16:29:58
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||
----------------------------------------------------------------------------------------------------
|
||||
256 | | | | | | | | || | | # pointer_increment=32 724d27eafcb27eabca1528ddfdbdba3e
|
||||
257 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
258 | | | | | | | | || | | ..B1.22: # Preds ..B1.24 ..B1.21
|
||||
259 | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
260 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%rbx,%rdx,4), %xmm0 #60.21
|
||||
261 | 1.00 | | | | | | | || 1.0 | | vmovq %xmm0, %rcx #60.21
|
||||
262 | | | | | | 1.000 | | || | | vpunpckhqdq %xmm0, %xmm0, %xmm2 #60.21
|
||||
263 | 1.00 | | | | | | | || | | vmovq %xmm2, %r15 #60.21
|
||||
264 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || 1.0 | | movl %ecx, %r8d #60.21
|
||||
265 | 0.00 | | | | | | 1.00 | || | | shrq $32, %rcx #60.21
|
||||
266 | | 0.500 | | | | 0.500 | | || | | lea (%rcx,%rcx,2), %r14d #61.36
|
||||
267 | | 0.500 | | | | 0.500 | | || 1.0 | | lea (%r8,%r8,2), %r8d #61.36
|
||||
268 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || 1.0 | | movslq %r8d, %rcx #61.36
|
||||
269 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movslq %r14d, %r8 #61.36
|
||||
270 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movl %r15d, %r14d #60.21
|
||||
271 | 0.00 | | | | | | 1.00 | || | | shrq $32, %r15 #60.21
|
||||
272 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovups (%r11,%rcx,8), %xmm7 #61.36
|
||||
273 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%r11,%r8,8), %xmm6 #61.36
|
||||
274 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%r11,%rcx,8), %xmm14 #61.36
|
||||
275 | | 0.500 | | | | 0.500 | | || | | lea (%r14,%r14,2), %r14d #61.36
|
||||
276 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movslq %r14d, %r14 #61.36
|
||||
277 | | 0.500 | | | | 0.500 | | || | | lea (%r15,%r15,2), %r15d #61.36
|
||||
278 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movslq %r15d, %r15 #61.36
|
||||
279 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || | | vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 #61.36
|
||||
280 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || 3.0 | | vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 #61.36
|
||||
281 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%r11,%r14,8), %xmm0 #61.36
|
||||
282 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || | | vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 #61.36
|
||||
283 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || | | vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 #61.36
|
||||
284 | | | | | | 1.000 | | || | | vunpcklpd %ymm6, %ymm1, %ymm14 #61.36
|
||||
285 | | | | | | 1.000 | | || 1.0 | | vunpckhpd %ymm6, %ymm1, %ymm1 #61.36
|
||||
286 | 0.50 | 0.500 | | | | | | || | | vsubpd %ymm14, %ymm10, %ymm6 #61.36
|
||||
287 | | | | | | 1.000 | | || | | vinsertf128 $1, %xmm2, %ymm15, %ymm7 #61.36
|
||||
288 | 0.50 | 0.500 | | | | | | || 4.0 | | vsubpd %ymm1, %ymm9, %ymm2 #62.36
|
||||
289 | 0.50 | 0.500 | | | | | | || | | vsubpd %ymm7, %ymm8, %ymm0 #63.36
|
||||
290 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm2, %ymm2, %ymm14 #64.49
|
||||
291 | 0.50 | 0.500 | | | | | | || 4.0 | | vfmadd231pd %ymm6, %ymm6, %ymm14 #64.49
|
||||
292 | 0.50 | 0.500 | | | | | | || 4.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.63
|
||||
293 | | | | | | 1.000 | | || | | vcmpltpd %ymm5, %ymm14, %ymm1 #74.22
|
||||
294 | 0.50 | 0.500 | | | | | | || | | vpcmpeqd %ymm7, %ymm7, %ymm7 #74.22
|
||||
295 | 1.00 | | | | | 1.000 | | || | | vptest %ymm7, %ymm1 #74.22
|
||||
296 | | | | | | | | || | | #je ..B1.24 # Prob 50% #74.22
|
||||
297 | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14
|
||||
298 | | | | | | | | || | | ..B1.23: # Preds ..B1.22
|
||||
299 | | | | | | | | || | | # Execution count [1.25e+01]
|
||||
300 | 1.00 8.00 | | | | | | | || 15.0 | | vdivpd %ymm14, %ymm4, %ymm7 #75.39
|
||||
301 | 0.50 | 0.500 | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmulpd 96(%rsp), %ymm7, %ymm14 #76.38[spill]
|
||||
302 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm14 #76.44
|
||||
303 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm15 #76.50
|
||||
304 | 0.50 | 0.500 | | | | | | || | | vfmsub213pd %ymm3, %ymm7, %ymm14 #77.55
|
||||
305 | 0.50 | 0.500 | 0.50 0.50 | 0.50 0.50 | | | | || | | vmulpd 64(%rsp), %ymm7, %ymm7 #77.55[spill]
|
||||
306 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm7, %ymm15, %ymm15 #77.64
|
||||
307 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm14, %ymm15, %ymm7 #77.70
|
||||
308 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm7, %ymm6, %ymm6 #78.31
|
||||
309 | 0.50 | 0.500 | | | | | | || | | vmulpd %ymm7, %ymm2, %ymm2 #79.31
|
||||
310 | 0.25 | 0.253 | | | | 0.493 | | || 1.0 | | vandpd %ymm6, %ymm1, %ymm6 #78.31
|
||||
311 | 0.50 | 0.500 | | | | | | || 4.0 | | vaddpd %ymm6, %ymm13, %ymm13 #78.17
|
||||
312 | 0.25 | 0.750 | | | | | | || | | vmulpd %ymm7, %ymm0, %ymm6 #80.31
|
||||
313 | 0.16 | 0.417 | | | | 0.423 | | || | | vandpd %ymm2, %ymm1, %ymm0 #79.31
|
||||
314 | 0.00 | 0.250 | | | | 0.750 | | || | | vandpd %ymm6, %ymm1, %ymm1 #80.31
|
||||
315 | 0.00 | 1.000 | | | | | | || | | vaddpd %ymm0, %ymm12, %ymm12 #79.17
|
||||
316 | 0.50 | 0.500 | | | | | | || | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17
|
||||
317 | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm3 ymm4 ymm5 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13
|
||||
318 | | | | | | | | || | | ..B1.24: # Preds ..B1.23 ..B1.22
|
||||
319 | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
320 | 0.00 | 0.000 | | | | -0.01 | 1.00 | || | | addq $4, %rdx #59.9
|
||||
321 | 0.00 | -0.01 | | | | 0.000 | 1.00 | || | | cmpq %rsi, %rdx #59.9
|
||||
322 | | | | | | | | || | | * jb ..B1.22 # Prob 82% #59.9
|
||||
323 | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
13.7 8.00 13.66 5.50 5.50 5.50 5.50 13.66 10.0 76.0 4.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
316 | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17| [316]
|
||||
315 | 4.0 | vaddpd %ymm0, %ymm12, %ymm12 #79.17| [315]
|
||||
311 | 4.0 | vaddpd %ymm6, %ymm13, %ymm13 #78.17| [311]
|
||||
320 | 1.0 | addq $4, %rdx #59.9| [320]
|
||||
|
97
static_analysis/jan/analyses/lammps-icc-avx2-osaca-icx.out
Normal file
97
static_analysis/jan/analyses/lammps-icc-avx2-osaca-icx.out
Normal file
@@ -0,0 +1,97 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: lammps-icc-avx2.s
|
||||
Architecture: ICX
|
||||
Timestamp: 2023-02-10 16:29:48
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
||||
-----------------------------------------------------------------------------------------------------------------------
|
||||
256 | | | | | | | | | | || | | # pointer_increment=32 724d27eafcb27eabca1528ddfdbdba3e
|
||||
257 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
258 | | | | | | | | | | || | | ..B1.22: # Preds ..B1.24 ..B1.21
|
||||
259 | | | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
260 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovdqu (%rbx,%rdx,4), %xmm0 #60.21
|
||||
261 | 1.00 | | | | | | | | | || 1.0 | | vmovq %xmm0, %rcx #60.21
|
||||
262 | | 0.50 | | | | 0.50 | | | | || | | vpunpckhqdq %xmm0, %xmm0, %xmm2 #60.21
|
||||
263 | 1.00 | | | | | | | | | || | | vmovq %xmm2, %r15 #60.21
|
||||
264 | 0.37 | 0.00 | | | | 0.25 | 0.38 | | | || 1.0 | | movl %ecx, %r8d #60.21
|
||||
265 | 0.50 | | | | | | 0.50 | | | || | | shrq $32, %rcx #60.21
|
||||
266 | 0.13 | 0.00 | | | | 0.00 | 0.87 | | | || | | lea (%rcx,%rcx,2), %r14d #61.36
|
||||
267 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || 6.0 | | lea (%r8,%r8,2), %r8d #61.36
|
||||
268 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || 1.0 | | movslq %r8d, %rcx #61.36
|
||||
269 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movslq %r14d, %r8 #61.36
|
||||
270 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movl %r15d, %r14d #60.21
|
||||
271 | 0.00 | | | | | | 1.00 | | | || | | shrq $32, %r15 #60.21
|
||||
272 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovups (%r11,%rcx,8), %xmm7 #61.36
|
||||
273 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovups (%r11,%r8,8), %xmm6 #61.36
|
||||
274 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovq 16(%r11,%rcx,8), %xmm14 #61.36
|
||||
275 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | lea (%r14,%r14,2), %r14d #61.36
|
||||
276 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movslq %r14d, %r14 #61.36
|
||||
277 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | lea (%r15,%r15,2), %r15d #61.36
|
||||
278 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movslq %r15d, %r15 #61.36
|
||||
279 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || | | vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 #61.36
|
||||
280 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || 3.0 | | vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 #61.36
|
||||
281 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovq 16(%r11,%r14,8), %xmm0 #61.36
|
||||
282 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || | | vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 #61.36
|
||||
283 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || | | vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 #61.36
|
||||
284 | | | | | | 1.00 | | | | || | | vunpcklpd %ymm6, %ymm1, %ymm14 #61.36
|
||||
285 | | | | | | 1.00 | | | | || 1.0 | | vunpckhpd %ymm6, %ymm1, %ymm1 #61.36
|
||||
286 | 0.50 | 0.50 | | | | | | | | || | | vsubpd %ymm14, %ymm10, %ymm6 #61.36
|
||||
287 | | | | | | 1.00 | | | | || | | vinsertf128 $1, %xmm2, %ymm15, %ymm7 #61.36
|
||||
288 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vsubpd %ymm1, %ymm9, %ymm2 #62.36
|
||||
289 | 0.50 | 0.50 | | | | | | | | || | | vsubpd %ymm7, %ymm8, %ymm0 #63.36
|
||||
290 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm2, %ymm2, %ymm14 #64.49
|
||||
291 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vfmadd231pd %ymm6, %ymm6, %ymm14 #64.49
|
||||
292 | 0.75 | 0.25 | | | | | | | | || 4.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.63
|
||||
293 | 0.00 | | | | | 1.00 | | | | || | | vcmpltpd %ymm5, %ymm14, %ymm1 #74.22
|
||||
294 | 0.50 | 0.50 | | | | | | | | || | | vpcmpeqd %ymm7, %ymm7, %ymm7 #74.22
|
||||
295 | 1.00 | | | | | 1.00 | | | | || | | vptest %ymm7, %ymm1 #74.22
|
||||
296 | | | | | | | | | | || | | #je ..B1.24 # Prob 50% #74.22
|
||||
297 | | | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14
|
||||
298 | | | | | | | | | | || | | ..B1.23: # Preds ..B1.22
|
||||
299 | | | | | | | | | | || | | # Execution count [1.25e+01]
|
||||
300 | 1.00 8.00 | | | | | | | | | || 13.0 | | vdivpd %ymm14, %ymm4, %ymm7 #75.39
|
||||
301 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | | | || 4.0 | | vmulpd 96(%rsp), %ymm7, %ymm14 #76.38[spill]
|
||||
302 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm14 #76.44
|
||||
303 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm15 #76.50
|
||||
304 | 0.50 | 0.50 | | | | | | | | || | | vfmsub213pd %ymm3, %ymm7, %ymm14 #77.55
|
||||
305 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmulpd 64(%rsp), %ymm7, %ymm7 #77.55[spill]
|
||||
306 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm7, %ymm15, %ymm15 #77.64
|
||||
307 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm14, %ymm15, %ymm7 #77.70
|
||||
308 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm7, %ymm6, %ymm6 #78.31
|
||||
309 | 0.00 | 1.00 | | | | | | | | || | | vmulpd %ymm7, %ymm2, %ymm2 #79.31
|
||||
310 | 0.00 | 0.00 | | | | 1.00 | | | | || 1.0 | | vandpd %ymm6, %ymm1, %ymm6 #78.31
|
||||
311 | 0.00 | 1.00 | | | | | | | | || 4.0 | | vaddpd %ymm6, %ymm13, %ymm13 #78.17
|
||||
312 | 0.00 | 1.00 | | | | | | | | || | | vmulpd %ymm7, %ymm0, %ymm6 #80.31
|
||||
313 | 0.00 | 0.00 | | | | 1.00 | | | | || | | vandpd %ymm2, %ymm1, %ymm0 #79.31
|
||||
314 | 0.00 | 0.00 | | | | 1.00 | | | | || | | vandpd %ymm6, %ymm1, %ymm1 #80.31
|
||||
315 | 0.00 | 1.00 | | | | | | | | || | | vaddpd %ymm0, %ymm12, %ymm12 #79.17
|
||||
316 | 0.00 | 1.00 | | | | | | | | || | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17
|
||||
317 | | | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm3 ymm4 ymm5 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13
|
||||
318 | | | | | | | | | | || | | ..B1.24: # Preds ..B1.23 ..B1.22
|
||||
319 | | | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
320 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | addq $4, %rdx #59.9
|
||||
321 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | cmpq %rsi, %rdx #59.9
|
||||
322 | | | | | | | | | | || | | * jb ..B1.22 # Prob 82% #59.9
|
||||
323 | | | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
12.8 8.00 12.8 5.50 5.50 5.50 5.50 12.8 12.8 81 4.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
316 | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17| [316]
|
||||
315 | 4.0 | vaddpd %ymm0, %ymm12, %ymm12 #79.17| [315]
|
||||
311 | 4.0 | vaddpd %ymm6, %ymm13, %ymm13 #78.17| [311]
|
||||
320 | 1.0 | addq $4, %rdx #59.9| [320]
|
||||
|
75
static_analysis/jan/analyses/lammps-icc-avx512-iaca.out
Normal file
75
static_analysis/jan/analyses/lammps-icc-avx512-iaca.out
Normal file
@@ -0,0 +1,75 @@
|
||||
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||
Analyzed File - lammps-icc-avx512.o
|
||||
Binary Format - 64Bit
|
||||
Architecture - SKX
|
||||
Analysis Type - Throughput
|
||||
|
||||
Throughput Analysis Report
|
||||
--------------------------
|
||||
Block Throughput: 30.89 Cycles Throughput Bottleneck: Backend
|
||||
Loop Count: 22
|
||||
Port Binding In Cycles Per Iteration:
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
| Cycles | 19.0 0.0 | 4.0 | 13.0 13.0 | 13.0 13.0 | 0.0 | 17.0 | 4.0 | 0.0 |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
|
||||
DV - Divider pipe (on port 0)
|
||||
D - Data fetch pipe (on ports 2 and 3)
|
||||
F - Macro Fusion with the previous instruction occurred
|
||||
* - instruction micro-ops not bound to a port
|
||||
^ - Micro Fusion occurred
|
||||
# - ESP Tracking sync uop was issued
|
||||
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||
X - instruction not supported, was not accounted in Analysis
|
||||
|
||||
| Num Of | Ports pressure in cycles | |
|
||||
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||
-----------------------------------------------------------------------------------------
|
||||
| 1 | | | | | | 1.0 | | | vpcmpgtd k5, ymm3, ymm4
|
||||
| 1 | | 1.0 | | | | | | | vpaddd ymm4, ymm4, ymm15
|
||||
| 2 | | 1.0 | 1.0 1.0 | | | | | | vmovdqu32 ymm17{k5}{z}, ymmword ptr [r10+r15*4]
|
||||
| 1 | | 1.0 | | | | | | | vpaddd ymm18, ymm17, ymm17
|
||||
| 1 | | | | | | | 1.0 | | add r15, 0x8
|
||||
| 1 | | 1.0 | | | | | | | vpaddd ymm19, ymm17, ymm18
|
||||
| 1 | 1.0 | | | | | | | | kmovw k2, k5
|
||||
| 1 | 1.0 | | | | | | | | kmovw k3, k5
|
||||
| 1 | 1.0 | | | | | | | | kmovw k1, k5
|
||||
| 1* | | | | | | | | | vpxord zmm21, zmm21, zmm21
|
||||
| 1* | | | | | | | | | vpxord zmm20, zmm20, zmm20
|
||||
| 1* | | | | | | | | | vpxord zmm22, zmm22, zmm22
|
||||
| 5^ | 1.0 | | 4.0 4.0 | 4.0 4.0 | | 1.0 | 1.0 | | vgatherdpd zmm21, k2, zmmword ptr [rbx+ymm19*8+0x8]
|
||||
| 5^ | 1.0 | | 4.0 4.0 | 4.0 4.0 | | 1.0 | 1.0 | | vgatherdpd zmm20, k3, zmmword ptr [rbx+ymm19*8]
|
||||
| 5^ | 1.0 | | 4.0 4.0 | 4.0 4.0 | | 1.0 | 1.0 | | vgatherdpd zmm22, k1, zmmword ptr [rbx+ymm19*8+0x10]
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vsubpd zmm18, zmm1, zmm21
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vsubpd zmm17, zmm2, zmm20
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vsubpd zmm19, zmm0, zmm22
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm31, zmm18, zmm18
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231pd zmm31, zmm17, zmm17
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231pd zmm31, zmm19, zmm19
|
||||
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm30, zmm31
|
||||
| 1 | | | | | | 1.0 | | | vcmppd k6{k5}, zmm31, zmm14, 0x1
|
||||
| 1 | | | | | | 1.0 | | | vfpclasspd k0, zmm30, 0x1e
|
||||
| 1* | | | | | | | | | vmovaps zmm23, zmm31
|
||||
| 2^ | 1.0 | | | 1.0 1.0 | | | | | vfnmadd213pd zmm23, zmm30, qword ptr [rip]{1to8}
|
||||
| 1 | 1.0 | | | | | | | | knotw k4, k0
|
||||
| 1 | | | | | | 1.0 | | | vmulpd zmm24, zmm23, zmm23
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd213pd zmm30{k4}, zmm23, zmm30
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd213pd zmm30{k4}, zmm24, zmm30
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm25, zmm30, zmm13
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm27, zmm30, zmm12
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm28, zmm30, zmm25
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm26, zmm30, zmm28
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmsub213pd zmm30, zmm28, zmm5
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm29, zmm26, zmm27
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vmulpd zmm23, zmm29, zmm30
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231pd zmm10{k6}, zmm23, zmm17
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231pd zmm9{k6}, zmm23, zmm18
|
||||
| 1 | 0.5 | | | | | 0.5 | | | vfmadd231pd zmm8{k6}, zmm23, zmm19
|
||||
| 1* | | | | | | | | | cmp r15, r14
|
||||
| 0*F | | | | | | | | | jb 0xffffffffffffff0c
|
||||
Total Num Of Uops: 57
|
||||
Analysis Notes:
|
||||
Backend allocation was stalled due to unavailable allocation resources.
|
||||
There were bubbles in the frontend.
|
128
static_analysis/jan/analyses/lammps-icc-avx512-mca-csx.out
Normal file
128
static_analysis/jan/analyses/lammps-icc-avx512-mca-csx.out
Normal file
@@ -0,0 +1,128 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 4200
|
||||
Total Cycles: 2465
|
||||
Total uOps: 5800
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.35
|
||||
IPC: 1.70
|
||||
Block RThroughput: 13.0
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 4 1.00 vpcmpgtd %ymm4, %ymm3, %k5
|
||||
1 1 0.33 vpaddd %ymm15, %ymm4, %ymm4
|
||||
2 8 0.50 * vmovdqu32 (%r10,%r15,4), %ymm17 {%k5} {z}
|
||||
1 1 0.33 vpaddd %ymm17, %ymm17, %ymm18
|
||||
1 1 0.25 addq $8, %r15
|
||||
1 1 0.33 vpaddd %ymm18, %ymm17, %ymm19
|
||||
1 1 1.00 kmovw %k5, %k2
|
||||
1 1 1.00 kmovw %k5, %k3
|
||||
1 1 1.00 kmovw %k5, %k1
|
||||
1 0 0.17 vpxord %zmm21, %zmm21, %zmm21
|
||||
1 0 0.17 vpxord %zmm20, %zmm20, %zmm20
|
||||
1 0 0.17 vpxord %zmm22, %zmm22, %zmm22
|
||||
5 21 4.00 * vgatherdpd 8(%rbx,%ymm19,8), %zmm21 {%k2}
|
||||
5 21 4.00 * vgatherdpd (%rbx,%ymm19,8), %zmm20 {%k3}
|
||||
5 21 4.00 * vgatherdpd 16(%rbx,%ymm19,8), %zmm22 {%k1}
|
||||
1 4 0.50 vsubpd %zmm21, %zmm1, %zmm18
|
||||
1 4 0.50 vsubpd %zmm20, %zmm2, %zmm17
|
||||
1 4 0.50 vsubpd %zmm22, %zmm0, %zmm19
|
||||
1 4 0.50 vmulpd %zmm18, %zmm18, %zmm31
|
||||
1 4 0.50 vfmadd231pd %zmm17, %zmm17, %zmm31
|
||||
1 4 0.50 vfmadd231pd %zmm19, %zmm19, %zmm31
|
||||
3 4 2.00 vrcp14pd %zmm31, %zmm30
|
||||
1 4 1.00 vcmpltpd %zmm14, %zmm31, %k6 {%k5}
|
||||
1 4 1.00 vfpclasspd $30, %zmm30, %k0
|
||||
1 1 0.50 vmovaps %zmm31, %zmm23
|
||||
2 11 0.50 * vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23
|
||||
1 1 1.00 knotw %k0, %k4
|
||||
1 4 0.50 vmulpd %zmm23, %zmm23, %zmm24
|
||||
1 4 0.50 vfmadd213pd %zmm30, %zmm23, %zmm30 {%k4}
|
||||
1 4 0.50 vfmadd213pd %zmm30, %zmm24, %zmm30 {%k4}
|
||||
1 4 0.50 vmulpd %zmm13, %zmm30, %zmm25
|
||||
1 4 0.50 vmulpd %zmm12, %zmm30, %zmm27
|
||||
1 4 0.50 vmulpd %zmm25, %zmm30, %zmm28
|
||||
1 4 0.50 vmulpd %zmm28, %zmm30, %zmm26
|
||||
1 4 0.50 vfmsub213pd %zmm5, %zmm28, %zmm30
|
||||
1 4 0.50 vmulpd %zmm27, %zmm26, %zmm29
|
||||
1 4 0.50 vmulpd %zmm30, %zmm29, %zmm23
|
||||
1 4 0.50 vfmadd231pd %zmm17, %zmm23, %zmm10 {%k6}
|
||||
1 4 0.50 vfmadd231pd %zmm18, %zmm23, %zmm9 {%k6}
|
||||
1 4 0.50 vfmadd231pd %zmm19, %zmm23, %zmm8 {%k6}
|
||||
1 1 0.25 cmpq %r14, %r15
|
||||
1 1 0.50 jb ..B1.16
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - SKXDivider
|
||||
[1] - SKXFPDivider
|
||||
[2] - SKXPort0
|
||||
[3] - SKXPort1
|
||||
[4] - SKXPort2
|
||||
[5] - SKXPort3
|
||||
[6] - SKXPort4
|
||||
[7] - SKXPort5
|
||||
[8] - SKXPort6
|
||||
[9] - SKXPort7
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
- - 19.02 6.79 12.64 13.36 - 16.03 5.16 -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
- - - - - - - 1.00 - - vpcmpgtd %ymm4, %ymm3, %k5
|
||||
- - 0.28 0.72 - - - - - - vpaddd %ymm15, %ymm4, %ymm4
|
||||
- - 0.14 0.71 0.55 0.45 - 0.15 - - vmovdqu32 (%r10,%r15,4), %ymm17 {%k5} {z}
|
||||
- - - 0.97 - - - 0.03 - - vpaddd %ymm17, %ymm17, %ymm18
|
||||
- - 0.14 0.41 - - - 0.13 0.32 - addq $8, %r15
|
||||
- - - 0.99 - - - 0.01 - - vpaddd %ymm18, %ymm17, %ymm19
|
||||
- - 1.00 - - - - - - - kmovw %k5, %k2
|
||||
- - 1.00 - - - - - - - kmovw %k5, %k3
|
||||
- - 1.00 - - - - - - - kmovw %k5, %k1
|
||||
- - - - - - - - - - vpxord %zmm21, %zmm21, %zmm21
|
||||
- - - - - - - - - - vpxord %zmm20, %zmm20, %zmm20
|
||||
- - - - - - - - - - vpxord %zmm22, %zmm22, %zmm22
|
||||
- - 1.00 0.99 3.52 4.48 - 0.01 1.00 - vgatherdpd 8(%rbx,%ymm19,8), %zmm21 {%k2}
|
||||
- - 1.00 0.99 4.48 3.52 - 0.01 1.00 - vgatherdpd (%rbx,%ymm19,8), %zmm20 {%k3}
|
||||
- - 1.00 1.00 3.52 4.48 - - 1.00 - vgatherdpd 16(%rbx,%ymm19,8), %zmm22 {%k1}
|
||||
- - 0.02 - - - - 0.98 - - vsubpd %zmm21, %zmm1, %zmm18
|
||||
- - 0.17 - - - - 0.83 - - vsubpd %zmm20, %zmm2, %zmm17
|
||||
- - 0.18 - - - - 0.82 - - vsubpd %zmm22, %zmm0, %zmm19
|
||||
- - 0.01 - - - - 0.99 - - vmulpd %zmm18, %zmm18, %zmm31
|
||||
- - 0.69 - - - - 0.31 - - vfmadd231pd %zmm17, %zmm17, %zmm31
|
||||
- - 0.68 - - - - 0.32 - - vfmadd231pd %zmm19, %zmm19, %zmm31
|
||||
- - 2.00 - - - - 1.00 - - vrcp14pd %zmm31, %zmm30
|
||||
- - - - - - - 1.00 - - vcmpltpd %zmm14, %zmm31, %k6 {%k5}
|
||||
- - - - - - - 1.00 - - vfpclasspd $30, %zmm30, %k0
|
||||
- - 0.83 - - - - 0.17 - - vmovaps %zmm31, %zmm23
|
||||
- - 1.00 - 0.57 0.43 - - - - vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23
|
||||
- - 1.00 - - - - - - - knotw %k0, %k4
|
||||
- - 0.44 - - - - 0.56 - - vmulpd %zmm23, %zmm23, %zmm24
|
||||
- - 0.56 - - - - 0.44 - - vfmadd213pd %zmm30, %zmm23, %zmm30 {%k4}
|
||||
- - 0.55 - - - - 0.45 - - vfmadd213pd %zmm30, %zmm24, %zmm30 {%k4}
|
||||
- - 0.69 - - - - 0.31 - - vmulpd %zmm13, %zmm30, %zmm25
|
||||
- - 0.31 - - - - 0.69 - - vmulpd %zmm12, %zmm30, %zmm27
|
||||
- - 0.56 - - - - 0.44 - - vmulpd %zmm25, %zmm30, %zmm28
|
||||
- - 0.02 - - - - 0.98 - - vmulpd %zmm28, %zmm30, %zmm26
|
||||
- - 0.98 - - - - 0.02 - - vfmsub213pd %zmm5, %zmm28, %zmm30
|
||||
- - 0.30 - - - - 0.70 - - vmulpd %zmm27, %zmm26, %zmm29
|
||||
- - 0.16 - - - - 0.84 - - vmulpd %zmm30, %zmm29, %zmm23
|
||||
- - 0.17 - - - - 0.83 - - vfmadd231pd %zmm17, %zmm23, %zmm10 {%k6}
|
||||
- - 0.83 - - - - 0.17 - - vfmadd231pd %zmm18, %zmm23, %zmm9 {%k6}
|
||||
- - 0.17 - - - - 0.83 - - vfmadd231pd %zmm19, %zmm23, %zmm8 {%k6}
|
||||
- - - 0.01 - - - 0.01 0.98 - cmpq %r14, %r15
|
||||
- - 0.14 - - - - - 0.86 - jb ..B1.16
|
130
static_analysis/jan/analyses/lammps-icc-avx512-mca-icx.out
Normal file
130
static_analysis/jan/analyses/lammps-icc-avx512-mca-icx.out
Normal file
@@ -0,0 +1,130 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 4200
|
||||
Total Cycles: 2465
|
||||
Total uOps: 5800
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.35
|
||||
IPC: 1.70
|
||||
Block RThroughput: 13.0
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 4 1.00 vpcmpgtd %ymm4, %ymm3, %k5
|
||||
1 1 0.33 vpaddd %ymm15, %ymm4, %ymm4
|
||||
2 8 0.50 * vmovdqu32 (%r10,%r15,4), %ymm17 {%k5} {z}
|
||||
1 1 0.33 vpaddd %ymm17, %ymm17, %ymm18
|
||||
1 1 0.25 addq $8, %r15
|
||||
1 1 0.33 vpaddd %ymm18, %ymm17, %ymm19
|
||||
1 1 1.00 kmovw %k5, %k2
|
||||
1 1 1.00 kmovw %k5, %k3
|
||||
1 1 1.00 kmovw %k5, %k1
|
||||
1 0 0.17 vpxord %zmm21, %zmm21, %zmm21
|
||||
1 0 0.17 vpxord %zmm20, %zmm20, %zmm20
|
||||
1 0 0.17 vpxord %zmm22, %zmm22, %zmm22
|
||||
5 21 4.00 * vgatherdpd 8(%rbx,%ymm19,8), %zmm21 {%k2}
|
||||
5 21 4.00 * vgatherdpd (%rbx,%ymm19,8), %zmm20 {%k3}
|
||||
5 21 4.00 * vgatherdpd 16(%rbx,%ymm19,8), %zmm22 {%k1}
|
||||
1 4 0.50 vsubpd %zmm21, %zmm1, %zmm18
|
||||
1 4 0.50 vsubpd %zmm20, %zmm2, %zmm17
|
||||
1 4 0.50 vsubpd %zmm22, %zmm0, %zmm19
|
||||
1 4 0.50 vmulpd %zmm18, %zmm18, %zmm31
|
||||
1 4 0.50 vfmadd231pd %zmm17, %zmm17, %zmm31
|
||||
1 4 0.50 vfmadd231pd %zmm19, %zmm19, %zmm31
|
||||
3 4 2.00 vrcp14pd %zmm31, %zmm30
|
||||
1 4 1.00 vcmpltpd %zmm14, %zmm31, %k6 {%k5}
|
||||
1 4 1.00 vfpclasspd $30, %zmm30, %k0
|
||||
1 1 0.50 vmovaps %zmm31, %zmm23
|
||||
2 11 0.50 * vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23
|
||||
1 1 1.00 knotw %k0, %k4
|
||||
1 4 0.50 vmulpd %zmm23, %zmm23, %zmm24
|
||||
1 4 0.50 vfmadd213pd %zmm30, %zmm23, %zmm30 {%k4}
|
||||
1 4 0.50 vfmadd213pd %zmm30, %zmm24, %zmm30 {%k4}
|
||||
1 4 0.50 vmulpd %zmm13, %zmm30, %zmm25
|
||||
1 4 0.50 vmulpd %zmm12, %zmm30, %zmm27
|
||||
1 4 0.50 vmulpd %zmm25, %zmm30, %zmm28
|
||||
1 4 0.50 vmulpd %zmm28, %zmm30, %zmm26
|
||||
1 4 0.50 vfmsub213pd %zmm5, %zmm28, %zmm30
|
||||
1 4 0.50 vmulpd %zmm27, %zmm26, %zmm29
|
||||
1 4 0.50 vmulpd %zmm30, %zmm29, %zmm23
|
||||
1 4 0.50 vfmadd231pd %zmm17, %zmm23, %zmm10 {%k6}
|
||||
1 4 0.50 vfmadd231pd %zmm18, %zmm23, %zmm9 {%k6}
|
||||
1 4 0.50 vfmadd231pd %zmm19, %zmm23, %zmm8 {%k6}
|
||||
1 1 0.25 cmpq %r14, %r15
|
||||
1 1 0.50 jb ..B1.16
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - ICXDivider
|
||||
[1] - ICXFPDivider
|
||||
[2] - ICXPort0
|
||||
[3] - ICXPort1
|
||||
[4] - ICXPort2
|
||||
[5] - ICXPort3
|
||||
[6] - ICXPort4
|
||||
[7] - ICXPort5
|
||||
[8] - ICXPort6
|
||||
[9] - ICXPort7
|
||||
[10] - ICXPort8
|
||||
[11] - ICXPort9
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
- - 19.02 6.79 12.64 13.36 - 16.03 5.16 - - -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
- - - - - - - 1.00 - - - - vpcmpgtd %ymm4, %ymm3, %k5
|
||||
- - 0.28 0.72 - - - - - - - - vpaddd %ymm15, %ymm4, %ymm4
|
||||
- - 0.14 0.71 0.55 0.45 - 0.15 - - - - vmovdqu32 (%r10,%r15,4), %ymm17 {%k5} {z}
|
||||
- - - 0.97 - - - 0.03 - - - - vpaddd %ymm17, %ymm17, %ymm18
|
||||
- - 0.14 0.41 - - - 0.13 0.32 - - - addq $8, %r15
|
||||
- - - 0.99 - - - 0.01 - - - - vpaddd %ymm18, %ymm17, %ymm19
|
||||
- - 1.00 - - - - - - - - - kmovw %k5, %k2
|
||||
- - 1.00 - - - - - - - - - kmovw %k5, %k3
|
||||
- - 1.00 - - - - - - - - - kmovw %k5, %k1
|
||||
- - - - - - - - - - - - vpxord %zmm21, %zmm21, %zmm21
|
||||
- - - - - - - - - - - - vpxord %zmm20, %zmm20, %zmm20
|
||||
- - - - - - - - - - - - vpxord %zmm22, %zmm22, %zmm22
|
||||
- - 1.00 0.99 3.52 4.48 - 0.01 1.00 - - - vgatherdpd 8(%rbx,%ymm19,8), %zmm21 {%k2}
|
||||
- - 1.00 0.99 4.48 3.52 - 0.01 1.00 - - - vgatherdpd (%rbx,%ymm19,8), %zmm20 {%k3}
|
||||
- - 1.00 1.00 3.52 4.48 - - 1.00 - - - vgatherdpd 16(%rbx,%ymm19,8), %zmm22 {%k1}
|
||||
- - 0.02 - - - - 0.98 - - - - vsubpd %zmm21, %zmm1, %zmm18
|
||||
- - 0.17 - - - - 0.83 - - - - vsubpd %zmm20, %zmm2, %zmm17
|
||||
- - 0.18 - - - - 0.82 - - - - vsubpd %zmm22, %zmm0, %zmm19
|
||||
- - 0.01 - - - - 0.99 - - - - vmulpd %zmm18, %zmm18, %zmm31
|
||||
- - 0.69 - - - - 0.31 - - - - vfmadd231pd %zmm17, %zmm17, %zmm31
|
||||
- - 0.68 - - - - 0.32 - - - - vfmadd231pd %zmm19, %zmm19, %zmm31
|
||||
- - 2.00 - - - - 1.00 - - - - vrcp14pd %zmm31, %zmm30
|
||||
- - - - - - - 1.00 - - - - vcmpltpd %zmm14, %zmm31, %k6 {%k5}
|
||||
- - - - - - - 1.00 - - - - vfpclasspd $30, %zmm30, %k0
|
||||
- - 0.83 - - - - 0.17 - - - - vmovaps %zmm31, %zmm23
|
||||
- - 1.00 - 0.57 0.43 - - - - - - vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23
|
||||
- - 1.00 - - - - - - - - - knotw %k0, %k4
|
||||
- - 0.44 - - - - 0.56 - - - - vmulpd %zmm23, %zmm23, %zmm24
|
||||
- - 0.56 - - - - 0.44 - - - - vfmadd213pd %zmm30, %zmm23, %zmm30 {%k4}
|
||||
- - 0.55 - - - - 0.45 - - - - vfmadd213pd %zmm30, %zmm24, %zmm30 {%k4}
|
||||
- - 0.69 - - - - 0.31 - - - - vmulpd %zmm13, %zmm30, %zmm25
|
||||
- - 0.31 - - - - 0.69 - - - - vmulpd %zmm12, %zmm30, %zmm27
|
||||
- - 0.56 - - - - 0.44 - - - - vmulpd %zmm25, %zmm30, %zmm28
|
||||
- - 0.02 - - - - 0.98 - - - - vmulpd %zmm28, %zmm30, %zmm26
|
||||
- - 0.98 - - - - 0.02 - - - - vfmsub213pd %zmm5, %zmm28, %zmm30
|
||||
- - 0.30 - - - - 0.70 - - - - vmulpd %zmm27, %zmm26, %zmm29
|
||||
- - 0.16 - - - - 0.84 - - - - vmulpd %zmm30, %zmm29, %zmm23
|
||||
- - 0.17 - - - - 0.83 - - - - vfmadd231pd %zmm17, %zmm23, %zmm10 {%k6}
|
||||
- - 0.83 - - - - 0.17 - - - - vfmadd231pd %zmm18, %zmm23, %zmm9 {%k6}
|
||||
- - 0.17 - - - - 0.83 - - - - vfmadd231pd %zmm19, %zmm23, %zmm8 {%k6}
|
||||
- - - 0.01 - - - 0.01 0.98 - - - cmpq %r14, %r15
|
||||
- - 0.14 - - - - - 0.86 - - - jb ..B1.16
|
77
static_analysis/jan/analyses/lammps-icc-avx512-osaca-csx.out
Normal file
77
static_analysis/jan/analyses/lammps-icc-avx512-osaca-csx.out
Normal file
@@ -0,0 +1,77 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: lammps-icc-avx512.s
|
||||
Architecture: CSX
|
||||
Timestamp: 2023-02-10 16:30:08
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||
--------------------------------------------------------------------------------------------------
|
||||
200 | | | | | | | | || | | # pointer_increment=64 1303ca335e79351a96cfc07b8b9ec9d4
|
||||
201 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
202 | | | | | | | | || | | ..B1.16: # Preds ..B1.16 ..B1.15
|
||||
203 | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
204 | | | | | | 1.00 | | || | | vpcmpgtd %ymm4, %ymm3, %k5 #59.9
|
||||
205 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm15, %ymm4, %ymm4 #59.9
|
||||
206 | 0.00 | 1.00 | 0.50 0.50 | 0.50 0.50 | | 0.00 | | || 0.0 | | vmovdqu32 (%r10,%r15,4), %ymm17{%k5}{z} #60.21
|
||||
207 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm17, %ymm17, %ymm18 #61.36
|
||||
208 | 0.00 | 0.16 | | | | 0.00 | 0.84 | || | | addq $8, %r15 #59.9
|
||||
209 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm18, %ymm17, %ymm19 #61.36
|
||||
210 | 1.00 | | | | | | | || | | kmovw %k5, %k2 #61.36
|
||||
211 | 1.00 | | | | | | | || | | kmovw %k5, %k3 #61.36
|
||||
212 | 1.00 | | | | | | | || | | kmovw %k5, %k1 #61.36
|
||||
213 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm21, %zmm21, %zmm21 #61.36
|
||||
214 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm20, %zmm20, %zmm20 #61.36
|
||||
215 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm22, %zmm22, %zmm22 #61.36
|
||||
216 | 1.25 | 0.75 | 5.00 5.00 | 5.00 5.00 | | 0.25 | 0.75 | || 24.0 | | vgatherdpd 8(%rbx,%ymm19,8), %zmm21{%k2} #61.36
|
||||
217 | 1.25 | 0.25 | 5.00 5.00 | 5.00 5.00 | | 0.25 | 1.25 | || | | vgatherdpd (%rbx,%ymm19,8), %zmm20{%k3} #61.36
|
||||
218 | 1.25 | 0.09 | 5.00 5.00 | 5.00 5.00 | | 0.25 | 1.41 | || | | vgatherdpd 16(%rbx,%ymm19,8), %zmm22{%k1} #61.36
|
||||
219 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm21, %zmm1, %zmm18 #62.36
|
||||
220 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm20, %zmm2, %zmm17 #61.36
|
||||
221 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm22, %zmm0, %zmm19 #63.36
|
||||
222 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm18, %zmm18, %zmm31 #64.49
|
||||
223 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm17, %zmm17, %zmm31 #64.49
|
||||
224 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm19, %zmm19, %zmm31 #64.63
|
||||
225 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm31, %zmm30 #75.39
|
||||
226 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm31, %k6{%k5} #74.22
|
||||
227 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm30, %k0 #75.39
|
||||
228 | | | | | | | | || | | * vmovaps %zmm31, %zmm23 #75.39
|
||||
229 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23 #75.39
|
||||
230 | 1.00 | | | | | | | || | | knotw %k0, %k4 #75.39
|
||||
231 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm23, %zmm23, %zmm24 #75.39
|
||||
232 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm30, %zmm23, %zmm30{%k4} #75.39
|
||||
233 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm30, %zmm24, %zmm30{%k4} #75.39
|
||||
234 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm30, %zmm25 #76.38
|
||||
235 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm30, %zmm27 #77.55
|
||||
236 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm25, %zmm30, %zmm28 #76.44
|
||||
237 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm28, %zmm30, %zmm26 #76.50
|
||||
238 | 0.00 | | | | | 1.00 | | || | | vfmsub213pd %zmm5, %zmm28, %zmm30 #77.55
|
||||
239 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm27, %zmm26, %zmm29 #77.64
|
||||
240 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm30, %zmm29, %zmm23 #77.70
|
||||
241 | 0.00 | | | | | 1.00 | | || 4.0 | | vfmadd231pd %zmm17, %zmm23, %zmm10{%k6} #78.17
|
||||
242 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm18, %zmm23, %zmm9{%k6} #79.17
|
||||
243 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231pd %zmm19, %zmm23, %zmm8{%k6} #80.17
|
||||
244 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpq %r14, %r15 #59.9
|
||||
245 | | | | | | | | || | | * jb ..B1.16 # Prob 82% #59.9
|
||||
246 | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
18.8 5.25 16.0 16.0 16.0 16.0 18.8 5.25 86.0 4.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
243 | 4.0 | vfmadd231pd %zmm19, %zmm23, %zmm8{%k6} #80.17| [243]
|
||||
242 | 4.0 | vfmadd231pd %zmm18, %zmm23, %zmm9{%k6} #79.17| [242]
|
||||
241 | 4.0 | vfmadd231pd %zmm17, %zmm23, %zmm10{%k6} #78.17| [241]
|
||||
208 | 1.0 | addq $8, %r15 #59.9| [208]
|
||||
205 | 1.0 | vpaddd %ymm15, %ymm4, %ymm4 #59.9| [205]
|
||||
|
77
static_analysis/jan/analyses/lammps-icc-avx512-osaca-icx.out
Normal file
77
static_analysis/jan/analyses/lammps-icc-avx512-osaca-icx.out
Normal file
@@ -0,0 +1,77 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: lammps-icc-avx512.s
|
||||
Architecture: ICX
|
||||
Timestamp: 2023-02-10 16:29:42
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
||||
------------------------------------------------------------------------------------------------------------------------
|
||||
200 | | | | | | | | | | || | | # pointer_increment=64 1303ca335e79351a96cfc07b8b9ec9d4
|
||||
201 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
202 | | | | | | | | | | || | | ..B1.16: # Preds ..B1.16 ..B1.15
|
||||
203 | | | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
204 | | | | | | 1.000 | | | | || | | vpcmpgtd %ymm4, %ymm3, %k5 #59.9
|
||||
205 | 0.00 | 1.00 | | | | 0.000 | | | | || | | vpaddd %ymm15, %ymm4, %ymm4 #59.9
|
||||
206 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovdqu32 (%r10,%r15,4), %ymm17{%k5}{z} #60.21
|
||||
207 | 0.00 | 1.00 | | | | 0.000 | | | | || 1.0 | | vpaddd %ymm17, %ymm17, %ymm18 #61.36
|
||||
208 | 0.00 | 0.00 | | | | 0.000 | 1.00 | | | || | | addq $8, %r15 #59.9
|
||||
209 | 0.00 | 1.00 | | | | 0.000 | | | | || 1.0 | | vpaddd %ymm18, %ymm17, %ymm19 #61.36
|
||||
210 | 1.00 | | | | | | | | | || | | kmovw %k5, %k2 #61.36
|
||||
211 | 1.00 | | | | | | | | | || | | kmovw %k5, %k3 #61.36
|
||||
212 | 1.00 | | | | | | | | | || | | kmovw %k5, %k1 #61.36
|
||||
213 | 0.50 | | | | | 0.500 | | | | || | | vpxord %zmm21, %zmm21, %zmm21 #61.36
|
||||
214 | 0.24 | | | | | 0.760 | | | | || | | vpxord %zmm20, %zmm20, %zmm20 #61.36
|
||||
215 | 0.50 | | | | | 0.500 | | | | || | | vpxord %zmm22, %zmm22, %zmm22 #61.36
|
||||
216 | 0.67 | 2.33 | 7.00 7.00 | 7.00 7.00 | | 0.000 | | | | || 24.0 | | vgatherdpd 8(%rbx,%ymm19,8), %zmm21{%k2} #61.36
|
||||
217 | 0.67 | 2.33 | 7.00 7.00 | 7.00 7.00 | | 0.000 | | | | || | | vgatherdpd (%rbx,%ymm19,8), %zmm20{%k3} #61.36
|
||||
218 | 0.67 | 2.33 | 7.00 7.00 | 7.00 7.00 | | 0.000 | | | | || | | vgatherdpd 16(%rbx,%ymm19,8), %zmm22{%k1} #61.36
|
||||
219 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubpd %zmm21, %zmm1, %zmm18 #62.36
|
||||
220 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm20, %zmm2, %zmm17 #61.36
|
||||
221 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm22, %zmm0, %zmm19 #63.36
|
||||
222 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm18, %zmm18, %zmm31 #64.49
|
||||
223 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm17, %zmm17, %zmm31 #64.49
|
||||
224 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm19, %zmm19, %zmm31 #64.63
|
||||
225 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14pd %zmm31, %zmm30 #75.39
|
||||
226 | | | | | | 1.000 | | | | || | | vcmppd $1, %zmm14, %zmm31, %k6{%k5} #74.22
|
||||
227 | | | | | | 1.000 | | | | || | | vfpclasspd $30, %zmm30, %k0 #75.39
|
||||
228 | 0.50 | | | | | 0.500 | | | | || | | vmovaps %zmm31, %zmm23 #75.39
|
||||
229 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.500 | | | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23 #75.39
|
||||
230 | 1.00 | | | | | | | | | || | | knotw %k0, %k4 #75.39
|
||||
231 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm23, %zmm23, %zmm24 #75.39
|
||||
232 | 0.50 | | | | | 0.500 | | | | || | | vfmadd213pd %zmm30, %zmm23, %zmm30{%k4} #75.39
|
||||
233 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd213pd %zmm30, %zmm24, %zmm30{%k4} #75.39
|
||||
234 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm13, %zmm30, %zmm25 #76.38
|
||||
235 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm30, %zmm27 #77.55
|
||||
236 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm25, %zmm30, %zmm28 #76.44
|
||||
237 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm28, %zmm30, %zmm26 #76.50
|
||||
238 | 0.50 | | | | | 0.500 | | | | || | | vfmsub213pd %zmm5, %zmm28, %zmm30 #77.55
|
||||
239 | 0.25 | | | | | 0.750 | | | | || 4.0 | | vmulpd %zmm27, %zmm26, %zmm29 #77.64
|
||||
240 | 0.00 | | | | | 1.000 | | | | || 4.0 | | vmulpd %zmm30, %zmm29, %zmm23 #77.70
|
||||
241 | 0.00 | | | | | 1.000 | | | | || 4.0 | | vfmadd231pd %zmm17, %zmm23, %zmm10{%k6} #78.17
|
||||
242 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm18, %zmm23, %zmm9{%k6} #79.17
|
||||
243 | 0.00 | | | | | 1.000 | | | | || | 4.0 | vfmadd231pd %zmm19, %zmm23, %zmm8{%k6} #80.17
|
||||
244 | 0.00 | 0.00 | | | | -0.01 | 1.00 | | | || | | cmpq %r14, %r15 #59.9
|
||||
245 | | | | | | | | | | || | | * jb ..B1.16 # Prob 82% #59.9
|
||||
246 | | | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
18.0 9.98 22.0 22.0 22.0 22.0 18.00 2.00 89 4.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
243 | 4.0 | vfmadd231pd %zmm19, %zmm23, %zmm8{%k6} #80.17| [243]
|
||||
242 | 4.0 | vfmadd231pd %zmm18, %zmm23, %zmm9{%k6} #79.17| [242]
|
||||
241 | 4.0 | vfmadd231pd %zmm17, %zmm23, %zmm10{%k6} #78.17| [241]
|
||||
208 | 1.0 | addq $8, %r15 #59.9| [208]
|
||||
205 | 1.0 | vpaddd %ymm15, %ymm4, %ymm4 #59.9| [205]
|
||||
|
197
static_analysis/jan/analyses/lammps-icx-avx2zen-mca.out
Normal file
197
static_analysis/jan/analyses/lammps-icx-avx2zen-mca.out
Normal file
@@ -0,0 +1,197 @@
|
||||
|
||||
[0] Code Region
|
||||
|
||||
Iterations: 100
|
||||
Instructions: 7000
|
||||
Total Cycles: 3866
|
||||
Total uOps: 7900
|
||||
|
||||
Dispatch Width: 6
|
||||
uOps Per Cycle: 2.04
|
||||
IPC: 1.81
|
||||
Block RThroughput: 21.5
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 8 0.50 * vpbroadcastd .LCPI0_1(%rip), %xmm1
|
||||
1 10 0.50 * vpmulld (%r11,%rbp,4), %xmm1, %xmm11
|
||||
2 4 1.50 vpmovsxdq %xmm11, %ymm1
|
||||
1 1 0.50 vpsllq $3, %ymm1, %ymm1
|
||||
1 1 0.25 vpaddq %ymm1, %ymm3, %ymm1
|
||||
1 1 1.00 vmovq %xmm1, %r14
|
||||
2 1 1.00 vpextrq $1, %xmm1, %r9
|
||||
1 4 1.00 vextracti128 $1, %ymm1, %xmm1
|
||||
1 8 0.50 * vmovsd (%r14), %xmm2
|
||||
1 8 0.50 * vpsubd .LCPI0_5, %xmm11, %xmm6
|
||||
2 4 1.50 vpmovsxdq %xmm6, %ymm6
|
||||
1 1 0.50 vpsllq $3, %ymm6, %ymm6
|
||||
1 1 1.00 vmovq %xmm1, %rdi
|
||||
1 1 0.25 vpaddq %ymm6, %ymm3, %ymm6
|
||||
1 1 1.00 vmovq %xmm6, %rcx
|
||||
2 1 1.00 vpextrq $1, %xmm1, %rbx
|
||||
2 1 1.00 vpextrq $1, %xmm6, %rax
|
||||
1 4 1.00 vextracti128 $1, %ymm6, %xmm1
|
||||
1 8 0.50 * vmovsd (%rdi), %xmm6
|
||||
1 1 1.00 vmovq %xmm1, %rdi
|
||||
2 1 1.00 vpextrq $1, %xmm1, %rsi
|
||||
1 8 0.50 * vmovsd (%rdi), %xmm1
|
||||
1 8 0.50 * vmovsd (%rcx), %xmm7
|
||||
1 8 0.50 * vpbroadcastd .LCPI0_2(%rip), %xmm12
|
||||
1 8 0.50 * vmovhpd (%r9), %xmm2, %xmm2
|
||||
1 1 0.25 vpaddd %xmm12, %xmm11, %xmm4
|
||||
2 4 1.50 vpmovsxdq %xmm4, %ymm4
|
||||
1 8 0.50 * vmovhpd (%rax), %xmm7, %xmm7
|
||||
1 1 0.50 vpsllq $3, %ymm4, %ymm4
|
||||
1 1 0.25 vpaddq %ymm4, %ymm3, %ymm4
|
||||
1 8 0.50 * vmovhpd (%rbx), %xmm6, %xmm6
|
||||
2 1 1.00 vpextrq $1, %xmm4, %rax
|
||||
1 8 0.50 * vmovhpd (%rsi), %xmm1, %xmm1
|
||||
1 1 1.00 vmovq %xmm4, %rcx
|
||||
1 4 1.00 vextracti128 $1, %ymm4, %xmm4
|
||||
1 1 1.00 vmovq %xmm4, %rsi
|
||||
1 2 1.00 vinsertf128 $1, %xmm6, %ymm2, %ymm2
|
||||
2 1 1.00 vpextrq $1, %xmm4, %rdi
|
||||
1 8 0.50 * vmovsd (%rsi), %xmm4
|
||||
1 3 0.50 vsubpd %ymm2, %ymm14, %ymm2
|
||||
1 8 0.50 * vmovhpd (%rdi), %xmm4, %xmm4
|
||||
1 8 0.50 * vmovsd (%rcx), %xmm6
|
||||
1 2 1.00 vinsertf128 $1, %xmm1, %ymm7, %ymm1
|
||||
1 8 0.50 * vmovhpd (%rax), %xmm6, %xmm6
|
||||
1 2 1.00 vinsertf128 $1, %xmm4, %ymm6, %ymm4
|
||||
1 3 0.50 vsubpd %ymm1, %ymm5, %ymm1
|
||||
1 3 0.50 vsubpd %ymm4, %ymm10, %ymm4
|
||||
1 3 0.50 vmulpd %ymm2, %ymm2, %ymm6
|
||||
1 4 1.00 vfmadd231pd %ymm1, %ymm1, %ymm6
|
||||
1 4 1.00 vfmadd231pd %ymm4, %ymm4, %ymm6
|
||||
1 8 0.50 * vbroadcastsd .LCPI0_3(%rip), %ymm7
|
||||
1 13 5.00 vdivpd %ymm6, %ymm7, %ymm7
|
||||
1 3 0.50 vmulpd %ymm7, %ymm7, %ymm11
|
||||
1 3 0.50 vmulpd %ymm9, %ymm11, %ymm11
|
||||
1 8 0.50 * vbroadcastsd .LCPI0_4(%rip), %ymm12
|
||||
1 3 0.50 vmulpd %ymm7, %ymm11, %ymm11
|
||||
1 3 0.50 vaddpd %ymm12, %ymm11, %ymm12
|
||||
1 10 0.50 * vmulpd 128(%rsp), %ymm7, %ymm7
|
||||
1 3 0.50 vmulpd %ymm7, %ymm11, %ymm7
|
||||
1 3 0.50 vmulpd %ymm7, %ymm12, %ymm7
|
||||
1 1 0.50 vcmpltpd %ymm8, %ymm6, %ymm6
|
||||
1 4 1.00 vfmadd213pd %ymm0, %ymm7, %ymm2
|
||||
1 1 0.50 vblendvpd %ymm6, %ymm2, %ymm0, %ymm0
|
||||
1 4 1.00 vfmadd213pd %ymm15, %ymm7, %ymm1
|
||||
1 4 1.00 vfmadd213pd %ymm13, %ymm7, %ymm4
|
||||
1 1 0.50 vblendvpd %ymm6, %ymm1, %ymm15, %ymm15
|
||||
1 1 0.50 vblendvpd %ymm6, %ymm4, %ymm13, %ymm13
|
||||
1 1 0.25 addq $4, %rbp
|
||||
1 1 0.25 cmpq %rdx, %rbp
|
||||
1 1 0.50 jb .LBB0_9
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - Zn3AGU0
|
||||
[1] - Zn3AGU1
|
||||
[2] - Zn3AGU2
|
||||
[3] - Zn3ALU0
|
||||
[4] - Zn3ALU1
|
||||
[5] - Zn3ALU2
|
||||
[6] - Zn3ALU3
|
||||
[7] - Zn3BRU1
|
||||
[8] - Zn3FPP0
|
||||
[9] - Zn3FPP1
|
||||
[10] - Zn3FPP2
|
||||
[11] - Zn3FPP3
|
||||
[12.0] - Zn3FPP45
|
||||
[12.1] - Zn3FPP45
|
||||
[13] - Zn3FPSt
|
||||
[14.0] - Zn3LSU
|
||||
[14.1] - Zn3LSU
|
||||
[14.2] - Zn3LSU
|
||||
[15.0] - Zn3Load
|
||||
[15.1] - Zn3Load
|
||||
[15.2] - Zn3Load
|
||||
[16.0] - Zn3Store
|
||||
[16.1] - Zn3Store
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
|
||||
- - - 0.60 0.60 0.60 0.60 0.60 16.84 23.53 16.30 7.33 21.50 21.50 - 6.33 6.33 6.34 6.33 6.33 6.34 - -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
|
||||
- - - - - - - - - 0.03 0.97 - 0.51 0.49 - 0.34 0.33 0.33 0.34 0.33 0.33 - - vpbroadcastd .LCPI0_1(%rip), %xmm1
|
||||
- - - - - - - - 0.65 - - 0.35 0.34 0.66 - 0.49 0.05 0.46 0.49 0.05 0.46 - - vpmulld (%r11,%rbp,4), %xmm1, %xmm11
|
||||
- - - - - - - - - 0.06 2.94 - - - - - - - - - - - - vpmovsxdq %xmm11, %ymm1
|
||||
- - - - - - - - - 0.65 0.35 - - - - - - - - - - - - vpsllq $3, %ymm1, %ymm1
|
||||
- - - - - - - - - - - 1.00 - - - - - - - - - - - vpaddq %ymm1, %ymm3, %ymm1
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovq %xmm1, %r14
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrq $1, %xmm1, %r9
|
||||
- - - - - - - - 1.00 - - - - - - - - - - - - - - vextracti128 $1, %ymm1, %xmm1
|
||||
- - - - - - - - - - - - 0.50 0.50 - 0.48 0.35 0.17 0.48 0.35 0.17 - - vmovsd (%r14), %xmm2
|
||||
- - - - - - - - 0.01 0.18 0.17 0.64 0.47 0.53 - 0.34 0.33 0.33 0.34 0.33 0.33 - - vpsubd .LCPI0_5, %xmm11, %xmm6
|
||||
- - - - - - - - - 1.92 1.08 - - - - - - - - - - - - vpmovsxdq %xmm6, %ymm6
|
||||
- - - - - - - - - 0.32 0.68 - - - - - - - - - - - - vpsllq $3, %ymm6, %ymm6
|
||||
- - - - - - - - - - - - 1.30 0.70 - - - - - - - - - vmovq %xmm1, %rdi
|
||||
- - - - - - - - - - 0.32 0.68 - - - - - - - - - - - vpaddq %ymm6, %ymm3, %ymm6
|
||||
- - - - - - - - - - - - 1.34 0.66 - - - - - - - - - vmovq %xmm6, %rcx
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrq $1, %xmm1, %rbx
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrq $1, %xmm6, %rax
|
||||
- - - - - - - - 1.00 - - - - - - - - - - - - - - vextracti128 $1, %ymm6, %xmm1
|
||||
- - - - - - - - - - - - 0.50 0.50 - 0.03 0.65 0.32 0.03 0.65 0.32 - - vmovsd (%rdi), %xmm6
|
||||
- - - - - - - - - - - - 0.36 1.64 - - - - - - - - - vmovq %xmm1, %rdi
|
||||
- - - - - - - - - - - - 1.64 0.36 - - - - - - - - - vpextrq $1, %xmm1, %rsi
|
||||
- - - - - - - - - - - - 0.32 0.68 - 0.51 0.33 0.16 0.51 0.33 0.16 - - vmovsd (%rdi), %xmm1
|
||||
- - - - - - - - - - - - 0.68 0.32 - 0.49 0.01 0.50 0.49 0.01 0.50 - - vmovsd (%rcx), %xmm7
|
||||
- - - - - - - - - 0.48 0.52 - 0.67 0.33 - 0.17 0.62 0.21 0.17 0.62 0.21 - - vpbroadcastd .LCPI0_2(%rip), %xmm12
|
||||
- - - - - - - - - 0.01 0.99 - 0.17 0.83 - 0.02 0.64 0.34 0.02 0.64 0.34 - - vmovhpd (%r9), %xmm2, %xmm2
|
||||
- - - - - - - - 0.01 - - 0.99 - - - - - - - - - - - vpaddd %xmm12, %xmm11, %xmm4
|
||||
- - - - - - - - - 0.57 2.43 - - - - - - - - - - - - vpmovsxdq %xmm4, %ymm4
|
||||
- - - - - - - - - 0.34 0.66 - 0.82 0.18 - 0.49 0.35 0.16 0.49 0.35 0.16 - - vmovhpd (%rax), %xmm7, %xmm7
|
||||
- - - - - - - - - 0.34 0.66 - - - - - - - - - - - - vpsllq $3, %ymm4, %ymm4
|
||||
- - - - - - - - - - 0.01 0.99 - - - - - - - - - - - vpaddq %ymm4, %ymm3, %ymm4
|
||||
- - - - - - - - - 0.51 0.49 - 0.49 0.51 - 0.35 0.16 0.49 0.35 0.16 0.49 - - vmovhpd (%rbx), %xmm6, %xmm6
|
||||
- - - - - - - - - - - - 1.04 0.96 - - - - - - - - - vpextrq $1, %xmm4, %rax
|
||||
- - - - - - - - - 0.49 0.51 - 0.17 0.83 - 0.16 0.49 0.35 0.16 0.49 0.35 - - vmovhpd (%rsi), %xmm1, %xmm1
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovq %xmm4, %rcx
|
||||
- - - - - - - - 1.00 - - - - - - - - - - - - - - vextracti128 $1, %ymm4, %xmm4
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovq %xmm4, %rsi
|
||||
- - - - - - - - - 1.00 - - - - - - - - - - - - - vinsertf128 $1, %xmm6, %ymm2, %ymm2
|
||||
- - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrq $1, %xmm4, %rdi
|
||||
- - - - - - - - - - - - 0.50 0.50 - 0.49 0.35 0.16 0.49 0.35 0.16 - - vmovsd (%rsi), %xmm4
|
||||
- - - - - - - - - - 0.31 0.69 - - - - - - - - - - - vsubpd %ymm2, %ymm14, %ymm2
|
||||
- - - - - - - - - 0.49 0.51 - 0.48 0.52 - 0.35 0.16 0.49 0.35 0.16 0.49 - - vmovhpd (%rdi), %xmm4, %xmm4
|
||||
- - - - - - - - - - - - 0.52 0.48 - 0.16 0.49 0.35 0.16 0.49 0.35 - - vmovsd (%rcx), %xmm6
|
||||
- - - - - - - - - 1.00 - - - - - - - - - - - - - vinsertf128 $1, %xmm1, %ymm7, %ymm1
|
||||
- - - - - - - - - 0.35 0.65 - 0.50 0.50 - 0.47 0.35 0.18 0.47 0.35 0.18 - - vmovhpd (%rax), %xmm6, %xmm6
|
||||
- - - - - - - - - 1.00 - - - - - - - - - - - - - vinsertf128 $1, %xmm4, %ymm6, %ymm4
|
||||
- - - - - - - - - - 0.33 0.67 - - - - - - - - - - - vsubpd %ymm1, %ymm5, %ymm1
|
||||
- - - - - - - - - - 0.51 0.49 - - - - - - - - - - - vsubpd %ymm4, %ymm10, %ymm4
|
||||
- - - - - - - - 0.52 0.48 - - - - - - - - - - - - - vmulpd %ymm2, %ymm2, %ymm6
|
||||
- - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231pd %ymm1, %ymm1, %ymm6
|
||||
- - - - - - - - 1.34 0.66 - - - - - - - - - - - - - vfmadd231pd %ymm4, %ymm4, %ymm6
|
||||
- - - - - - - - - 0.66 0.34 - 0.51 0.49 - 0.19 0.32 0.49 0.19 0.32 0.49 - - vbroadcastsd .LCPI0_3(%rip), %ymm7
|
||||
- - - - - - - - - 5.00 - - - - - - - - - - - - - vdivpd %ymm6, %ymm7, %ymm7
|
||||
- - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulpd %ymm7, %ymm7, %ymm11
|
||||
- - - - - - - - 0.99 0.01 - - - - - - - - - - - - - vmulpd %ymm9, %ymm11, %ymm11
|
||||
- - - - - - - - - 0.30 0.70 - 0.49 0.51 - 0.34 0.33 0.33 0.34 0.33 0.33 - - vbroadcastsd .LCPI0_4(%rip), %ymm12
|
||||
- - - - - - - - 0.82 0.18 - - - - - - - - - - - - - vmulpd %ymm7, %ymm11, %ymm11
|
||||
- - - - - - - - - - 0.17 0.83 - - - - - - - - - - - vaddpd %ymm12, %ymm11, %ymm12
|
||||
- - - - - - - - 0.01 0.99 - - 0.18 0.82 - 0.46 0.02 0.52 0.46 0.02 0.52 - - vmulpd 128(%rsp), %ymm7, %ymm7
|
||||
- - - - - - - - 0.99 0.01 - - - - - - - - - - - - - vmulpd %ymm7, %ymm11, %ymm7
|
||||
- - - - - - - - 0.67 0.33 - - - - - - - - - - - - - vmulpd %ymm7, %ymm12, %ymm7
|
||||
- - - - - - - - 1.00 - - - - - - - - - - - - - - vcmpltpd %ymm8, %ymm6, %ymm6
|
||||
- - - - - - - - 1.34 0.66 - - - - - - - - - - - - - vfmadd213pd %ymm0, %ymm7, %ymm2
|
||||
- - - - - - - - 0.66 0.34 - - - - - - - - - - - - - vblendvpd %ymm6, %ymm2, %ymm0, %ymm0
|
||||
- - - - - - - - 0.66 1.34 - - - - - - - - - - - - - vfmadd213pd %ymm15, %ymm7, %ymm1
|
||||
- - - - - - - - 1.34 0.66 - - - - - - - - - - - - - vfmadd213pd %ymm13, %ymm7, %ymm4
|
||||
- - - - - - - - 0.34 0.66 - - - - - - - - - - - - - vblendvpd %ymm6, %ymm1, %ymm15, %ymm15
|
||||
- - - - - - - - 0.99 0.01 - - - - - - - - - - - - - vblendvpd %ymm6, %ymm4, %ymm13, %ymm13
|
||||
- - - - 0.40 0.20 0.40 - - - - - - - - - - - - - - - - addq $4, %rbp
|
||||
- - - 0.20 0.20 0.40 0.20 - - - - - - - - - - - - - - - - cmpq %rdx, %rbp
|
||||
- - - 0.40 - - - 0.60 - - - - - - - - - - - - - - - jb .LBB0_9
|
108
static_analysis/jan/analyses/lammps-icx-avx2zen-osaca.out
Normal file
108
static_analysis/jan/analyses/lammps-icx-avx2zen-osaca.out
Normal file
@@ -0,0 +1,108 @@
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||
Analyzed file: lammps-icx-avx2zen.s
|
||||
Architecture: ZEN3
|
||||
Timestamp: 2023-02-10 16:31:30
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 | 1 | 2 | 3 | DV0 | DV1 | 4 | 5 | 6 | 7 | 8 - 8DV | 9 | 10 | 11 | 12 | 13 || CP | LCD |
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------
|
||||
175 | | | | | | | | | | | | | | | | || | | # pointer_increment=16 e95035fc9e97f63299dd5188a0872bfc
|
||||
176 | | | | | | | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||
177 | | | | | | | | | | | | | | | | || | | .LBB0_9: #
|
||||
178 | | | | | | | | | | | | | | | | || | | # Parent Loop BB0_6 Depth=1
|
||||
179 | | | | | | | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||
180 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || 1.0 | | vpbroadcastd .LCPI0_1(%rip), %xmm1 # xmm1 = [3,3,3,3]
|
||||
181 | 0.00 | | | 1.00 | | | | | | | | | | 0.50 | 0.50 | || 3.0 | | vpmulld (%r11,%rbp,4), %xmm1, %xmm11
|
||||
182 | 0.00 | 0.75 | 0.38 | 0.87 | | | | | | | | | | | | || 4.0 | | vpmovsxdq %xmm11, %ymm1
|
||||
183 | | 0.00 | 1.00 | | | | | | | | | | | | | || 1.0 | | vpsllq $3, %ymm1, %ymm1
|
||||
184 | 0.00 | 0.00 | 0.50 | 0.50 | | | | | | | | | | | | || 1.0 | | vpaddq %ymm1, %ymm3, %ymm1
|
||||
185 | 0.00 | 0.00 | 0.50 | 0.50 | | | | | | | | | | | | || | | vmovq %xmm1, %r14
|
||||
186 | 0.12 | 1.88 | | | | | | | | | | | | | | || | | vpextrq $1, %xmm1, %r9
|
||||
187 | | 1.00 | | | | | | | | | | | | | | || 3.0 | | vextracti128 $1, %ymm1, %xmm1
|
||||
188 | | | | | | | | | | | | | | 0.50 | 0.50 | || | | vmovsd (%r14), %xmm2 # xmm2 = mem[0],zero
|
||||
189 | 0.00 | 0.00 | 0.50 | 0.50 | | | | | | | | | | | | || | | vpsubd .LCPI0_5, %xmm11, %xmm6
|
||||
190 | 0.00 | 0.75 | 0.38 | 0.87 | | | | | | | | | | | | || | | vpmovsxdq %xmm6, %ymm6
|
||||
191 | | 0.00 | 1.00 | | | | | | | | | | | | | || | | vpsllq $3, %ymm6, %ymm6
|
||||
192 | 0.00 | 0.00 | 0.49 | 0.51 | | | | | | | | | | | | || | | vmovq %xmm1, %rdi
|
||||
193 | 0.00 | 0.00 | 0.51 | 0.49 | | | | | | | | | | | | || | | vpaddq %ymm6, %ymm3, %ymm6
|
||||
194 | 0.00 | 0.00 | 0.49 | 0.51 | | | | | | | | | | | | || | | vmovq %xmm6, %rcx
|
||||
195 | 0.13 | 1.87 | | | | | | | | | | | | | | || 6.0 | | vpextrq $1, %xmm1, %rbx
|
||||
196 | 0.00 | 2.00 | | | | | | | | | | | | | | || | | vpextrq $1, %xmm6, %rax
|
||||
197 | | 1.00 | | | | | | | | | | | | | | || | | vextracti128 $1, %ymm6, %xmm1
|
||||
198 | | | | | | | | | | | | | | 0.50 | 0.50 | || | | vmovsd (%rdi), %xmm6 # xmm6 = mem[0],zero
|
||||
199 | 0.00 | 0.00 | 0.00 | 1.00 | | | | | | | | | | | | || | | vmovq %xmm1, %rdi
|
||||
200 | 0.00 | 2.00 | | | | | | | | | | | | | | || | | vpextrq $1, %xmm1, %rsi
|
||||
201 | | | | | | | | | | | | | | 0.50 | 0.50 | || | | vmovsd (%rdi), %xmm1 # xmm1 = mem[0],zero
|
||||
202 | | | | | | | | | | | | | | 0.50 | 0.50 | || | | vmovsd (%rcx), %xmm7 # xmm7 = mem[0],zero
|
||||
203 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || | | vpbroadcastd .LCPI0_2(%rip), %xmm12 # xmm12 = [2,2,2,2]
|
||||
204 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || | | vmovhpd (%r9), %xmm2, %xmm2 # xmm2 = xmm2[0],mem[0]
|
||||
205 | 0.00 | 0.00 | 0.63 | 0.37 | | | | | | | | | | | | || | | vpaddd %xmm12, %xmm11, %xmm4
|
||||
206 | 0.00 | 0.75 | 0.00 | 1.25 | | | | | | | | | | | | || | | vpmovsxdq %xmm4, %ymm4
|
||||
207 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || | | vmovhpd (%rax), %xmm7, %xmm7 # xmm7 = xmm7[0],mem[0]
|
||||
208 | | 0.00 | 1.00 | | | | | | | | | | | | | || | | vpsllq $3, %ymm4, %ymm4
|
||||
209 | 0.00 | 0.00 | 0.00 | 1.00 | | | | | | | | | | | | || | | vpaddq %ymm4, %ymm3, %ymm4
|
||||
210 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || 5.0 | | vmovhpd (%rbx), %xmm6, %xmm6 # xmm6 = xmm6[0],mem[0]
|
||||
211 | 0.75 | 1.25 | | | | | | | | | | | | | | || | | vpextrq $1, %xmm4, %rax
|
||||
212 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || | | vmovhpd (%rsi), %xmm1, %xmm1 # xmm1 = xmm1[0],mem[0]
|
||||
213 | 0.00 | 0.00 | 0.50 | 0.50 | | | | | | | | | | | | || | | vmovq %xmm4, %rcx
|
||||
214 | | 1.00 | | | | | | | | | | | | | | || | | vextracti128 $1, %ymm4, %xmm4
|
||||
215 | 0.00 | 0.00 | 0.00 | 1.00 | | | | | | | | | | | | || | | vmovq %xmm4, %rsi
|
||||
216 | | 1.00 | | | | | | | | | | | | | | || 1.0 | | vinsertf128 $1, %xmm6, %ymm2, %ymm2
|
||||
217 | 1.00 | 1.00 | | | | | | | | | | | | | | || | | vpextrq $1, %xmm4, %rdi
|
||||
218 | | | | | | | | | | | | | | 0.50 | 0.50 | || | | vmovsd (%rsi), %xmm4 # xmm4 = mem[0],zero
|
||||
219 | | | 0.00 | 1.00 | | | | | | | | | | | | || 3.0 | | vsubpd %ymm2, %ymm14, %ymm2
|
||||
220 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || | | vmovhpd (%rdi), %xmm4, %xmm4 # xmm4 = xmm4[0],mem[0]
|
||||
221 | | | | | | | | | | | | | | 0.50 | 0.50 | || | | vmovsd (%rcx), %xmm6 # xmm6 = mem[0],zero
|
||||
222 | | 1.00 | | | | | | | | | | | | | | || | | vinsertf128 $1, %xmm1, %ymm7, %ymm1
|
||||
223 | | 0.00 | 1.00 | | | | | | | | | | | 0.50 | 0.50 | || | | vmovhpd (%rax), %xmm6, %xmm6 # xmm6 = xmm6[0],mem[0]
|
||||
224 | | 1.00 | | | | | | | | | | | | | | || | | vinsertf128 $1, %xmm4, %ymm6, %ymm4
|
||||
225 | | | 0.00 | 1.00 | | | | | | | | | | | | || | | vsubpd %ymm1, %ymm5, %ymm1
|
||||
226 | | | 0.00 | 1.00 | | | | | | | | | | | | || | | vsubpd %ymm4, %ymm10, %ymm4
|
||||
227 | 1.00 | 0.00 | | | | | | | | | | | | | | || 3.0 | | vmulpd %ymm2, %ymm2, %ymm6
|
||||
228 | 1.00 | 0.00 | | | | | | | | | | | | | | || 4.0 | | vfmadd231pd %ymm1, %ymm1, %ymm6 # ymm6 = (ymm1 * ymm1) + ymm6
|
||||
229 | 1.00 | 0.00 | | | | | | | | | | | | | | || 4.0 | | vfmadd231pd %ymm4, %ymm4, %ymm6 # ymm6 = (ymm4 * ymm4) + ymm6
|
||||
230 | 1.00 | | | | | | | | | | | | | 0.50 | 0.50 | || | | vbroadcastsd .LCPI0_3(%rip), %ymm7 # ymm7 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
231 | | | | | 4.50 | 4.50 | | | | | | | | | | || 13.0 | | vdivpd %ymm6, %ymm7, %ymm7
|
||||
232 | 1.00 | 0.00 | | | | | | | | | | | | | | || 3.0 | | vmulpd %ymm7, %ymm7, %ymm11
|
||||
233 | 1.00 | 0.00 | | | | | | | | | | | | | | || 3.0 | | vmulpd %ymm9, %ymm11, %ymm11
|
||||
234 | 1.00 | | | | | | | | | | | | | 0.50 | 0.50 | || | | vbroadcastsd .LCPI0_4(%rip), %ymm12 # ymm12 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
|
||||
235 | 1.00 | 0.00 | | | | | | | | | | | | | | || 3.0 | | vmulpd %ymm7, %ymm11, %ymm11
|
||||
236 | | | 0.00 | 1.00 | | | | | | | | | | | | || 3.0 | | vaddpd %ymm12, %ymm11, %ymm12
|
||||
237 | 1.00 | 0.00 | | | | | | | | | | | | 0.50 | 0.50 | || | | vmulpd 128(%rsp), %ymm7, %ymm7 # 32-byte Folded Reload
|
||||
238 | 1.00 | 0.00 | | | | | | | | | | | | | | || | | vmulpd %ymm7, %ymm11, %ymm7
|
||||
239 | 1.00 | 0.00 | | | | | | | | | | | | | | || 3.0 | | vmulpd %ymm7, %ymm12, %ymm7
|
||||
240 | | | 0.00 | 1.00 | | | | | | | | | | | | || | | vcmpltpd %ymm8, %ymm6, %ymm6
|
||||
241 | 1.00 | 0.00 | | | | | | | | | | | | | | || 4.0 | | vfmadd213pd %ymm0, %ymm7, %ymm2 # ymm2 = (ymm7 * ymm2) + ymm0
|
||||
242 | 1.00 | 0.00 | | | | | | | | | | | | | | || 1.0 | | vblendvpd %ymm6, %ymm2, %ymm0, %ymm0
|
||||
243 | 1.00 | 0.00 | | | | | | | | | | | | | | || | | vfmadd213pd %ymm15, %ymm7, %ymm1 # ymm1 = (ymm7 * ymm1) + ymm15
|
||||
244 | 1.00 | 0.00 | | | | | | | | | | | | | | || | 4.0 | vfmadd213pd %ymm13, %ymm7, %ymm4 # ymm4 = (ymm7 * ymm4) + ymm13
|
||||
245 | 1.00 | 0.00 | | | | | | | | | | | | | | || | | vblendvpd %ymm6, %ymm1, %ymm15, %ymm15
|
||||
246 | 0.75 | 0.25 | | | | | | | | | | | | | | || | 1.0 | vblendvpd %ymm6, %ymm4, %ymm13, %ymm13
|
||||
247 | | | | | | | | | 0.25 | 0.25 | 0.25 | 0.25 | | | | || | | addq $4, %rbp
|
||||
248 | | | | | | | | | 0.25 | 0.25 | 0.25 | 0.25 | | | | || | | cmpq %rdx, %rbp
|
||||
249 | | | | | | | | | 0.00 | | | | 1.00 | | | || | | jb .LBB0_9
|
||||
250 | | | | | | | | | | | | | | | | || | | # LLVM-MCA-END
|
||||
|
||||
18.8 18.5 15.9 15.9 4.50 4.50 0.50 0.50 0.50 0.50 9.00 9.00 72 5.0
|
||||
|
||||
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
244 | 5.0 | vfmadd213pd %ymm13, %ymm7, %ymm4 # ymm4 = (ymm7 * ymm4) + ymm13| [244, 246]
|
||||
243 | 5.0 | vfmadd213pd %ymm15, %ymm7, %ymm1 # ymm1 = (ymm7 * ymm1) + ymm15| [243, 245]
|
||||
241 | 5.0 | vfmadd213pd %ymm0, %ymm7, %ymm2 # ymm2 = (ymm7 * ymm2) + ymm0| [241, 242]
|
||||
247 | 1.0 | addq $4, %rbp | [247]
|
||||
246 | 1.0 | vblendvpd %ymm6, %ymm4, %ymm13, %ymm13| [246]
|
||||
245 | 1.0 | vblendvpd %ymm6, %ymm1, %ymm15, %ymm15| [245]
|
||||
242 | 1.0 | vblendvpd %ymm6, %ymm2, %ymm0, %ymm0| [242]
|
||||
|
Reference in New Issue
Block a user