Add outputs for new analyses
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
4e5fe27c0f
commit
300776f512
120
static_analysis/rafael/analyses/gromacs-icx-avx512-dp-iaca.out
Normal file
120
static_analysis/rafael/analyses/gromacs-icx-avx512-dp-iaca.out
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||||
|
Analyzed File - gromacs-avx512-dp.o
|
||||||
|
Binary Format - 64Bit
|
||||||
|
Architecture - SKX
|
||||||
|
Analysis Type - Throughput
|
||||||
|
|
||||||
|
Throughput Analysis Report
|
||||||
|
--------------------------
|
||||||
|
Block Throughput: 45.95 Cycles Throughput Bottleneck: Backend
|
||||||
|
Loop Count: 22
|
||||||
|
Port Binding In Cycles Per Iteration:
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
| Cycles | 40.0 0.0 | 2.0 | 5.0 5.0 | 5.0 5.0 | 0.0 | 40.0 | 2.0 | 0.0 |
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
DV - Divider pipe (on port 0)
|
||||||
|
D - Data fetch pipe (on ports 2 and 3)
|
||||||
|
F - Macro Fusion with the previous instruction occurred
|
||||||
|
* - instruction micro-ops not bound to a port
|
||||||
|
^ - Micro Fusion occurred
|
||||||
|
# - ESP Tracking sync uop was issued
|
||||||
|
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||||
|
X - instruction not supported, was not accounted in Analysis
|
||||||
|
|
||||||
|
| Num Of | Ports pressure in cycles | |
|
||||||
|
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||||
|
-----------------------------------------------------------------------------------------
|
||||||
|
| 1 | | | 1.0 1.0 | | | | | | movsxd rax, dword ptr [rdx+rcx*4]
|
||||||
|
| 1 | | 1.0 | | | | | | | lea rax, ptr [rax+rax*2]
|
||||||
|
| 1 | | | | | | | 1.0 | | shl rax, 0x6
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vmovapd zmm28, zmmword ptr [rdi+rax*1]
|
||||||
|
| 1 | | | 1.0 1.0 | | | | | | vmovapd zmm29, zmmword ptr [rdi+rax*1+0x40]
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vmovapd zmm30, zmmword ptr [rdi+rax*1+0x80]
|
||||||
|
| 1 | | | 1.0 1.0 | | | | | | vmovupd zmm3, zmmword ptr [rsp+0x40]
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm14, zmm3, zmm28
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm12, zmm26, zmm29
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm31, zmm25, zmm30
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm3, zmm23, zmm28
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm15, zmm31, zmm31
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm15, zmm12, zmm12
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm15, zmm14, zmm14
|
||||||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm16, zmm15
|
||||||
|
| 1 | | | | | | 1.0 | | | vcmppd k1, zmm15, zmm0, 0x1
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm15, zmm16, zmm20
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm16, zmm16
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm15, zmm18, zmm15
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vmovupd zmm18, zmmword ptr [rsp+0x200]
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm18, zmm18, zmm29
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm1, zmm16
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm16, zmm15
|
||||||
|
| 1 | 1.0 | | | | | | | | vaddpd zmm15, zmm15, zmm2
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm15, zmm16, zmm15
|
||||||
|
| 1 | | | 1.0 1.0 | | | | | | vmovupd zmm16, zmmword ptr [rsp+0x1c0]
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm16, zmm16, zmm30
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm19{k1}, zmm15, zmm14
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm14, zmm16, zmm16
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14, zmm18, zmm18
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm14, zmm3, zmm3
|
||||||
|
| 1 | | | | | | 1.0 | | | vcmppd k2, zmm14, zmm0, 0x1
|
||||||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm14, zmm14
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm11{k1}, zmm15, zmm12
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm7{k1}, zmm15, zmm31
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm14, zmm20
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm15, zmm14, zmm14
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm15, zmm12
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm15, zmm24, zmm28
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm14, zmm1, zmm14
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm14, zmm14, zmm12
|
||||||
|
| 1 | | | | | | 1.0 | | | vaddpd zmm12, zmm12, zmm2
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm14, zmm12
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm14, zmm22, zmm29
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm17{k2}, zmm12, zmm3
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm3, zmm27, zmm30
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm9{k2}, zmm12, zmm18
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm18, zmm3, zmm3
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm18, zmm14, zmm14
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm18, zmm15, zmm15
|
||||||
|
| 1 | | | | | | 1.0 | | | vcmppd k1, zmm18, zmm0, 0x1
|
||||||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm18, zmm18
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm5{k2}, zmm12, zmm16
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm18, zmm20
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm18, zmm18
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm16, zmm12
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm16, zmm1, zmm18
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm16, zmm16, zmm12
|
||||||
|
| 1 | 1.0 | | | | | | | | vaddpd zmm12, zmm12, zmm2
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm16, zmm12
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm21{k1}, zmm12, zmm15
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vmovupd zmm15, zmmword ptr [rsp+0x240]
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm15, zmm15, zmm28
|
||||||
|
| 1 | | | 1.0 1.0 | | | | | | vmovupd zmm16, zmmword ptr [rsp+0x80]
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubpd zmm16, zmm16, zmm29
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vmovupd zmm18, zmmword ptr [rsp+0x180]
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubpd zmm18, zmm18, zmm30
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm10{k1}, zmm12, zmm14
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm14, zmm18, zmm18
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm14, zmm16, zmm16
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm14, zmm15, zmm15
|
||||||
|
| 1 | | | | | | 1.0 | | | vcmppd k2, zmm14, zmm0, 0x1
|
||||||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14pd zmm14, zmm14
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm6{k1}, zmm12, zmm3
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm3, zmm14, zmm20
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm14, zmm14
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm3, zmm12, zmm3
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulpd zmm12, zmm1, zmm14
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm12, zmm12, zmm3
|
||||||
|
| 1 | 1.0 | | | | | | | | vaddpd zmm3, zmm3, zmm2
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulpd zmm3, zmm12, zmm3
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm13{k2}, zmm3, zmm15
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231pd zmm8{k2}, zmm3, zmm16
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231pd zmm4{k2}, zmm3, zmm18
|
||||||
|
| 1 | | 1.0 | | | | | | | inc rcx
|
||||||
|
| 1* | | | | | | | | | cmp r11, rcx
|
||||||
|
| 0*F | | | | | | | | | jnz 0xfffffffffffffdf3
|
||||||
|
| 1 | | | | | | | 1.0 | | jmp 0xfffffffffffff7a5
|
||||||
|
Total Num Of Uops: 95
|
||||||
|
Analysis Notes:
|
||||||
|
Backend allocation was stalled due to unavailable allocation resources.
|
@ -0,0 +1,132 @@
|
|||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||||
|
Analyzed file: gromacs-avx512-dp.s
|
||||||
|
Architecture: CSX
|
||||||
|
Timestamp: 2023-04-05 00:41:22
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
2814 | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
|
||||||
|
2815 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||||
|
2816 | | | | | | | | || | | .LBB5_11: #
|
||||||
|
2817 | | | | | | | | || | | # Parent Loop BB5_7 Depth=1
|
||||||
|
2818 | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||||
|
2819 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%rdx,%rcx,4), %rax
|
||||||
|
2820 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rax,%rax,2), %rax
|
||||||
|
2821 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $6, %rax
|
||||||
|
2822 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovapd (%rdi,%rax), %zmm28
|
||||||
|
2823 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovapd 64(%rdi,%rax), %zmm29
|
||||||
|
2824 | | | 0.50 0.50 | 0.50 0.50 | | | | || 0.0 | | vmovapd 128(%rdi,%rax), %zmm30
|
||||||
|
2825 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 64(%rsp), %zmm3 # 64-byte Reload
|
||||||
|
2826 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm3, %zmm14
|
||||||
|
2827 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm26, %zmm12
|
||||||
|
2828 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm30, %zmm25, %zmm31
|
||||||
|
2829 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm23, %zmm3
|
||||||
|
2830 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm15
|
||||||
|
2831 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm12, %zmm12, %zmm15 # zmm15 = (zmm12 * zmm12) + zmm15
|
||||||
|
2832 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm14, %zmm14, %zmm15 # zmm15 = (zmm14 * zmm14) + zmm15
|
||||||
|
2833 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm15, %zmm16
|
||||||
|
2834 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm15, %k1
|
||||||
|
2835 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm16, %zmm15
|
||||||
|
2836 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm16, %zmm18
|
||||||
|
2837 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm18, %zmm15
|
||||||
|
2838 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 512(%rsp), %zmm18 # 64-byte Reload
|
||||||
|
2839 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm18, %zmm18
|
||||||
|
2840 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm1, %zmm16
|
||||||
|
2841 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm16
|
||||||
|
2842 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm15, %zmm15
|
||||||
|
2843 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm15
|
||||||
|
2844 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 448(%rsp), %zmm16 # 64-byte Reload
|
||||||
|
2845 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm16, %zmm16
|
||||||
|
2846 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19
|
||||||
|
2847 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm16, %zmm14
|
||||||
|
2848 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm18, %zmm14 # zmm14 = (zmm18 * zmm18) + zmm14
|
||||||
|
2849 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm3, %zmm14 # zmm14 = (zmm3 * zmm3) + zmm14
|
||||||
|
2850 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm14, %k2
|
||||||
|
2851 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm14, %zmm14
|
||||||
|
2852 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11
|
||||||
|
2853 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7
|
||||||
|
2854 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm14, %zmm12
|
||||||
|
2855 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm14, %zmm15
|
||||||
|
2856 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm15, %zmm12
|
||||||
|
2857 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm24, %zmm15
|
||||||
|
2858 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm1, %zmm14
|
||||||
|
2859 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm14, %zmm14
|
||||||
|
2860 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm12, %zmm12
|
||||||
|
2861 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm14, %zmm12
|
||||||
|
2862 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm22, %zmm14
|
||||||
|
2863 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17
|
||||||
|
2864 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm27, %zmm3
|
||||||
|
2865 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9
|
||||||
|
2866 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm3, %zmm3, %zmm18
|
||||||
|
2867 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm14, %zmm14, %zmm18 # zmm18 = (zmm14 * zmm14) + zmm18
|
||||||
|
2868 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm15, %zmm18 # zmm18 = (zmm15 * zmm15) + zmm18
|
||||||
|
2869 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm18, %k1
|
||||||
|
2870 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm18, %zmm18
|
||||||
|
2871 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5
|
||||||
|
2872 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm18, %zmm12
|
||||||
|
2873 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm16
|
||||||
|
2874 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm12
|
||||||
|
2875 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm1, %zmm16
|
||||||
|
2876 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm16
|
||||||
|
2877 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm12, %zmm12
|
||||||
|
2878 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm12
|
||||||
|
2879 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21
|
||||||
|
2880 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 576(%rsp), %zmm15 # 64-byte Reload
|
||||||
|
2881 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm15, %zmm15
|
||||||
|
2882 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 128(%rsp), %zmm16 # 64-byte Reload
|
||||||
|
2883 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm16, %zmm16
|
||||||
|
2884 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 384(%rsp), %zmm18 # 64-byte Reload
|
||||||
|
2885 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm18, %zmm18
|
||||||
|
2886 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10
|
||||||
|
2887 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm14
|
||||||
|
2888 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm16, %zmm14 # zmm14 = (zmm16 * zmm16) + zmm14
|
||||||
|
2889 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm15, %zmm14 # zmm14 = (zmm15 * zmm15) + zmm14
|
||||||
|
2890 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm14, %k2
|
||||||
|
2891 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm14, %zmm14
|
||||||
|
2892 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6
|
||||||
|
2893 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm14, %zmm3
|
||||||
|
2894 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm14, %zmm12
|
||||||
|
2895 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm3
|
||||||
|
2896 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm14, %zmm1, %zmm12
|
||||||
|
2897 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm12
|
||||||
|
2898 | 0.00 | | | | | 1.00 | | || | | vaddpd %zmm2, %zmm3, %zmm3
|
||||||
|
2899 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm3
|
||||||
|
2900 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13
|
||||||
|
2901 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8
|
||||||
|
2902 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4
|
||||||
|
2903 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | incq %rcx
|
||||||
|
2904 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rcx, %r11
|
||||||
|
2905 | | | | | | | | || | | * jne .LBB5_11
|
||||||
|
2906 | 0.00 | | | | | | 1.00 | || | | jmp .LBB5_12
|
||||||
|
2907 | | | | | | | | || | | # LLVM-MCA-END
|
||||||
|
|
||||||
|
38.0 2.50 5.00 5.00 5.00 5.00 38.0 2.50 50.0 4.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
2902 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4| [2902]
|
||||||
|
2901 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8| [2901]
|
||||||
|
2900 | 4.0 | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13| [2900]
|
||||||
|
2892 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6| [2892]
|
||||||
|
2886 | 4.0 | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10| [2886]
|
||||||
|
2879 | 4.0 | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21| [2879]
|
||||||
|
2871 | 4.0 | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5| [2871]
|
||||||
|
2865 | 4.0 | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9| [2865]
|
||||||
|
2863 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17| [2863]
|
||||||
|
2853 | 4.0 | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7| [2853]
|
||||||
|
2852 | 4.0 | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11| [2852]
|
||||||
|
2846 | 4.0 | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19| [2846]
|
||||||
|
2903 | 1.0 | incq %rcx | [2903]
|
||||||
|
|
@ -0,0 +1,132 @@
|
|||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||||
|
Analyzed file: gromacs-avx512-dp.s
|
||||||
|
Architecture: ICX
|
||||||
|
Timestamp: 2023-04-05 00:41:45
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
||||||
|
------------------------------------------------------------------------------------------------------------------------
|
||||||
|
2814 | | | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
|
||||||
|
2815 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||||
|
2816 | | | | | | | | | | || | | .LBB5_11: #
|
||||||
|
2817 | | | | | | | | | | || | | # Parent Loop BB5_7 Depth=1
|
||||||
|
2818 | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||||
|
2819 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | movslq (%rdx,%rcx,4), %rax
|
||||||
|
2820 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || 6.0 | | leaq (%rax,%rax,2), %rax
|
||||||
|
2821 | 0.00 | | | | | | 1.00 | | | || 1.0 | | shlq $6, %rax
|
||||||
|
2822 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovapd (%rdi,%rax), %zmm28
|
||||||
|
2823 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovapd 64(%rdi,%rax), %zmm29
|
||||||
|
2824 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovapd 128(%rdi,%rax), %zmm30
|
||||||
|
2825 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 64(%rsp), %zmm3 # 64-byte Reload
|
||||||
|
2826 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm3, %zmm14
|
||||||
|
2827 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm26, %zmm12
|
||||||
|
2828 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubpd %zmm30, %zmm25, %zmm31
|
||||||
|
2829 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm23, %zmm3
|
||||||
|
2830 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm15
|
||||||
|
2831 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm12, %zmm12, %zmm15 # zmm15 = (zmm12 * zmm12) + zmm15
|
||||||
|
2832 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm14, %zmm14, %zmm15 # zmm15 = (zmm14 * zmm14) + zmm15
|
||||||
|
2833 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14pd %zmm15, %zmm16
|
||||||
|
2834 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm15, %k1
|
||||||
|
2835 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm20, %zmm16, %zmm15
|
||||||
|
2836 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm16, %zmm18
|
||||||
|
2837 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm15, %zmm18, %zmm15
|
||||||
|
2838 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 512(%rsp), %zmm18 # 64-byte Reload
|
||||||
|
2839 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm18, %zmm18
|
||||||
|
2840 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm1, %zmm16
|
||||||
|
2841 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm16
|
||||||
|
2842 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm2, %zmm15, %zmm15
|
||||||
|
2843 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm15
|
||||||
|
2844 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 448(%rsp), %zmm16 # 64-byte Reload
|
||||||
|
2845 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm16, %zmm16
|
||||||
|
2846 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19
|
||||||
|
2847 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm16, %zmm14
|
||||||
|
2848 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm18, %zmm18, %zmm14 # zmm14 = (zmm18 * zmm18) + zmm14
|
||||||
|
2849 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm3, %zmm3, %zmm14 # zmm14 = (zmm3 * zmm3) + zmm14
|
||||||
|
2850 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm14, %k2
|
||||||
|
2851 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm14, %zmm14
|
||||||
|
2852 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11
|
||||||
|
2853 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7
|
||||||
|
2854 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm14, %zmm12
|
||||||
|
2855 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm14, %zmm14, %zmm15
|
||||||
|
2856 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm15, %zmm12
|
||||||
|
2857 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm24, %zmm15
|
||||||
|
2858 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm14, %zmm1, %zmm14
|
||||||
|
2859 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm14, %zmm14
|
||||||
|
2860 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm2, %zmm12, %zmm12
|
||||||
|
2861 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm14, %zmm12
|
||||||
|
2862 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm22, %zmm14
|
||||||
|
2863 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17
|
||||||
|
2864 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm27, %zmm3
|
||||||
|
2865 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9
|
||||||
|
2866 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm3, %zmm3, %zmm18
|
||||||
|
2867 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm14, %zmm14, %zmm18 # zmm18 = (zmm14 * zmm14) + zmm18
|
||||||
|
2868 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm15, %zmm15, %zmm18 # zmm18 = (zmm15 * zmm15) + zmm18
|
||||||
|
2869 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm18, %k1
|
||||||
|
2870 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm18, %zmm18
|
||||||
|
2871 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5
|
||||||
|
2872 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm18, %zmm12
|
||||||
|
2873 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm18, %zmm16
|
||||||
|
2874 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm16, %zmm12
|
||||||
|
2875 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm1, %zmm16
|
||||||
|
2876 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm16, %zmm16
|
||||||
|
2877 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm2, %zmm12, %zmm12
|
||||||
|
2878 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm12, %zmm16, %zmm12
|
||||||
|
2879 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21
|
||||||
|
2880 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 576(%rsp), %zmm15 # 64-byte Reload
|
||||||
|
2881 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm15, %zmm15
|
||||||
|
2882 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 128(%rsp), %zmm16 # 64-byte Reload
|
||||||
|
2883 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm16, %zmm16
|
||||||
|
2884 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 384(%rsp), %zmm18 # 64-byte Reload
|
||||||
|
2885 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm18, %zmm18
|
||||||
|
2886 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10
|
||||||
|
2887 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm18, %zmm14
|
||||||
|
2888 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm16, %zmm14 # zmm14 = (zmm16 * zmm16) + zmm14
|
||||||
|
2889 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm15, %zmm15, %zmm14 # zmm14 = (zmm15 * zmm15) + zmm14
|
||||||
|
2890 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm14, %k2
|
||||||
|
2891 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm14, %zmm14
|
||||||
|
2892 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6
|
||||||
|
2893 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm14, %zmm3
|
||||||
|
2894 | 0.25 | | | | | 0.750 | | | | || | | vmulpd %zmm14, %zmm14, %zmm12
|
||||||
|
2895 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm12, %zmm3
|
||||||
|
2896 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm14, %zmm1, %zmm12
|
||||||
|
2897 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm12, %zmm12
|
||||||
|
2898 | 0.00 | | | | | 1.000 | | | | || | | vaddpd %zmm2, %zmm3, %zmm3
|
||||||
|
2899 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm12, %zmm3
|
||||||
|
2900 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13
|
||||||
|
2901 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8
|
||||||
|
2902 | 0.24 | | | | | 0.760 | | | | || | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4
|
||||||
|
2903 | 0.00 | 0.50 | | | | -0.01 | 0.50 | | | || | | incq %rcx
|
||||||
|
2904 | 0.00 | 0.75 | | | | -0.01 | 0.25 | | | || | | cmpq %rcx, %r11
|
||||||
|
2905 | | | | | | | | | | || | | * jne .LBB5_11
|
||||||
|
2906 | | | | | | | | | | || | | * jmp .LBB5_12
|
||||||
|
2907 | | | | | | | | | | || | | # LLVM-MCA-END
|
||||||
|
|
||||||
|
40.0 2.00 5.00 5.00 5.00 5.00 39.99 2.00 59 4.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
2902 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4| [2902]
|
||||||
|
2901 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8| [2901]
|
||||||
|
2900 | 4.0 | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13| [2900]
|
||||||
|
2892 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6| [2892]
|
||||||
|
2886 | 4.0 | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10| [2886]
|
||||||
|
2879 | 4.0 | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21| [2879]
|
||||||
|
2871 | 4.0 | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5| [2871]
|
||||||
|
2865 | 4.0 | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9| [2865]
|
||||||
|
2863 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17| [2863]
|
||||||
|
2853 | 4.0 | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7| [2853]
|
||||||
|
2852 | 4.0 | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11| [2852]
|
||||||
|
2846 | 4.0 | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19| [2846]
|
||||||
|
2903 | 1.0 | incq %rcx | [2903]
|
||||||
|
|
@ -0,0 +1,79 @@
|
|||||||
|
Intel(R) Architecture Code Analyzer Version - v3.0-28-g1ba2cbb build date: 2017-10-23;16:42:45
|
||||||
|
Analyzed File - gromacs-avx512-sp.o
|
||||||
|
Binary Format - 64Bit
|
||||||
|
Architecture - SKX
|
||||||
|
Analysis Type - Throughput
|
||||||
|
|
||||||
|
Throughput Analysis Report
|
||||||
|
--------------------------
|
||||||
|
Block Throughput: 25.21 Cycles Throughput Bottleneck: Backend
|
||||||
|
Loop Count: 22
|
||||||
|
Port Binding In Cycles Per Iteration:
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
| Port | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
| Cycles | 21.0 0.0 | 2.0 | 2.0 2.0 | 2.0 2.0 | 0.0 | 21.0 | 2.0 | 0.0 |
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
DV - Divider pipe (on port 0)
|
||||||
|
D - Data fetch pipe (on ports 2 and 3)
|
||||||
|
F - Macro Fusion with the previous instruction occurred
|
||||||
|
* - instruction micro-ops not bound to a port
|
||||||
|
^ - Micro Fusion occurred
|
||||||
|
# - ESP Tracking sync uop was issued
|
||||||
|
@ - SSE instruction followed an AVX256/AVX512 instruction, dozens of cycles penalty is expected
|
||||||
|
X - instruction not supported, was not accounted in Analysis
|
||||||
|
|
||||||
|
| Num Of | Ports pressure in cycles | |
|
||||||
|
| Uops | 0 - DV | 1 | 2 - D | 3 - D | 4 | 5 | 6 | 7 |
|
||||||
|
-----------------------------------------------------------------------------------------
|
||||||
|
| 1 | | | 1.0 1.0 | | | | | | movsxd rdi, dword ptr [rsi+rdx*4]
|
||||||
|
| 1 | | 1.0 | | | | | | | lea rdi, ptr [rdi+rdi*2]
|
||||||
|
| 1 | | | | | | | 1.0 | | shl rdi, 0x5
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vmovupd zmm16, zmmword ptr [rcx+rdi*1]
|
||||||
|
| 2 | | | 1.0 1.0 | | | 1.0 | | | vinsertf64x4 zmm17, zmm16, ymmword ptr [rcx+rdi*1], 0x1
|
||||||
|
| 1 | | | | 1.0 1.0 | | | | | vbroadcastf64x4 zmm18, ymmword ptr [rcx+rdi*1+0x40]
|
||||||
|
| 1 | | | | | | 1.0 | | | vshuff64x2 zmm16, zmm16, zmm16, 0xee
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubps zmm19, zmm6, zmm17
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubps zmm20, zmm10, zmm16
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubps zmm21, zmm12, zmm18
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubps zmm17, zmm9, zmm17
|
||||||
|
| 1 | | | | | | 1.0 | | | vsubps zmm18, zmm14, zmm18
|
||||||
|
| 1 | 1.0 | | | | | | | | vsubps zmm16, zmm11, zmm16
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulps zmm22, zmm21, zmm21
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm22, zmm20, zmm20
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm22, zmm19, zmm19
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm23, zmm18, zmm18
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm23, zmm16, zmm16
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm23, zmm17, zmm17
|
||||||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm24, zmm22
|
||||||
|
| 3 | 2.0 | | | | | 1.0 | | | vrcp14ps zmm25, zmm23
|
||||||
|
| 1 | | | | | | 1.0 | | | vcmpps k2, zmm22, zmm0, 0x1
|
||||||
|
| 1 | | | | | | 1.0 | | | vcmpps k1, zmm23, zmm0, 0x1
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulps zmm22, zmm24, zmm29
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm23, zmm24, zmm24
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulps zmm26, zmm25, zmm29
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm22, zmm23, zmm22
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulps zmm23, zmm25, zmm25
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm23, zmm23, zmm26
|
||||||
|
| 1 | | | | | | 1.0 | | | vaddps zmm26, zmm22, zmm2
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm24, zmm1, zmm24
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulps zmm22, zmm24, zmm22
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm22, zmm22, zmm26
|
||||||
|
| 1 | | | | | | 1.0 | | | vaddps zmm24, zmm23, zmm2
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm25, zmm1, zmm25
|
||||||
|
| 1 | | | | | | 1.0 | | | vmulps zmm23, zmm25, zmm23
|
||||||
|
| 1 | 1.0 | | | | | | | | vmulps zmm23, zmm23, zmm24
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm13{k2}, zmm22, zmm19
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm8{k2}, zmm22, zmm20
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm5{k2}, zmm22, zmm21
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm15{k1}, zmm23, zmm17
|
||||||
|
| 1 | | | | | | 1.0 | | | vfmadd231ps zmm7{k1}, zmm23, zmm16
|
||||||
|
| 1 | 1.0 | | | | | | | | vfmadd231ps zmm4{k1}, zmm23, zmm18
|
||||||
|
| 1 | | 1.0 | | | | | | | inc rdx
|
||||||
|
| 1* | | | | | | | | | cmp r12, rdx
|
||||||
|
| 0*F | | | | | | | | | jnz 0xfffffffffffffef6
|
||||||
|
| 1 | | | | | | | 1.0 | | jmp 0xfffffffffffffb58
|
||||||
|
Total Num Of Uops: 51
|
||||||
|
Analysis Notes:
|
||||||
|
Backend allocation was stalled due to unavailable allocation resources.
|
@ -0,0 +1,84 @@
|
|||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||||
|
Analyzed file: gromacs-avx512-sp.s
|
||||||
|
Architecture: CSX
|
||||||
|
Timestamp: 2023-04-05 00:42:20
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||||
|
--------------------------------------------------------------------------------------------------
|
||||||
|
1615 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||||
|
1616 | | | | | | | | || | | .LBB2_11: #
|
||||||
|
1617 | | | | | | | | || | | # Parent Loop BB2_7 Depth=1
|
||||||
|
1618 | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||||
|
1619 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%rsi,%rdx,4), %rdi
|
||||||
|
1620 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rdi,%rdi,2), %rdi
|
||||||
|
1621 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $5, %rdi
|
||||||
|
1622 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovupd (%rcx,%rdi), %zmm16
|
||||||
|
1623 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || | | vinsertf64x4 $1, (%rcx,%rdi), %zmm16, %zmm17
|
||||||
|
1624 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vbroadcastf64x4 64(%rcx,%rdi), %zmm18 # zmm18 = mem[0,1,2,3,0,1,2,3]
|
||||||
|
1625 | | | | | | 1.00 | | || 3.0 | | vshuff64x2 $238, %zmm16, %zmm16, %zmm16 # zmm16 = zmm16[4,5,6,7,4,5,6,7]
|
||||||
|
1626 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm17, %zmm6, %zmm19
|
||||||
|
1627 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubps %zmm16, %zmm10, %zmm20
|
||||||
|
1628 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm18, %zmm12, %zmm21
|
||||||
|
1629 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm17, %zmm9, %zmm17
|
||||||
|
1630 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm18, %zmm14, %zmm18
|
||||||
|
1631 | 0.50 | | | | | 0.50 | | || | | vsubps %zmm16, %zmm11, %zmm16
|
||||||
|
1632 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm21, %zmm21, %zmm22
|
||||||
|
1633 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm20, %zmm20, %zmm22 # zmm22 = (zmm20 * zmm20) + zmm22
|
||||||
|
1634 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm19, %zmm19, %zmm22 # zmm22 = (zmm19 * zmm19) + zmm22
|
||||||
|
1635 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm18, %zmm18, %zmm23
|
||||||
|
1636 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm16, %zmm16, %zmm23 # zmm23 = (zmm16 * zmm16) + zmm23
|
||||||
|
1637 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm17, %zmm17, %zmm23 # zmm23 = (zmm17 * zmm17) + zmm23
|
||||||
|
1638 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14ps %zmm22, %zmm24
|
||||||
|
1639 | 2.50 | | | | | 0.50 | | || | | vrcp14ps %zmm23, %zmm25
|
||||||
|
1640 | | | | | | | | || | | X vcmpltps %zmm0, %zmm22, %k2
|
||||||
|
1641 | | | | | | | | || | | X vcmpltps %zmm0, %zmm23, %k1
|
||||||
|
1642 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm29, %zmm24, %zmm22
|
||||||
|
1643 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm24, %zmm24, %zmm23
|
||||||
|
1644 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm29, %zmm25, %zmm26
|
||||||
|
1645 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm22, %zmm23, %zmm22
|
||||||
|
1646 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm25, %zmm25, %zmm23
|
||||||
|
1647 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm26, %zmm23, %zmm23
|
||||||
|
1648 | 0.50 | | | | | 0.50 | | || 4.0 | | vaddps %zmm2, %zmm22, %zmm26
|
||||||
|
1649 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm24, %zmm1, %zmm24
|
||||||
|
1650 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm22, %zmm24, %zmm22
|
||||||
|
1651 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulps %zmm26, %zmm22, %zmm22
|
||||||
|
1652 | 0.50 | | | | | 0.50 | | || | | vaddps %zmm2, %zmm23, %zmm24
|
||||||
|
1653 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm25, %zmm1, %zmm25
|
||||||
|
1654 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm23, %zmm25, %zmm23
|
||||||
|
1655 | 0.50 | | | | | 0.50 | | || | | vmulps %zmm24, %zmm23, %zmm23
|
||||||
|
1656 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13
|
||||||
|
1657 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8
|
||||||
|
1658 | 0.50 | | | | | 0.50 | | || | | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5
|
||||||
|
1659 | 0.00 | | | | | 1.00 | | || | | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15
|
||||||
|
1660 | 0.00 | | | | | 1.00 | | || | | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7
|
||||||
|
1661 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4
|
||||||
|
1662 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | incq %rdx
|
||||||
|
1663 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rdx, %r12
|
||||||
|
1664 | | | | | | | | || | | * jne .LBB2_11
|
||||||
|
1665 | 0.00 | | | | | | 1.00 | || | | jmp .LBB2_12
|
||||||
|
1666 | | | | | | | | || | | # LLVM-MCA-END
|
||||||
|
|
||||||
|
20.0 2.50 2.00 2.00 2.00 2.00 20.0 2.50 53.0 4.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
1661 | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4| [1661]
|
||||||
|
1660 | 4.0 | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7| [1660]
|
||||||
|
1659 | 4.0 | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15| [1659]
|
||||||
|
1658 | 4.0 | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5| [1658]
|
||||||
|
1657 | 4.0 | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8| [1657]
|
||||||
|
1656 | 4.0 | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13| [1656]
|
||||||
|
1662 | 1.0 | incq %rdx | [1662]
|
||||||
|
|
@ -0,0 +1,84 @@
|
|||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||||
|
Analyzed file: gromacs-avx512-sp.s
|
||||||
|
Architecture: ICX
|
||||||
|
Timestamp: 2023-04-05 00:42:45
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
||||||
|
------------------------------------------------------------------------------------------------------------------------
|
||||||
|
1615 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||||
|
1616 | | | | | | | | | | || | | .LBB2_11: #
|
||||||
|
1617 | | | | | | | | | | || | | # Parent Loop BB2_7 Depth=1
|
||||||
|
1618 | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||||
|
1619 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | movslq (%rsi,%rdx,4), %rdi
|
||||||
|
1620 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || 6.0 | | leaq (%rdi,%rdi,2), %rdi
|
||||||
|
1621 | 0.00 | | | | | | 1.00 | | | || 1.0 | | shlq $5, %rdi
|
||||||
|
1622 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovupd (%rcx,%rdi), %zmm16
|
||||||
|
1623 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | | | || | | vinsertf64x4 $1, (%rcx,%rdi), %zmm16, %zmm17
|
||||||
|
1624 | | | | | | | | | | || | | X vbroadcastf64x4 64(%rcx,%rdi), %zmm18 # zmm18 = mem[0,1,2,3,0,1,2,3]
|
||||||
|
1625 | | | | | | | | | | || 0.0 | | X vshuff64x2 $238, %zmm16, %zmm16, %zmm16 # zmm16 = zmm16[4,5,6,7,4,5,6,7]
|
||||||
|
1626 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm17, %zmm6, %zmm19
|
||||||
|
1627 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubps %zmm16, %zmm10, %zmm20
|
||||||
|
1628 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm18, %zmm12, %zmm21
|
||||||
|
1629 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm17, %zmm9, %zmm17
|
||||||
|
1630 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm18, %zmm14, %zmm18
|
||||||
|
1631 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm16, %zmm11, %zmm16
|
||||||
|
1632 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm21, %zmm21, %zmm22
|
||||||
|
1633 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm20, %zmm20, %zmm22 # zmm22 = (zmm20 * zmm20) + zmm22
|
||||||
|
1634 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm19, %zmm19, %zmm22 # zmm22 = (zmm19 * zmm19) + zmm22
|
||||||
|
1635 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm18, %zmm18, %zmm23
|
||||||
|
1636 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm16, %zmm16, %zmm23 # zmm23 = (zmm16 * zmm16) + zmm23
|
||||||
|
1637 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm17, %zmm17, %zmm23 # zmm23 = (zmm17 * zmm17) + zmm23
|
||||||
|
1638 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14ps %zmm22, %zmm24
|
||||||
|
1639 | 2.50 | | | | | 0.500 | | | | || | | vrcp14ps %zmm23, %zmm25
|
||||||
|
1640 | | | | | | | | | | || | | X vcmpltps %zmm0, %zmm22, %k2
|
||||||
|
1641 | | | | | | | | | | || | | X vcmpltps %zmm0, %zmm23, %k1
|
||||||
|
1642 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm29, %zmm24, %zmm22
|
||||||
|
1643 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm24, %zmm24, %zmm23
|
||||||
|
1644 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm29, %zmm25, %zmm26
|
||||||
|
1645 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm22, %zmm23, %zmm22
|
||||||
|
1646 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm25, %zmm25, %zmm23
|
||||||
|
1647 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm26, %zmm23, %zmm23
|
||||||
|
1648 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vaddps %zmm2, %zmm22, %zmm26
|
||||||
|
1649 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm24, %zmm1, %zmm24
|
||||||
|
1650 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm22, %zmm24, %zmm22
|
||||||
|
1651 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm26, %zmm22, %zmm22
|
||||||
|
1652 | 0.50 | | | | | 0.500 | | | | || | | vaddps %zmm2, %zmm23, %zmm24
|
||||||
|
1653 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm25, %zmm1, %zmm25
|
||||||
|
1654 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm23, %zmm25, %zmm23
|
||||||
|
1655 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm24, %zmm23, %zmm23
|
||||||
|
1656 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13
|
||||||
|
1657 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8
|
||||||
|
1658 | 0.25 | | | | | 0.750 | | | | || | | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5
|
||||||
|
1659 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15
|
||||||
|
1660 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7
|
||||||
|
1661 | 0.24 | | | | | 0.760 | | | | || | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4
|
||||||
|
1662 | 0.00 | 0.50 | | | | -0.01 | 0.50 | | | || | | incq %rdx
|
||||||
|
1663 | 0.00 | 0.75 | | | | -0.01 | 0.25 | | | || | | cmpq %rdx, %r12
|
||||||
|
1664 | | | | | | | | | | || | | * jne .LBB2_11
|
||||||
|
1665 | | | | | | | | | | || | | * jmp .LBB2_12
|
||||||
|
1666 | | | | | | | | | | || | | # LLVM-MCA-END
|
||||||
|
|
||||||
|
19.5 2.00 1.50 1.50 1.50 1.50 19.49 2.00 55.0 4.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
1661 | 4.0 | vfmadd231ps %zmm18, %zmm23, %zmm4 {%k1} # zmm4 = (zmm23 * zmm18) + zmm4| [1661]
|
||||||
|
1660 | 4.0 | vfmadd231ps %zmm16, %zmm23, %zmm7 {%k1} # zmm7 = (zmm23 * zmm16) + zmm7| [1660]
|
||||||
|
1659 | 4.0 | vfmadd231ps %zmm17, %zmm23, %zmm15 {%k1} # zmm15 = (zmm23 * zmm17) + zmm15| [1659]
|
||||||
|
1658 | 4.0 | vfmadd231ps %zmm21, %zmm22, %zmm5 {%k2} # zmm5 = (zmm22 * zmm21) + zmm5| [1658]
|
||||||
|
1657 | 4.0 | vfmadd231ps %zmm20, %zmm22, %zmm8 {%k2} # zmm8 = (zmm22 * zmm20) + zmm8| [1657]
|
||||||
|
1656 | 4.0 | vfmadd231ps %zmm19, %zmm22, %zmm13 {%k2} # zmm13 = (zmm22 * zmm19) + zmm13| [1656]
|
||||||
|
1662 | 1.0 | incq %rdx | [1662]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user