MD-Bench/static_analysis/rafael/analyses/gromacs-icx-avx512-dp-osaca-csx.out

133 lines
15 KiB
Plaintext
Raw Normal View History

Open Source Architecture Code Analyzer (OSACA) - 0.4.12
Analyzed file: gromacs-avx512-dp.s
Architecture: CSX
Timestamp: 2023-04-05 00:41:22
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
* - Instruction micro-ops not bound to a port
X - No throughput/latency information for this instruction in data file
Combined Analysis Report
------------------------
Port pressure in cycles
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
--------------------------------------------------------------------------------------------------
2814 | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
2815 | | | | | | | | || | | # LLVM-MCA-BEGIN
2816 | | | | | | | | || | | .LBB5_11: #
2817 | | | | | | | | || | | # Parent Loop BB5_7 Depth=1
2818 | | | | | | | | || | | # => This Inner Loop Header: Depth=2
2819 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%rdx,%rcx,4), %rax
2820 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rax,%rax,2), %rax
2821 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $6, %rax
2822 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovapd (%rdi,%rax), %zmm28
2823 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovapd 64(%rdi,%rax), %zmm29
2824 | | | 0.50 0.50 | 0.50 0.50 | | | | || 0.0 | | vmovapd 128(%rdi,%rax), %zmm30
2825 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 64(%rsp), %zmm3 # 64-byte Reload
2826 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm3, %zmm14
2827 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm26, %zmm12
2828 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm30, %zmm25, %zmm31
2829 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm23, %zmm3
2830 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm15
2831 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm12, %zmm12, %zmm15 # zmm15 = (zmm12 * zmm12) + zmm15
2832 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm14, %zmm14, %zmm15 # zmm15 = (zmm14 * zmm14) + zmm15
2833 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm15, %zmm16
2834 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm15, %k1
2835 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm16, %zmm15
2836 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm16, %zmm18
2837 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm18, %zmm15
2838 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 512(%rsp), %zmm18 # 64-byte Reload
2839 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm18, %zmm18
2840 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm1, %zmm16
2841 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm16
2842 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm15, %zmm15
2843 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm15
2844 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 448(%rsp), %zmm16 # 64-byte Reload
2845 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm16, %zmm16
2846 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19
2847 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm16, %zmm14
2848 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm18, %zmm14 # zmm14 = (zmm18 * zmm18) + zmm14
2849 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm3, %zmm14 # zmm14 = (zmm3 * zmm3) + zmm14
2850 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm14, %k2
2851 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm14, %zmm14
2852 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11
2853 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7
2854 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm14, %zmm12
2855 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm14, %zmm15
2856 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm15, %zmm12
2857 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm24, %zmm15
2858 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm1, %zmm14
2859 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm14, %zmm14
2860 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm12, %zmm12
2861 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm14, %zmm12
2862 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm22, %zmm14
2863 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17
2864 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm27, %zmm3
2865 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9
2866 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm3, %zmm3, %zmm18
2867 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm14, %zmm14, %zmm18 # zmm18 = (zmm14 * zmm14) + zmm18
2868 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm15, %zmm18 # zmm18 = (zmm15 * zmm15) + zmm18
2869 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm18, %k1
2870 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm18, %zmm18
2871 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5
2872 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm18, %zmm12
2873 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm16
2874 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm12
2875 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm1, %zmm16
2876 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm16
2877 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm12, %zmm12
2878 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm12
2879 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21
2880 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 576(%rsp), %zmm15 # 64-byte Reload
2881 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm15, %zmm15
2882 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 128(%rsp), %zmm16 # 64-byte Reload
2883 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm16, %zmm16
2884 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 384(%rsp), %zmm18 # 64-byte Reload
2885 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm18, %zmm18
2886 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10
2887 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm14
2888 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm16, %zmm14 # zmm14 = (zmm16 * zmm16) + zmm14
2889 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm15, %zmm14 # zmm14 = (zmm15 * zmm15) + zmm14
2890 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm14, %k2
2891 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm14, %zmm14
2892 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6
2893 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm14, %zmm3
2894 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm14, %zmm12
2895 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm3
2896 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm14, %zmm1, %zmm12
2897 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm12
2898 | 0.00 | | | | | 1.00 | | || | | vaddpd %zmm2, %zmm3, %zmm3
2899 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm3
2900 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13
2901 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8
2902 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4
2903 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | incq %rcx
2904 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rcx, %r11
2905 | | | | | | | | || | | * jne .LBB5_11
2906 | 0.00 | | | | | | 1.00 | || | | jmp .LBB5_12
2907 | | | | | | | | || | | # LLVM-MCA-END
| 38.0 | 2.50 | 5.00 5.00 | 5.00 5.00 | | 38.0 | 2.50 | || 50.0 | 4.0 |
Loop-Carried Dependencies Analysis Report
-----------------------------------------
2902 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4| [2902]
2901 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8| [2901]
2900 | 4.0 | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13| [2900]
2892 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6| [2892]
2886 | 4.0 | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10| [2886]
2879 | 4.0 | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21| [2879]
2871 | 4.0 | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5| [2871]
2865 | 4.0 | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9| [2865]
2863 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17| [2863]
2853 | 4.0 | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7| [2853]
2852 | 4.0 | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11| [2852]
2846 | 4.0 | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19| [2846]
2903 | 1.0 | incq %rcx | [2903]