133 lines
15 KiB
Plaintext
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||
|
Analyzed file: gromacs-avx512-dp.s
|
||
|
Architecture: CSX
|
||
|
Timestamp: 2023-04-05 00:41:22
|
||
|
|
||
|
|
||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||
|
* - Instruction micro-ops not bound to a port
|
||
|
X - No throughput/latency information for this instruction in data file
|
||
|
|
||
|
|
||
|
Combined Analysis Report
|
||
|
------------------------
|
||
|
Port pressure in cycles
|
||
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||
|
--------------------------------------------------------------------------------------------------
|
||
|
2814 | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
|
||
|
2815 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||
|
2816 | | | | | | | | || | | .LBB5_11: #
|
||
|
2817 | | | | | | | | || | | # Parent Loop BB5_7 Depth=1
|
||
|
2818 | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||
|
2819 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | movslq (%rdx,%rcx,4), %rax
|
||
|
2820 | | 1.00 | | | | 0.00 | | || 1.0 | | leaq (%rax,%rax,2), %rax
|
||
|
2821 | 0.00 | | | | | | 1.00 | || 1.0 | | shlq $6, %rax
|
||
|
2822 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovapd (%rdi,%rax), %zmm28
|
||
|
2823 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovapd 64(%rdi,%rax), %zmm29
|
||
|
2824 | | | 0.50 0.50 | 0.50 0.50 | | | | || 0.0 | | vmovapd 128(%rdi,%rax), %zmm30
|
||
|
2825 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 64(%rsp), %zmm3 # 64-byte Reload
|
||
|
2826 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm3, %zmm14
|
||
|
2827 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm26, %zmm12
|
||
|
2828 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm30, %zmm25, %zmm31
|
||
|
2829 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm23, %zmm3
|
||
|
2830 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm15
|
||
|
2831 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm12, %zmm12, %zmm15 # zmm15 = (zmm12 * zmm12) + zmm15
|
||
|
2832 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm14, %zmm14, %zmm15 # zmm15 = (zmm14 * zmm14) + zmm15
|
||
|
2833 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm15, %zmm16
|
||
|
2834 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm15, %k1
|
||
|
2835 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm16, %zmm15
|
||
|
2836 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm16, %zmm18
|
||
|
2837 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm18, %zmm15
|
||
|
2838 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 512(%rsp), %zmm18 # 64-byte Reload
|
||
|
2839 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm18, %zmm18
|
||
|
2840 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm1, %zmm16
|
||
|
2841 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm16
|
||
|
2842 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm15, %zmm15
|
||
|
2843 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm16, %zmm15
|
||
|
2844 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 448(%rsp), %zmm16 # 64-byte Reload
|
||
|
2845 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm16, %zmm16
|
||
|
2846 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19
|
||
|
2847 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm16, %zmm16, %zmm14
|
||
|
2848 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm18, %zmm14 # zmm14 = (zmm18 * zmm18) + zmm14
|
||
|
2849 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm3, %zmm14 # zmm14 = (zmm3 * zmm3) + zmm14
|
||
|
2850 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm14, %k2
|
||
|
2851 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm14, %zmm14
|
||
|
2852 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11
|
||
|
2853 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7
|
||
|
2854 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm14, %zmm12
|
||
|
2855 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm14, %zmm15
|
||
|
2856 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm15, %zmm12
|
||
|
2857 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm24, %zmm15
|
||
|
2858 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm1, %zmm14
|
||
|
2859 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm14, %zmm14
|
||
|
2860 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm12, %zmm12
|
||
|
2861 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm14, %zmm12
|
||
|
2862 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm22, %zmm14
|
||
|
2863 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17
|
||
|
2864 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm27, %zmm3
|
||
|
2865 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9
|
||
|
2866 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm3, %zmm3, %zmm18
|
||
|
2867 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm14, %zmm14, %zmm18 # zmm18 = (zmm14 * zmm14) + zmm18
|
||
|
2868 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm15, %zmm18 # zmm18 = (zmm15 * zmm15) + zmm18
|
||
|
2869 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm18, %k1
|
||
|
2870 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm18, %zmm18
|
||
|
2871 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5
|
||
|
2872 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm18, %zmm12
|
||
|
2873 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm16
|
||
|
2874 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm12
|
||
|
2875 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm1, %zmm16
|
||
|
2876 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm16
|
||
|
2877 | 0.50 | | | | | 0.50 | | || | | vaddpd %zmm2, %zmm12, %zmm12
|
||
|
2878 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm16, %zmm12
|
||
|
2879 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21
|
||
|
2880 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 576(%rsp), %zmm15 # 64-byte Reload
|
||
|
2881 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm28, %zmm15, %zmm15
|
||
|
2882 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 128(%rsp), %zmm16 # 64-byte Reload
|
||
|
2883 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm29, %zmm16, %zmm16
|
||
|
2884 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovupd 384(%rsp), %zmm18 # 64-byte Reload
|
||
|
2885 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm30, %zmm18, %zmm18
|
||
|
2886 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10
|
||
|
2887 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm18, %zmm18, %zmm14
|
||
|
2888 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm16, %zmm16, %zmm14 # zmm14 = (zmm16 * zmm16) + zmm14
|
||
|
2889 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm15, %zmm15, %zmm14 # zmm14 = (zmm15 * zmm15) + zmm14
|
||
|
2890 | | | | | | | | || | | X vcmpltpd %zmm0, %zmm14, %k2
|
||
|
2891 | 2.50 | | | | | 0.50 | | || | | vrcp14pd %zmm14, %zmm14
|
||
|
2892 | 0.50 | | | | | 0.50 | | || | | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6
|
||
|
2893 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm20, %zmm14, %zmm3
|
||
|
2894 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm14, %zmm12
|
||
|
2895 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm3
|
||
|
2896 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm14, %zmm1, %zmm12
|
||
|
2897 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm12
|
||
|
2898 | 0.00 | | | | | 1.00 | | || | | vaddpd %zmm2, %zmm3, %zmm3
|
||
|
2899 | 0.00 | | | | | 1.00 | | || | | vmulpd %zmm3, %zmm12, %zmm3
|
||
|
2900 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13
|
||
|
2901 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8
|
||
|
2902 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4
|
||
|
2903 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | incq %rcx
|
||
|
2904 | 0.00 | 1.00 | | | | 0.00 | 0.00 | || | | cmpq %rcx, %r11
|
||
|
2905 | | | | | | | | || | | * jne .LBB5_11
|
||
|
2906 | 0.00 | | | | | | 1.00 | || | | jmp .LBB5_12
|
||
|
2907 | | | | | | | | || | | # LLVM-MCA-END
|
||
|
|
||
|
| 38.0 | 2.50 | 5.00 5.00 | 5.00 5.00 | | 38.0 | 2.50 | || 50.0 | 4.0 |
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
Loop-Carried Dependencies Analysis Report
|
||
|
-----------------------------------------
|
||
|
2902 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm4 {%k2} # zmm4 = (zmm3 * zmm18) + zmm4| [2902]
|
||
|
2901 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm8 {%k2} # zmm8 = (zmm3 * zmm16) + zmm8| [2901]
|
||
|
2900 | 4.0 | vfmadd231pd %zmm15, %zmm3, %zmm13 {%k2} # zmm13 = (zmm3 * zmm15) + zmm13| [2900]
|
||
|
2892 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm6 {%k1} # zmm6 = (zmm12 * zmm3) + zmm6| [2892]
|
||
|
2886 | 4.0 | vfmadd231pd %zmm14, %zmm12, %zmm10 {%k1} # zmm10 = (zmm12 * zmm14) + zmm10| [2886]
|
||
|
2879 | 4.0 | vfmadd231pd %zmm15, %zmm12, %zmm21 {%k1} # zmm21 = (zmm12 * zmm15) + zmm21| [2879]
|
||
|
2871 | 4.0 | vfmadd231pd %zmm16, %zmm12, %zmm5 {%k2} # zmm5 = (zmm12 * zmm16) + zmm5| [2871]
|
||
|
2865 | 4.0 | vfmadd231pd %zmm18, %zmm12, %zmm9 {%k2} # zmm9 = (zmm12 * zmm18) + zmm9| [2865]
|
||
|
2863 | 4.0 | vfmadd231pd %zmm3, %zmm12, %zmm17 {%k2} # zmm17 = (zmm12 * zmm3) + zmm17| [2863]
|
||
|
2853 | 4.0 | vfmadd231pd %zmm31, %zmm15, %zmm7 {%k1} # zmm7 = (zmm15 * zmm31) + zmm7| [2853]
|
||
|
2852 | 4.0 | vfmadd231pd %zmm12, %zmm15, %zmm11 {%k1} # zmm11 = (zmm15 * zmm12) + zmm11| [2852]
|
||
|
2846 | 4.0 | vfmadd231pd %zmm14, %zmm15, %zmm19 {%k1} # zmm19 = (zmm15 * zmm14) + zmm19| [2846]
|
||
|
2903 | 1.0 | incq %rcx | [2903]
|
||
|
|