78 lines
8.7 KiB
Plaintext
78 lines
8.7 KiB
Plaintext
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||
|
Analyzed file: lammps-icc-avx512.s
|
||
|
Architecture: CSX
|
||
|
Timestamp: 2023-02-10 16:30:08
|
||
|
|
||
|
|
||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||
|
* - Instruction micro-ops not bound to a port
|
||
|
X - No throughput/latency information for this instruction in data file
|
||
|
|
||
|
|
||
|
Combined Analysis Report
|
||
|
------------------------
|
||
|
Port pressure in cycles
|
||
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||
|
--------------------------------------------------------------------------------------------------
|
||
|
200 | | | | | | | | || | | # pointer_increment=64 1303ca335e79351a96cfc07b8b9ec9d4
|
||
|
201 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||
|
202 | | | | | | | | || | | ..B1.16: # Preds ..B1.16 ..B1.15
|
||
|
203 | | | | | | | | || | | # Execution count [2.50e+01]
|
||
|
204 | | | | | | 1.00 | | || | | vpcmpgtd %ymm4, %ymm3, %k5 #59.9
|
||
|
205 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm15, %ymm4, %ymm4 #59.9
|
||
|
206 | 0.00 | 1.00 | 0.50 0.50 | 0.50 0.50 | | 0.00 | | || 0.0 | | vmovdqu32 (%r10,%r15,4), %ymm17{%k5}{z} #60.21
|
||
|
207 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm17, %ymm17, %ymm18 #61.36
|
||
|
208 | 0.00 | 0.16 | | | | 0.00 | 0.84 | || | | addq $8, %r15 #59.9
|
||
|
209 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm18, %ymm17, %ymm19 #61.36
|
||
|
210 | 1.00 | | | | | | | || | | kmovw %k5, %k2 #61.36
|
||
|
211 | 1.00 | | | | | | | || | | kmovw %k5, %k3 #61.36
|
||
|
212 | 1.00 | | | | | | | || | | kmovw %k5, %k1 #61.36
|
||
|
213 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm21, %zmm21, %zmm21 #61.36
|
||
|
214 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm20, %zmm20, %zmm20 #61.36
|
||
|
215 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm22, %zmm22, %zmm22 #61.36
|
||
|
216 | 1.25 | 0.75 | 5.00 5.00 | 5.00 5.00 | | 0.25 | 0.75 | || 24.0 | | vgatherdpd 8(%rbx,%ymm19,8), %zmm21{%k2} #61.36
|
||
|
217 | 1.25 | 0.25 | 5.00 5.00 | 5.00 5.00 | | 0.25 | 1.25 | || | | vgatherdpd (%rbx,%ymm19,8), %zmm20{%k3} #61.36
|
||
|
218 | 1.25 | 0.09 | 5.00 5.00 | 5.00 5.00 | | 0.25 | 1.41 | || | | vgatherdpd 16(%rbx,%ymm19,8), %zmm22{%k1} #61.36
|
||
|
219 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm21, %zmm1, %zmm18 #62.36
|
||
|
220 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm20, %zmm2, %zmm17 #61.36
|
||
|
221 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm22, %zmm0, %zmm19 #63.36
|
||
|
222 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm18, %zmm18, %zmm31 #64.49
|
||
|
223 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm17, %zmm17, %zmm31 #64.49
|
||
|
224 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm19, %zmm19, %zmm31 #64.63
|
||
|
225 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm31, %zmm30 #75.39
|
||
|
226 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm31, %k6{%k5} #74.22
|
||
|
227 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm30, %k0 #75.39
|
||
|
228 | | | | | | | | || | | * vmovaps %zmm31, %zmm23 #75.39
|
||
|
229 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm30, %zmm23 #75.39
|
||
|
230 | 1.00 | | | | | | | || | | knotw %k0, %k4 #75.39
|
||
|
231 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm23, %zmm23, %zmm24 #75.39
|
||
|
232 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm30, %zmm23, %zmm30{%k4} #75.39
|
||
|
233 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm30, %zmm24, %zmm30{%k4} #75.39
|
||
|
234 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm30, %zmm25 #76.38
|
||
|
235 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm12, %zmm30, %zmm27 #77.55
|
||
|
236 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm25, %zmm30, %zmm28 #76.44
|
||
|
237 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm28, %zmm30, %zmm26 #76.50
|
||
|
238 | 0.00 | | | | | 1.00 | | || | | vfmsub213pd %zmm5, %zmm28, %zmm30 #77.55
|
||
|
239 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm27, %zmm26, %zmm29 #77.64
|
||
|
240 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm30, %zmm29, %zmm23 #77.70
|
||
|
241 | 0.00 | | | | | 1.00 | | || 4.0 | | vfmadd231pd %zmm17, %zmm23, %zmm10{%k6} #78.17
|
||
|
242 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm18, %zmm23, %zmm9{%k6} #79.17
|
||
|
243 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231pd %zmm19, %zmm23, %zmm8{%k6} #80.17
|
||
|
244 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpq %r14, %r15 #59.9
|
||
|
245 | | | | | | | | || | | * jb ..B1.16 # Prob 82% #59.9
|
||
|
246 | | | | | | | | || | | # LLVM-MCA-END
|
||
|
|
||
|
18.8 5.25 16.0 16.0 16.0 16.0 18.8 5.25 86.0 4.0
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
Loop-Carried Dependencies Analysis Report
|
||
|
-----------------------------------------
|
||
|
243 | 4.0 | vfmadd231pd %zmm19, %zmm23, %zmm8{%k6} #80.17| [243]
|
||
|
242 | 4.0 | vfmadd231pd %zmm18, %zmm23, %zmm9{%k6} #79.17| [242]
|
||
|
241 | 4.0 | vfmadd231pd %zmm17, %zmm23, %zmm10{%k6} #78.17| [241]
|
||
|
208 | 1.0 | addq $8, %r15 #59.9| [208]
|
||
|
205 | 1.0 | vpaddd %ymm15, %ymm4, %ymm4 #59.9| [205]
|
||
|
|