98 lines
12 KiB
Plaintext
98 lines
12 KiB
Plaintext
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||
|
Analyzed file: lammps-icc-avx2.s
|
||
|
Architecture: CSX
|
||
|
Timestamp: 2023-02-10 16:29:58
|
||
|
|
||
|
|
||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||
|
* - Instruction micro-ops not bound to a port
|
||
|
X - No throughput/latency information for this instruction in data file
|
||
|
|
||
|
|
||
|
Combined Analysis Report
|
||
|
------------------------
|
||
|
Port pressure in cycles
|
||
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||
|
----------------------------------------------------------------------------------------------------
|
||
|
256 | | | | | | | | || | | # pointer_increment=32 724d27eafcb27eabca1528ddfdbdba3e
|
||
|
257 | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||
|
258 | | | | | | | | || | | ..B1.22: # Preds ..B1.24 ..B1.21
|
||
|
259 | | | | | | | | || | | # Execution count [2.50e+01]
|
||
|
260 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%rbx,%rdx,4), %xmm0 #60.21
|
||
|
261 | 1.00 | | | | | | | || 1.0 | | vmovq %xmm0, %rcx #60.21
|
||
|
262 | | | | | | 1.000 | | || | | vpunpckhqdq %xmm0, %xmm0, %xmm2 #60.21
|
||
|
263 | 1.00 | | | | | | | || | | vmovq %xmm2, %r15 #60.21
|
||
|
264 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || 1.0 | | movl %ecx, %r8d #60.21
|
||
|
265 | 0.00 | | | | | | 1.00 | || | | shrq $32, %rcx #60.21
|
||
|
266 | | 0.500 | | | | 0.500 | | || | | lea (%rcx,%rcx,2), %r14d #61.36
|
||
|
267 | | 0.500 | | | | 0.500 | | || 1.0 | | lea (%r8,%r8,2), %r8d #61.36
|
||
|
268 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || 1.0 | | movslq %r8d, %rcx #61.36
|
||
|
269 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movslq %r14d, %r8 #61.36
|
||
|
270 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movl %r15d, %r14d #60.21
|
||
|
271 | 0.00 | | | | | | 1.00 | || | | shrq $32, %r15 #60.21
|
||
|
272 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovups (%r11,%rcx,8), %xmm7 #61.36
|
||
|
273 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%r11,%r8,8), %xmm6 #61.36
|
||
|
274 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%r11,%rcx,8), %xmm14 #61.36
|
||
|
275 | | 0.500 | | | | 0.500 | | || | | lea (%r14,%r14,2), %r14d #61.36
|
||
|
276 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movslq %r14d, %r14 #61.36
|
||
|
277 | | 0.500 | | | | 0.500 | | || | | lea (%r15,%r15,2), %r15d #61.36
|
||
|
278 | 0.00 | 0.000 | | | | 0.000 | 1.00 | || | | movslq %r15d, %r15 #61.36
|
||
|
279 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || | | vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 #61.36
|
||
|
280 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || 3.0 | | vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 #61.36
|
||
|
281 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%r11,%r14,8), %xmm0 #61.36
|
||
|
282 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || | | vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 #61.36
|
||
|
283 | | | 0.50 0.50 | 0.50 0.50 | | 1.000 | | || | | vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 #61.36
|
||
|
284 | | | | | | 1.000 | | || | | vunpcklpd %ymm6, %ymm1, %ymm14 #61.36
|
||
|
285 | | | | | | 1.000 | | || 1.0 | | vunpckhpd %ymm6, %ymm1, %ymm1 #61.36
|
||
|
286 | 0.50 | 0.500 | | | | | | || | | vsubpd %ymm14, %ymm10, %ymm6 #61.36
|
||
|
287 | | | | | | 1.000 | | || | | vinsertf128 $1, %xmm2, %ymm15, %ymm7 #61.36
|
||
|
288 | 0.50 | 0.500 | | | | | | || 4.0 | | vsubpd %ymm1, %ymm9, %ymm2 #62.36
|
||
|
289 | 0.50 | 0.500 | | | | | | || | | vsubpd %ymm7, %ymm8, %ymm0 #63.36
|
||
|
290 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm2, %ymm2, %ymm14 #64.49
|
||
|
291 | 0.50 | 0.500 | | | | | | || 4.0 | | vfmadd231pd %ymm6, %ymm6, %ymm14 #64.49
|
||
|
292 | 0.50 | 0.500 | | | | | | || 4.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.63
|
||
|
293 | | | | | | 1.000 | | || | | vcmpltpd %ymm5, %ymm14, %ymm1 #74.22
|
||
|
294 | 0.50 | 0.500 | | | | | | || | | vpcmpeqd %ymm7, %ymm7, %ymm7 #74.22
|
||
|
295 | 1.00 | | | | | 1.000 | | || | | vptest %ymm7, %ymm1 #74.22
|
||
|
296 | | | | | | | | || | | #je ..B1.24 # Prob 50% #74.22
|
||
|
297 | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14
|
||
|
298 | | | | | | | | || | | ..B1.23: # Preds ..B1.22
|
||
|
299 | | | | | | | | || | | # Execution count [1.25e+01]
|
||
|
300 | 1.00 8.00 | | | | | | | || 15.0 | | vdivpd %ymm14, %ymm4, %ymm7 #75.39
|
||
|
301 | 0.50 | 0.500 | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmulpd 96(%rsp), %ymm7, %ymm14 #76.38[spill]
|
||
|
302 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm14 #76.44
|
||
|
303 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm15 #76.50
|
||
|
304 | 0.50 | 0.500 | | | | | | || | | vfmsub213pd %ymm3, %ymm7, %ymm14 #77.55
|
||
|
305 | 0.50 | 0.500 | 0.50 0.50 | 0.50 0.50 | | | | || | | vmulpd 64(%rsp), %ymm7, %ymm7 #77.55[spill]
|
||
|
306 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm7, %ymm15, %ymm15 #77.64
|
||
|
307 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm14, %ymm15, %ymm7 #77.70
|
||
|
308 | 0.50 | 0.500 | | | | | | || 4.0 | | vmulpd %ymm7, %ymm6, %ymm6 #78.31
|
||
|
309 | 0.50 | 0.500 | | | | | | || | | vmulpd %ymm7, %ymm2, %ymm2 #79.31
|
||
|
310 | 0.25 | 0.253 | | | | 0.493 | | || 1.0 | | vandpd %ymm6, %ymm1, %ymm6 #78.31
|
||
|
311 | 0.50 | 0.500 | | | | | | || 4.0 | | vaddpd %ymm6, %ymm13, %ymm13 #78.17
|
||
|
312 | 0.25 | 0.750 | | | | | | || | | vmulpd %ymm7, %ymm0, %ymm6 #80.31
|
||
|
313 | 0.16 | 0.417 | | | | 0.423 | | || | | vandpd %ymm2, %ymm1, %ymm0 #79.31
|
||
|
314 | 0.00 | 0.250 | | | | 0.750 | | || | | vandpd %ymm6, %ymm1, %ymm1 #80.31
|
||
|
315 | 0.00 | 1.000 | | | | | | || | | vaddpd %ymm0, %ymm12, %ymm12 #79.17
|
||
|
316 | 0.50 | 0.500 | | | | | | || | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17
|
||
|
317 | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm3 ymm4 ymm5 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13
|
||
|
318 | | | | | | | | || | | ..B1.24: # Preds ..B1.23 ..B1.22
|
||
|
319 | | | | | | | | || | | # Execution count [2.50e+01]
|
||
|
320 | 0.00 | 0.000 | | | | -0.01 | 1.00 | || | | addq $4, %rdx #59.9
|
||
|
321 | 0.00 | -0.01 | | | | 0.000 | 1.00 | || | | cmpq %rsi, %rdx #59.9
|
||
|
322 | | | | | | | | || | | * jb ..B1.22 # Prob 82% #59.9
|
||
|
323 | | | | | | | | || | | # LLVM-MCA-END
|
||
|
|
||
|
| 13.7 8.00 | 13.66 | 5.50 5.50 | 5.50 5.50 | | 13.66 | 10.0 | || 76.0 | 4.0 |
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
Loop-Carried Dependencies Analysis Report
|
||
|
-----------------------------------------
|
||
|
316 | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17| [316]
|
||
|
315 | 4.0 | vaddpd %ymm0, %ymm12, %ymm12 #79.17| [315]
|
||
|
311 | 4.0 | vaddpd %ymm6, %ymm13, %ymm13 #78.17| [311]
|
||
|
320 | 1.0 | addq $4, %rdx #59.9| [320]
|
||
|
|