MD-Bench/arch_analysis/avx512/osaca_force_aos_lt1200.txt

113 lines
15 KiB
Plaintext
Raw Normal View History

iwia021h@testfront1:~/MD-Bench/ICC$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force.s
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
Analyzed file: force.s
Architecture: CSX
Timestamp: 2021-04-26 22:33:06
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
* - Instruction micro-ops not bound to a port
X - No throughput/latency information for this instruction in data file
Combined Analysis Report
------------------------
Port pressure in cycles
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
-------------------------------------------------------------------------------------------------
261 | | | | | | | | || | | ..B1.25: # Preds ..B1.24
262 | | | | | | | | || | | # Execution count [4.50e+00]
263 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | 1.0 | movq %r8, %r13 #56.43
264 | | 1.00 | | | | | | || 3.0 | 3.0 | imulq %rcx, %r13 #56.43
265 | | | | | | 1.00 | | || | | vbroadcastsd %xmm6, %zmm2 #58.23
266 | | | | | | 1.00 | | || | | vbroadcastsd %xmm7, %zmm1 #59.23
267 | | | | | | 1.00 | | || | | vbroadcastsd %xmm12, %zmm0 #60.23
268 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r12d, %rbx #67.9
269 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | 1.0 | addq %r10, %r13 #37.5
270 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rax, -64(%rsp) #37.5[spill]
271 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r8, -56(%rsp) #37.5[spill]
272 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r10, -48(%rsp) #37.5[spill]
273 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rsi, -40(%rsp) #37.5[spill]
274 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rcx, -32(%rsp) #37.5[spill]
275 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r9, -80(%rsp) #37.5[spill]
276 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rdx, -72(%rsp) #37.5[spill]
277 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
278 | | | | | | | | || | | ..B1.26: # Preds ..B1.30 ..B1.25
279 | | | | | | | | || | | # Execution count [2.50e+01]
280 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%r13,%rbx,4), %ymm3 #68.21
281 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm3, %ymm3, %ymm4 #69.36
282 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm4, %ymm3, %ymm3 #69.36
283 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl (%r13,%rbx,4), %r10d #68.21
284 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 4(%r13,%rbx,4), %r9d #68.21
285 | | | 0.50 0.50 | 0.50 0.50 | | | | || | 4.0 | movl 8(%r13,%rbx,4), %r8d #68.21
286 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 12(%r13,%rbx,4), %esi #68.21
287 | | 1.00 | | | | 0.00 | | || | | lea (%r10,%r10,2), %r10d #69.36
288 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 16(%r13,%rbx,4), %ecx #68.21
289 | | 1.00 | | | | 0.00 | | || | | lea (%r9,%r9,2), %r9d #69.36
290 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 20(%r13,%rbx,4), %edx #68.21
291 | | 1.00 | | | | 0.00 | | || | 1.0 | lea (%r8,%r8,2), %r8d #69.36
292 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 24(%r13,%rbx,4), %eax #68.21
293 | | 1.00 | | | | 0.00 | | || | | lea (%rsi,%rsi,2), %esi #69.36
294 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 28(%r13,%rbx,4), %r15d #68.21
295 | | 1.00 | | | | 0.00 | | || | | lea (%rcx,%rcx,2), %ecx #69.36
296 | | 1.00 | | | | 0.00 | | || | | lea (%rdx,%rdx,2), %edx #69.36
297 | | 1.00 | | | | 0.00 | | || | | lea (%rax,%rax,2), %eax #69.36
298 | | 1.00 | | | | 0.00 | | || | | lea (%r15,%r15,2), %r15d #69.36
299 | | | | | | | | || | | # LOE rbx rbp rdi r13 eax edx ecx esi r8d r9d r10d r11d r12d r14d r15d xmm6 xmm7 xmm12 ymm3 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
300 | | | | | | | | || | | ..B1.29: # Preds ..B1.26
301 | | | | | | | | || | | # Execution count [1.25e+01]
302 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k1 #69.36
303 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k2 #69.36
304 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k3 #69.36
305 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm4, %zmm4, %zmm4 #69.36
306 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm17, %zmm17, %zmm17 #69.36
307 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm18, %zmm18, %zmm18 #69.36
308 | 1.50 | 0.17 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.83 | || | | vgatherdpd 16(%rdi,%ymm3,8), %zmm4{%k1} #69.36
309 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || 4.0 | | vgatherdpd 8(%rdi,%ymm3,8), %zmm17{%k2} #69.36
310 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || | | vgatherdpd (%rdi,%ymm3,8), %zmm18{%k3} #69.36
311 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 zmm17 zmm18
312 | | | | | | | | || | | ..B1.30: # Preds ..B1.29
313 | | | | | | | | || | | # Execution count [2.50e+01]
314 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $8, %r12d #67.9
315 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addq $8, %rbx #67.9
316 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm4, %zmm0, %zmm26 #71.36
317 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm17, %zmm1, %zmm24 #70.36
318 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm18, %zmm2, %zmm23 #69.36
319 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm24, %zmm24, %zmm3 #72.49
320 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm23, %zmm23, %zmm3 #72.49
321 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm26, %zmm26, %zmm3 #72.63
322 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm3, %zmm22 #75.38
323 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm3, %k2 #74.22
324 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm22, %k0 #75.38
325 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm22, %zmm3 #75.38
326 | 1.00 | | | | | | | || | | knotw %k0, %k1 #75.38
327 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm3, %zmm3, %zmm4 #75.38
328 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm22, %zmm3, %zmm22{%k1} #75.38
329 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm22, %zmm4, %zmm22{%k1} #75.38
330 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm22, %zmm17 #76.38
331 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm22, %zmm19 #77.54
332 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm22, %zmm20 #76.44
333 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm22, %zmm18 #76.50
334 | 0.50 | | | | | 0.50 | | || | | vfmsub213pd %zmm5, %zmm20, %zmm22 #77.54
335 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm21 #77.61
336 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm22, %zmm21, %zmm25 #77.67
337 | 1.00 | | | | | 0.00 | | || | | vfmadd231pd %zmm23, %zmm25, %zmm9{%k2} #78.17
338 | 1.00 | | | | | 0.00 | | || | | vfmadd231pd %zmm24, %zmm25, %zmm8{%k2} #79.17
339 | 1.00 | | | | | 0.00 | | || 4.0 | | vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #80.17
340 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpl %r14d, %r12d #67.9
341 | | | | | | | | || | | * jb ..B1.26 # Prob 82% #67.9
342 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
| 21.0 | 11.2 | 17.0 6.50 | 17.0 6.50 | 7.00 | 17.0 | 8.83 | 7.00 || 75.0 | 10.0 |
Loop-Carried Dependencies Analysis Report
-----------------------------------------
287 | 6.0 | lea (%r10,%r10,2), %r10d #69.36| [269, 283, 287]
291 | 10.0 | lea (%r8,%r8,2), %r8d #69.36| [263, 264, 269, 285, 291]
295 | 9.0 | lea (%rcx,%rcx,2), %ecx #69.36| [264, 269, 288, 295]
314 | 1.0 | addl $8, %r12d #67.9| [314]
339 | 4.0 | vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #80.17| [339]
338 | 4.0 | vfmadd231pd %zmm24, %zmm25, %zmm8{%k2} #79.17| [338]
337 | 4.0 | vfmadd231pd %zmm23, %zmm25, %zmm9{%k2} #78.17| [337]