Add OSACA output for SOA
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
1a195a30e2
commit
d0d2bf8a0c
112
arch_analysis/osaca_output_force_soa.txt
Normal file
112
arch_analysis/osaca_output_force_soa.txt
Normal file
@ -0,0 +1,112 @@
|
||||
iwia021h@testfront1:~/MD-Bench/ICC$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force.s
|
||||
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
|
||||
Analyzed file: force.s
|
||||
Architecture: CSX
|
||||
Timestamp: 2021-04-26 22:33:06
|
||||
|
||||
|
||||
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||
* - Instruction micro-ops not bound to a port
|
||||
X - No throughput/latency information for this instruction in data file
|
||||
|
||||
|
||||
Combined Analysis Report
|
||||
------------------------
|
||||
Port pressure in cycles
|
||||
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||
-------------------------------------------------------------------------------------------------
|
||||
261 | | | | | | | | || | | ..B1.25: # Preds ..B1.24
|
||||
262 | | | | | | | | || | | # Execution count [4.50e+00]
|
||||
263 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | 1.0 | movq %r8, %r13 #56.43
|
||||
264 | | 1.00 | | | | | | || 3.0 | 3.0 | imulq %rcx, %r13 #56.43
|
||||
265 | | | | | | 1.00 | | || | | vbroadcastsd %xmm6, %zmm2 #58.23
|
||||
266 | | | | | | 1.00 | | || | | vbroadcastsd %xmm7, %zmm1 #59.23
|
||||
267 | | | | | | 1.00 | | || | | vbroadcastsd %xmm12, %zmm0 #60.23
|
||||
268 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r12d, %rbx #67.9
|
||||
269 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | 1.0 | addq %r10, %r13 #37.5
|
||||
270 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rax, -64(%rsp) #37.5[spill]
|
||||
271 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r8, -56(%rsp) #37.5[spill]
|
||||
272 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r10, -48(%rsp) #37.5[spill]
|
||||
273 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rsi, -40(%rsp) #37.5[spill]
|
||||
274 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rcx, -32(%rsp) #37.5[spill]
|
||||
275 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r9, -80(%rsp) #37.5[spill]
|
||||
276 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rdx, -72(%rsp) #37.5[spill]
|
||||
277 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
|
||||
278 | | | | | | | | || | | ..B1.26: # Preds ..B1.30 ..B1.25
|
||||
279 | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
280 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%r13,%rbx,4), %ymm3 #68.21
|
||||
281 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm3, %ymm3, %ymm4 #69.36
|
||||
282 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm4, %ymm3, %ymm3 #69.36
|
||||
283 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl (%r13,%rbx,4), %r10d #68.21
|
||||
284 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 4(%r13,%rbx,4), %r9d #68.21
|
||||
285 | | | 0.50 0.50 | 0.50 0.50 | | | | || | 4.0 | movl 8(%r13,%rbx,4), %r8d #68.21
|
||||
286 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 12(%r13,%rbx,4), %esi #68.21
|
||||
287 | | 1.00 | | | | 0.00 | | || | | lea (%r10,%r10,2), %r10d #69.36
|
||||
288 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 16(%r13,%rbx,4), %ecx #68.21
|
||||
289 | | 1.00 | | | | 0.00 | | || | | lea (%r9,%r9,2), %r9d #69.36
|
||||
290 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 20(%r13,%rbx,4), %edx #68.21
|
||||
291 | | 1.00 | | | | 0.00 | | || | 1.0 | lea (%r8,%r8,2), %r8d #69.36
|
||||
292 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 24(%r13,%rbx,4), %eax #68.21
|
||||
293 | | 1.00 | | | | 0.00 | | || | | lea (%rsi,%rsi,2), %esi #69.36
|
||||
294 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 28(%r13,%rbx,4), %r15d #68.21
|
||||
295 | | 1.00 | | | | 0.00 | | || | | lea (%rcx,%rcx,2), %ecx #69.36
|
||||
296 | | 1.00 | | | | 0.00 | | || | | lea (%rdx,%rdx,2), %edx #69.36
|
||||
297 | | 1.00 | | | | 0.00 | | || | | lea (%rax,%rax,2), %eax #69.36
|
||||
298 | | 1.00 | | | | 0.00 | | || | | lea (%r15,%r15,2), %r15d #69.36
|
||||
299 | | | | | | | | || | | # LOE rbx rbp rdi r13 eax edx ecx esi r8d r9d r10d r11d r12d r14d r15d xmm6 xmm7 xmm12 ymm3 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
|
||||
300 | | | | | | | | || | | ..B1.29: # Preds ..B1.26
|
||||
301 | | | | | | | | || | | # Execution count [1.25e+01]
|
||||
302 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k1 #69.36
|
||||
303 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k2 #69.36
|
||||
304 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k3 #69.36
|
||||
305 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm4, %zmm4, %zmm4 #69.36
|
||||
306 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm17, %zmm17, %zmm17 #69.36
|
||||
307 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm18, %zmm18, %zmm18 #69.36
|
||||
308 | 1.50 | 0.17 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.83 | || | | vgatherdpd 16(%rdi,%ymm3,8), %zmm4{%k1} #69.36
|
||||
309 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || 4.0 | | vgatherdpd 8(%rdi,%ymm3,8), %zmm17{%k2} #69.36
|
||||
310 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || | | vgatherdpd (%rdi,%ymm3,8), %zmm18{%k3} #69.36
|
||||
311 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 zmm17 zmm18
|
||||
312 | | | | | | | | || | | ..B1.30: # Preds ..B1.29
|
||||
313 | | | | | | | | || | | # Execution count [2.50e+01]
|
||||
314 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $8, %r12d #67.9
|
||||
315 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addq $8, %rbx #67.9
|
||||
316 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm4, %zmm0, %zmm26 #71.36
|
||||
317 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm17, %zmm1, %zmm24 #70.36
|
||||
318 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm18, %zmm2, %zmm23 #69.36
|
||||
319 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm24, %zmm24, %zmm3 #72.49
|
||||
320 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm23, %zmm23, %zmm3 #72.49
|
||||
321 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm26, %zmm26, %zmm3 #72.63
|
||||
322 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm3, %zmm22 #75.38
|
||||
323 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm3, %k2 #74.22
|
||||
324 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm22, %k0 #75.38
|
||||
325 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm22, %zmm3 #75.38
|
||||
326 | 1.00 | | | | | | | || | | knotw %k0, %k1 #75.38
|
||||
327 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm3, %zmm3, %zmm4 #75.38
|
||||
328 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm22, %zmm3, %zmm22{%k1} #75.38
|
||||
329 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm22, %zmm4, %zmm22{%k1} #75.38
|
||||
330 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm22, %zmm17 #76.38
|
||||
331 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm22, %zmm19 #77.54
|
||||
332 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm22, %zmm20 #76.44
|
||||
333 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm22, %zmm18 #76.50
|
||||
334 | 0.50 | | | | | 0.50 | | || | | vfmsub213pd %zmm5, %zmm20, %zmm22 #77.54
|
||||
335 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm21 #77.61
|
||||
336 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm22, %zmm21, %zmm25 #77.67
|
||||
337 | 1.00 | | | | | 0.00 | | || | | vfmadd231pd %zmm23, %zmm25, %zmm9{%k2} #78.17
|
||||
338 | 1.00 | | | | | 0.00 | | || | | vfmadd231pd %zmm24, %zmm25, %zmm8{%k2} #79.17
|
||||
339 | 1.00 | | | | | 0.00 | | || 4.0 | | vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #80.17
|
||||
340 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpl %r14d, %r12d #67.9
|
||||
341 | | | | | | | | || | | * jb ..B1.26 # Prob 82% #67.9
|
||||
342 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14
|
||||
|
||||
21.0 11.2 17.0 6.50 17.0 6.50 7.00 17.0 8.83 7.00 75.0 10.0
|
||||
|
||||
|
||||
Loop-Carried Dependencies Analysis Report
|
||||
-----------------------------------------
|
||||
287 | 6.0 | lea (%r10,%r10,2), %r10d #69.36| [269, 283, 287]
|
||||
291 | 10.0 | lea (%r8,%r8,2), %r8d #69.36| [263, 264, 269, 285, 291]
|
||||
295 | 9.0 | lea (%rcx,%rcx,2), %ecx #69.36| [264, 269, 288, 295]
|
||||
314 | 1.0 | addl $8, %r12d #67.9| [314]
|
||||
339 | 4.0 | vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #80.17| [339]
|
||||
338 | 4.0 | vfmadd231pd %zmm24, %zmm25, %zmm8{%k2} #79.17| [338]
|
||||
337 | 4.0 | vfmadd231pd %zmm23, %zmm25, %zmm9{%k2} #78.17| [337]
|
Loading…
Reference in New Issue
Block a user