From d0d2bf8a0c7dae4a64771d1ad9609e4442acb831 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Tue, 27 Apr 2021 00:36:34 +0200 Subject: [PATCH] Add OSACA output for SOA Signed-off-by: Rafael Ravedutti --- arch_analysis/osaca_output_force_soa.txt | 112 +++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 arch_analysis/osaca_output_force_soa.txt diff --git a/arch_analysis/osaca_output_force_soa.txt b/arch_analysis/osaca_output_force_soa.txt new file mode 100644 index 0000000..a30919e --- /dev/null +++ b/arch_analysis/osaca_output_force_soa.txt @@ -0,0 +1,112 @@ +iwia021h@testfront1:~/MD-Bench/ICC$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force.s +Open Source Architecture Code Analyzer (OSACA) - 0.3.14 +Analyzed file: force.s +Architecture: CSX +Timestamp: 2021-04-26 22:33:06 + + + P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction + * - Instruction micro-ops not bound to a port + X - No throughput/latency information for this instruction in data file + + +Combined Analysis Report +------------------------ + Port pressure in cycles + | 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD | +------------------------------------------------------------------------------------------------- + 261 | | | | | | | | || | | ..B1.25: # Preds ..B1.24 + 262 | | | | | | | | || | | # Execution count [4.50e+00] + 263 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | 1.0 | movq %r8, %r13 #56.43 + 264 | | 1.00 | | | | | | || 3.0 | 3.0 | imulq %rcx, %r13 #56.43 + 265 | | | | | | 1.00 | | || | | vbroadcastsd %xmm6, %zmm2 #58.23 + 266 | | | | | | 1.00 | | || | | vbroadcastsd %xmm7, %zmm1 #59.23 + 267 | | | | | | 1.00 | | || | | vbroadcastsd %xmm12, %zmm0 #60.23 + 268 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r12d, %rbx #67.9 + 269 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | 1.0 | addq %r10, %r13 #37.5 + 270 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rax, -64(%rsp) #37.5[spill] + 271 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r8, -56(%rsp) #37.5[spill] + 272 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r10, -48(%rsp) #37.5[spill] + 273 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rsi, -40(%rsp) #37.5[spill] + 274 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rcx, -32(%rsp) #37.5[spill] + 275 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %r9, -80(%rsp) #37.5[spill] + 276 | | | 0.00 | 0.00 | 1.00 | | | 1.00 || | | movq %rdx, -72(%rsp) #37.5[spill] + 277 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 + 278 | | | | | | | | || | | ..B1.26: # Preds ..B1.30 ..B1.25 + 279 | | | | | | | | || | | # Execution count [2.50e+01] + 280 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%r13,%rbx,4), %ymm3 #68.21 + 281 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm3, %ymm3, %ymm4 #69.36 + 282 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm4, %ymm3, %ymm3 #69.36 + 283 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl (%r13,%rbx,4), %r10d #68.21 + 284 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 4(%r13,%rbx,4), %r9d #68.21 + 285 | | | 0.50 0.50 | 0.50 0.50 | | | | || | 4.0 | movl 8(%r13,%rbx,4), %r8d #68.21 + 286 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 12(%r13,%rbx,4), %esi #68.21 + 287 | | 1.00 | | | | 0.00 | | || | | lea (%r10,%r10,2), %r10d #69.36 + 288 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 16(%r13,%rbx,4), %ecx #68.21 + 289 | | 1.00 | | | | 0.00 | | || | | lea (%r9,%r9,2), %r9d #69.36 + 290 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 20(%r13,%rbx,4), %edx #68.21 + 291 | | 1.00 | | | | 0.00 | | || | 1.0 | lea (%r8,%r8,2), %r8d #69.36 + 292 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 24(%r13,%rbx,4), %eax #68.21 + 293 | | 1.00 | | | | 0.00 | | || | | lea (%rsi,%rsi,2), %esi #69.36 + 294 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | movl 28(%r13,%rbx,4), %r15d #68.21 + 295 | | 1.00 | | | | 0.00 | | || | | lea (%rcx,%rcx,2), %ecx #69.36 + 296 | | 1.00 | | | | 0.00 | | || | | lea (%rdx,%rdx,2), %edx #69.36 + 297 | | 1.00 | | | | 0.00 | | || | | lea (%rax,%rax,2), %eax #69.36 + 298 | | 1.00 | | | | 0.00 | | || | | lea (%r15,%r15,2), %r15d #69.36 + 299 | | | | | | | | || | | # LOE rbx rbp rdi r13 eax edx ecx esi r8d r9d r10d r11d r12d r14d r15d xmm6 xmm7 xmm12 ymm3 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 + 300 | | | | | | | | || | | ..B1.29: # Preds ..B1.26 + 301 | | | | | | | | || | | # Execution count [1.25e+01] + 302 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k1 #69.36 + 303 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k2 #69.36 + 304 | | | | | | | | || | | X vpcmpeqb %xmm0, %xmm0, %k3 #69.36 + 305 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm4, %zmm4, %zmm4 #69.36 + 306 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm17, %zmm17, %zmm17 #69.36 + 307 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm18, %zmm18, %zmm18 #69.36 + 308 | 1.50 | 0.17 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.83 | || | | vgatherdpd 16(%rdi,%ymm3,8), %zmm4{%k1} #69.36 + 309 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || 4.0 | | vgatherdpd 8(%rdi,%ymm3,8), %zmm17{%k2} #69.36 + 310 | 1.50 | 0.00 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 1.00 | || | | vgatherdpd (%rdi,%ymm3,8), %zmm18{%k3} #69.36 + 311 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 zmm17 zmm18 + 312 | | | | | | | | || | | ..B1.30: # Preds ..B1.29 + 313 | | | | | | | | || | | # Execution count [2.50e+01] + 314 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addl $8, %r12d #67.9 + 315 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addq $8, %rbx #67.9 + 316 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm4, %zmm0, %zmm26 #71.36 + 317 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm17, %zmm1, %zmm24 #70.36 + 318 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm18, %zmm2, %zmm23 #69.36 + 319 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm24, %zmm24, %zmm3 #72.49 + 320 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm23, %zmm23, %zmm3 #72.49 + 321 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm26, %zmm26, %zmm3 #72.63 + 322 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm3, %zmm22 #75.38 + 323 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm3, %k2 #74.22 + 324 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm22, %k0 #75.38 + 325 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm22, %zmm3 #75.38 + 326 | 1.00 | | | | | | | || | | knotw %k0, %k1 #75.38 + 327 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm3, %zmm3, %zmm4 #75.38 + 328 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm22, %zmm3, %zmm22{%k1} #75.38 + 329 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm22, %zmm4, %zmm22{%k1} #75.38 + 330 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm22, %zmm17 #76.38 + 331 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm22, %zmm19 #77.54 + 332 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm22, %zmm20 #76.44 + 333 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm20, %zmm22, %zmm18 #76.50 + 334 | 0.50 | | | | | 0.50 | | || | | vfmsub213pd %zmm5, %zmm20, %zmm22 #77.54 + 335 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm21 #77.61 + 336 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm22, %zmm21, %zmm25 #77.67 + 337 | 1.00 | | | | | 0.00 | | || | | vfmadd231pd %zmm23, %zmm25, %zmm9{%k2} #78.17 + 338 | 1.00 | | | | | 0.00 | | || | | vfmadd231pd %zmm24, %zmm25, %zmm8{%k2} #79.17 + 339 | 1.00 | | | | | 0.00 | | || 4.0 | | vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #80.17 + 340 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpl %r14d, %r12d #67.9 + 341 | | | | | | | | || | | * jb ..B1.26 # Prob 82% #67.9 + 342 | | | | | | | | || | | # LOE rbx rbp rdi r13 r11d r12d r14d xmm6 xmm7 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 + + 21.0 11.2 17.0 6.50 17.0 6.50 7.00 17.0 8.83 7.00 75.0 10.0 + + +Loop-Carried Dependencies Analysis Report +----------------------------------------- + 287 | 6.0 | lea (%r10,%r10,2), %r10d #69.36| [269, 283, 287] + 291 | 10.0 | lea (%r8,%r8,2), %r8d #69.36| [263, 264, 269, 285, 291] + 295 | 9.0 | lea (%rcx,%rcx,2), %ecx #69.36| [264, 269, 288, 295] + 314 | 1.0 | addl $8, %r12d #67.9| [314] + 339 | 4.0 | vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #80.17| [339] + 338 | 4.0 | vfmadd231pd %zmm24, %zmm25, %zmm8{%k2} #79.17| [338] + 337 | 4.0 | vfmadd231pd %zmm23, %zmm25, %zmm9{%k2} #78.17| [337]