Add arch_analysis directory and first AVX2 results

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti
2021-05-06 23:18:28 +02:00
parent 327cc302b8
commit 9c28ff1e9e
17 changed files with 26360 additions and 0 deletions

View File

@@ -0,0 +1,74 @@
iwia021h@testfront1:~/MD-Bench/asm/avx2$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=BDW force_soa_lt600_markers.s
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
Analyzed file: force_soa_lt600_markers.s
Architecture: BDW
Timestamp: 2021-05-06 15:40:52
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
* - Instruction micro-ops not bound to a port
X - No throughput/latency information for this instruction in data file
Combined Analysis Report
------------------------
Port pressure in cycles
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
----------------------------------------------------------------------------------------------------
12343 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovups (%rax,%rdx,4), %xmm2 #60.25
12344 | | 1.00 | | | | 0.00 | | || | 1.0 | vpcmpeqd %ymm13, %ymm13, %ymm13 #61.40
12345 | | | | | | 1.00 | | || | | vxorpd %ymm8, %ymm8, %ymm8 #61.40
12346 | | | | | | | | || | | * vmovdqa %ymm13, %ymm0 #61.40
12347 | | | | | | 1.00 | | || | | vxorpd %ymm14, %ymm14, %ymm14 #62.40
12348 | | | | | | | | || | | * vmovdqa %ymm13, %ymm7 #62.40
12349 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 3.00 | 1.00 | || | | vgatherdpd %ymm0, (%r8,%xmm2,8), %ymm8 #61.40
12350 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 3.00 | 1.00 | || 5.0 | | vgatherdpd %ymm7, (%rdi,%xmm2,8), %ymm14 #62.40
12351 | | 1.00 | | | | | | || | | vsubpd %ymm8, %ymm5, %ymm0 #61.40
12352 | | 1.00 | | | | | | || 3.0 | | vsubpd %ymm14, %ymm4, %ymm8 #62.40
12353 | | | | | | 1.00 | | || | | vxorpd %ymm7, %ymm7, %ymm7 #63.40
12354 | | | | | | | | || | 0.0 | * vmovdqa %ymm13, %ymm15 #63.40
12355 | 0.00 | 0.00 | 2.00 | 2.00 | 1.00 | 3.00 | 1.00 | || | 5.0 | vgatherdpd %ymm15, (%rsi,%xmm2,8), %ymm7 #63.40
12356 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm8, %ymm8, %ymm14 #64.53
12357 | | 1.00 | | | | | | || | 3.0 | vsubpd %ymm7, %ymm6, %ymm2 #63.40
12358 | 0.50 | 0.50 | | | | | | || 5.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.53
12359 | 0.50 | 0.50 | | | | | | || 5.0 | 5.0 | vfmadd231pd %ymm2, %ymm2, %ymm14 #64.67
12360 | | | | | | | | || | | X vcmpltpd %ymm11, %ymm14, %ymm7 #66.26
12361 | 1.00 | | | | | 1.00 | | || | | vptest %ymm13, %ymm7 #66.26
12362 | | | | | | | | || | | X je ..B1.26 # Prob 50% #66.26
12363 | | | | | | | | || | | # LOE rax rdx rsi rdi r8 r9 r12 r14 ecx ebx r10d r13d r15d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7 ymm8 ymm9 ymm10 ymm11 ymm12 ymm14
12364 | | | | | | | | || | | ..B1.25: # Preds ..B1.24
12365 | | | | | | | | || | | # Execution count [1.25e+04]
12366 | 2.50 16.00 | 0.50 | | | | | | || 23.0 | 23.0 | vdivpd %ymm14, %ymm10, %ymm13 #67.42
12367 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || 3.0 | | vmulpd 352(%rsp), %ymm13, %ymm14 #68.42[spill]
12368 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm13, %ymm14 #68.48
12369 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm13, %ymm15 #68.54
12370 | 0.50 | 0.50 | | | | | | || | | vfmsub213pd %ymm9, %ymm13, %ymm14 #69.58
12371 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || | 3.0 | vmulpd 320(%rsp), %ymm13, %ymm13 #69.58[spill]
12372 | 0.50 | 0.50 | | | | | | || 3.0 | 3.0 | vmulpd %ymm13, %ymm15, %ymm15 #69.65
12373 | 0.75 | 0.25 | | | | | | || 3.0 | 3.0 | vmulpd %ymm14, %ymm15, %ymm13 #69.71
12374 | 1.00 | 0.00 | | | | | | || | | vmulpd %ymm13, %ymm0, %ymm0 #70.35
12375 | 1.00 | 0.00 | | | | | | || | | vmulpd %ymm13, %ymm8, %ymm8 #71.35
12376 | | | | | | 1.00 | | || | | vandpd %ymm0, %ymm7, %ymm0 #70.35
12377 | | 1.00 | | | | | | || | | vaddpd %ymm0, %ymm12, %ymm12 #70.21
12378 | 1.00 | 0.00 | | | | | | || 3.0 | | vmulpd %ymm13, %ymm2, %ymm0 #72.35
12379 | | | | | | 1.00 | | || | | vandpd %ymm8, %ymm7, %ymm2 #71.35
12380 | | | | | | 1.00 | | || 1.0 | | vandpd %ymm0, %ymm7, %ymm7 #72.35
12381 | | 1.00 | | | | | | || | | vaddpd %ymm2, %ymm1, %ymm1 #71.21
12382 | | 1.00 | | | | | | || 3.0 | | vaddpd %ymm7, %ymm3, %ymm3 #72.21
12383 | | | | | | | | || | | # LOE rax rdx rsi rdi r8 r9 r12 r14 ecx ebx r10d r13d r15d ymm1 ymm3 ymm4 ymm5 ymm6 ymm9 ymm10 ymm11 ymm12
12384 | | | | | | | | || | | ..B1.26: # Preds ..B1.25 ..B1.24
12385 | | | | | | | | || | | # Execution count [2.50e+04]
12386 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addq $4, %rdx #59.13
12387 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpq %r14, %rdx #59.13
12388 | | | | | | | | || | | * jb ..B1.24 # Prob 82% #59.13
11.8 16.00 12.2 7.50 1.50 7.50 1.50 3.00 16.0 5.00 70 46
Loop-Carried Dependencies Analysis Report
-----------------------------------------
12373 | 46.0 | vmulpd %ymm14, %ymm15, %ymm13 #69.71| [12344, 12354, 12355, 12357, 12359, 12366, 12371, 12372, 12373]
12377 | 3.0 | vaddpd %ymm0, %ymm12, %ymm12 #70.21| [12377]
12381 | 3.0 | vaddpd %ymm2, %ymm1, %ymm1 #71.21| [12381]
12382 | 3.0 | vaddpd %ymm7, %ymm3, %ymm3 #72.21| [12382]
12386 | 1.0 | addq $4, %rdx #59.13| [12386]