Add OSACA analysis and ASM code for AVX2 with AoS, lt600 variant

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2021-05-08 01:03:44 +02:00
parent 6c03ea3f3c
commit 6c4168fdb5
2 changed files with 12907 additions and 0 deletions

View File

@ -0,0 +1,90 @@
iwia021h@testfront1:~/MD-Bench$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=BDW asm/avx2/force_aos_lt600_markers.s
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
Analyzed file: asm/avx2/force_aos_lt600_markers.s
Architecture: BDW
Timestamp: 2021-05-07 23:01:31
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
* - Instruction micro-ops not bound to a port
X - No throughput/latency information for this instruction in data file
Combined Analysis Report
------------------------
Port pressure in cycles
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
----------------------------------------------------------------------------------------------------
12567 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%rax,%r8,4), %xmm2 #60.25
12568 | 1.00 | | | | | | | || 1.0 | | vmovd %xmm2, %rdx #60.25
12569 | | | | | | 1.00 | | || | | vpunpckhqdq %xmm2, %xmm2, %xmm8 #60.25
12570 | 1.00 | | | | | | | || | | vmovd %xmm8, %r9 #60.25
12571 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | | movl %edx, %ebx #60.25
12572 | 0.00 | | | | | | 1.00 | || | | shrq $32, %rdx #60.25
12573 | | 0.50 | | | | 0.50 | | || | | lea (%rdx,%rdx,2), %esi #61.40
12574 | | 0.50 | | | | 0.50 | | || 1.0 | | lea (%rbx,%rbx,2), %ebx #61.40
12575 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | | movslq %ebx, %rdx #61.40
12576 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %esi, %rbx #61.40
12577 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movl %r9d, %esi #60.25
12578 | 0.00 | | | | | | 1.00 | || | | shrq $32, %r9 #60.25
12579 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovups (%rcx,%rdx,8), %xmm13 #61.40
12580 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%rcx,%rbx,8), %xmm0 #61.40
12581 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%rcx,%rdx,8), %xmm14 #61.40
12582 | | 0.00 | | | | 1.00 | | || | | lea (%rsi,%rsi,2), %esi #61.40
12583 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %esi, %rsi #61.40
12584 | | 0.00 | | | | 1.00 | | || | | lea (%r9,%r9,2), %r9d #61.40
12585 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r9d, %r9 #61.40
12586 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | || | | vmovhpd 16(%rcx,%rbx,8), %xmm14, %xmm15 #61.40
12587 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || 1.0 | | vinsertf128 $1, (%rcx,%rsi,8), %ymm13, %ymm7 #61.40
12588 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%rcx,%rsi,8), %xmm2 #61.40
12589 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || | | vinsertf128 $1, (%rcx,%r9,8), %ymm0, %ymm0 #61.40
12590 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | || | | vmovhpd 16(%rcx,%r9,8), %xmm2, %xmm8 #61.40
12591 | | | | | | 1.00 | | || | | vunpcklpd %ymm0, %ymm7, %ymm14 #61.40
12592 | | | | | | 1.00 | | || 1.0 | | vunpckhpd %ymm0, %ymm7, %ymm7 #61.40
12593 | | 1.00 | | | | | | || | | vsubpd %ymm14, %ymm5, %ymm0 #61.40
12594 | | | | | | 1.00 | | || | | vinsertf128 $1, %xmm8, %ymm15, %ymm13 #61.40
12595 | | 1.00 | | | | | | || 3.0 | | vsubpd %ymm7, %ymm4, %ymm8 #62.40
12596 | | 1.00 | | | | | | || | | vsubpd %ymm13, %ymm6, %ymm2 #63.40
12597 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm8, %ymm8, %ymm14 #64.53
12598 | 0.50 | 0.50 | | | | | | || 5.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.53
12599 | 0.25 | 0.75 | | | | | | || 5.0 | | vfmadd231pd %ymm2, %ymm2, %ymm14 #64.67
12600 | | | | | | | | || | | X vcmpltpd %ymm11, %ymm14, %ymm7 #66.26
12601 | | 0.00 | | | | 1.00 | | || | | vpcmpeqd %ymm13, %ymm13, %ymm13 #66.26
12602 | 1.00 | | | | | 1.00 | | || | | vptest %ymm13, %ymm7 #66.26
12603 | | | | | | | | || | | X je ..B1.26 # Prob 50% #66.26
12604 | | | | | | | | || | | # LOE rax rcx rdi r8 r14 r10d r11d r12d r13d r15d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7 ymm8 ymm9 ymm10 ymm11 ymm12 ymm14
12605 | | | | | | | | || | | ..B1.25: # Preds ..B1.24
12606 | | | | | | | | || | | # Execution count [1.25e+04]
12607 | 2.50 16.00 | 0.50 | | | | | | || 23.0 | | vdivpd %ymm14, %ymm10, %ymm13 #67.42
12608 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || 3.0 | | vmulpd 352(%rsp), %ymm13, %ymm14 #68.42[spill]
12609 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm13, %ymm14 #68.48
12610 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm13, %ymm15 #68.54
12611 | 0.50 | 0.50 | | | | | | || | | vfmsub213pd %ymm9, %ymm13, %ymm14 #69.58
12612 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || | | vmulpd 320(%rsp), %ymm13, %ymm13 #69.58[spill]
12613 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm13, %ymm15, %ymm15 #69.65
12614 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm15, %ymm13 #69.71
12615 | 0.75 | 0.25 | | | | | | || | | vmulpd %ymm13, %ymm0, %ymm0 #70.35
12616 | 1.00 | 0.00 | | | | | | || | | vmulpd %ymm13, %ymm8, %ymm8 #71.35
12617 | | | | | | 1.00 | | || | | vandpd %ymm0, %ymm7, %ymm0 #70.35
12618 | | 1.00 | | | | | | || | 3.0 | vaddpd %ymm0, %ymm12, %ymm12 #70.21
12619 | 1.00 | 0.00 | | | | | | || 3.0 | | vmulpd %ymm13, %ymm2, %ymm0 #72.35
12620 | | | | | | 1.00 | | || | | vandpd %ymm8, %ymm7, %ymm2 #71.35
12621 | | | | | | 1.00 | | || 1.0 | | vandpd %ymm0, %ymm7, %ymm7 #72.35
12622 | | 1.00 | | | | | | || | | vaddpd %ymm2, %ymm1, %ymm1 #71.21
12623 | | 1.00 | | | | | | || 3.0 | | vaddpd %ymm7, %ymm3, %ymm3 #72.21
12624 | | | | | | | | || | | # LOE rax rcx rdi r8 r14 r10d r11d r12d r13d r15d ymm1 ymm3 ymm4 ymm5 ymm6 ymm9 ymm10 ymm11 ymm12
12625 | | | | | | | | || | | ..B1.26: # Preds ..B1.25 ..B1.24
12626 | | | | | | | | || | | # Execution count [2.50e+04]
12627 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addq $4, %r8 #59.13
12628 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpq %rdi, %r8 #59.13
12629 | | | | | | | | || | | * jb ..B1.24 # Prob 82% #59.13
14.0 16.00 14.0 5.50 5.50 5.50 5.50 14.0 10.0 75.0 3
Loop-Carried Dependencies Analysis Report
-----------------------------------------
12618 | 3.0 | vaddpd %ymm0, %ymm12, %ymm12 #70.21| [12618]
12622 | 3.0 | vaddpd %ymm2, %ymm1, %ymm1 #71.21| [12622]
12623 | 3.0 | vaddpd %ymm7, %ymm3, %ymm3 #72.21| [12623]
12627 | 1.0 | addq $4, %r8 #59.13| [12627]

File diff suppressed because it is too large Load Diff