Add OSACA analysis and ASM code for AVX2 with AoS, lt600 variant
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
6c03ea3f3c
commit
6c4168fdb5
90
arch_analysis/avx2/force_aos_lt600_markers.txt
Normal file
90
arch_analysis/avx2/force_aos_lt600_markers.txt
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
iwia021h@testfront1:~/MD-Bench$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=BDW asm/avx2/force_aos_lt600_markers.s
|
||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
|
||||||
|
Analyzed file: asm/avx2/force_aos_lt600_markers.s
|
||||||
|
Architecture: BDW
|
||||||
|
Timestamp: 2021-05-07 23:01:31
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
||||||
|
----------------------------------------------------------------------------------------------------
|
||||||
|
12567 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovdqu (%rax,%r8,4), %xmm2 #60.25
|
||||||
|
12568 | 1.00 | | | | | | | || 1.0 | | vmovd %xmm2, %rdx #60.25
|
||||||
|
12569 | | | | | | 1.00 | | || | | vpunpckhqdq %xmm2, %xmm2, %xmm8 #60.25
|
||||||
|
12570 | 1.00 | | | | | | | || | | vmovd %xmm8, %r9 #60.25
|
||||||
|
12571 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | | movl %edx, %ebx #60.25
|
||||||
|
12572 | 0.00 | | | | | | 1.00 | || | | shrq $32, %rdx #60.25
|
||||||
|
12573 | | 0.50 | | | | 0.50 | | || | | lea (%rdx,%rdx,2), %esi #61.40
|
||||||
|
12574 | | 0.50 | | | | 0.50 | | || 1.0 | | lea (%rbx,%rbx,2), %ebx #61.40
|
||||||
|
12575 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || 1.0 | | movslq %ebx, %rdx #61.40
|
||||||
|
12576 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %esi, %rbx #61.40
|
||||||
|
12577 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movl %r9d, %esi #60.25
|
||||||
|
12578 | 0.00 | | | | | | 1.00 | || | | shrq $32, %r9 #60.25
|
||||||
|
12579 | | | 0.50 0.50 | 0.50 0.50 | | | | || 4.0 | | vmovups (%rcx,%rdx,8), %xmm13 #61.40
|
||||||
|
12580 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovups (%rcx,%rbx,8), %xmm0 #61.40
|
||||||
|
12581 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%rcx,%rdx,8), %xmm14 #61.40
|
||||||
|
12582 | | 0.00 | | | | 1.00 | | || | | lea (%rsi,%rsi,2), %esi #61.40
|
||||||
|
12583 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %esi, %rsi #61.40
|
||||||
|
12584 | | 0.00 | | | | 1.00 | | || | | lea (%r9,%r9,2), %r9d #61.40
|
||||||
|
12585 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | movslq %r9d, %r9 #61.40
|
||||||
|
12586 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | || | | vmovhpd 16(%rcx,%rbx,8), %xmm14, %xmm15 #61.40
|
||||||
|
12587 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || 1.0 | | vinsertf128 $1, (%rcx,%rsi,8), %ymm13, %ymm7 #61.40
|
||||||
|
12588 | | | 0.50 0.50 | 0.50 0.50 | | | | || | | vmovq 16(%rcx,%rsi,8), %xmm2 #61.40
|
||||||
|
12589 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || | | vinsertf128 $1, (%rcx,%r9,8), %ymm0, %ymm0 #61.40
|
||||||
|
12590 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | || | | vmovhpd 16(%rcx,%r9,8), %xmm2, %xmm8 #61.40
|
||||||
|
12591 | | | | | | 1.00 | | || | | vunpcklpd %ymm0, %ymm7, %ymm14 #61.40
|
||||||
|
12592 | | | | | | 1.00 | | || 1.0 | | vunpckhpd %ymm0, %ymm7, %ymm7 #61.40
|
||||||
|
12593 | | 1.00 | | | | | | || | | vsubpd %ymm14, %ymm5, %ymm0 #61.40
|
||||||
|
12594 | | | | | | 1.00 | | || | | vinsertf128 $1, %xmm8, %ymm15, %ymm13 #61.40
|
||||||
|
12595 | | 1.00 | | | | | | || 3.0 | | vsubpd %ymm7, %ymm4, %ymm8 #62.40
|
||||||
|
12596 | | 1.00 | | | | | | || | | vsubpd %ymm13, %ymm6, %ymm2 #63.40
|
||||||
|
12597 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm8, %ymm8, %ymm14 #64.53
|
||||||
|
12598 | 0.50 | 0.50 | | | | | | || 5.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.53
|
||||||
|
12599 | 0.25 | 0.75 | | | | | | || 5.0 | | vfmadd231pd %ymm2, %ymm2, %ymm14 #64.67
|
||||||
|
12600 | | | | | | | | || | | X vcmpltpd %ymm11, %ymm14, %ymm7 #66.26
|
||||||
|
12601 | | 0.00 | | | | 1.00 | | || | | vpcmpeqd %ymm13, %ymm13, %ymm13 #66.26
|
||||||
|
12602 | 1.00 | | | | | 1.00 | | || | | vptest %ymm13, %ymm7 #66.26
|
||||||
|
12603 | | | | | | | | || | | X je ..B1.26 # Prob 50% #66.26
|
||||||
|
12604 | | | | | | | | || | | # LOE rax rcx rdi r8 r14 r10d r11d r12d r13d r15d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7 ymm8 ymm9 ymm10 ymm11 ymm12 ymm14
|
||||||
|
12605 | | | | | | | | || | | ..B1.25: # Preds ..B1.24
|
||||||
|
12606 | | | | | | | | || | | # Execution count [1.25e+04]
|
||||||
|
12607 | 2.50 16.00 | 0.50 | | | | | | || 23.0 | | vdivpd %ymm14, %ymm10, %ymm13 #67.42
|
||||||
|
12608 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || 3.0 | | vmulpd 352(%rsp), %ymm13, %ymm14 #68.42[spill]
|
||||||
|
12609 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm13, %ymm14 #68.48
|
||||||
|
12610 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm13, %ymm15 #68.54
|
||||||
|
12611 | 0.50 | 0.50 | | | | | | || | | vfmsub213pd %ymm9, %ymm13, %ymm14 #69.58
|
||||||
|
12612 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | || | | vmulpd 320(%rsp), %ymm13, %ymm13 #69.58[spill]
|
||||||
|
12613 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm13, %ymm15, %ymm15 #69.65
|
||||||
|
12614 | 0.50 | 0.50 | | | | | | || 3.0 | | vmulpd %ymm14, %ymm15, %ymm13 #69.71
|
||||||
|
12615 | 0.75 | 0.25 | | | | | | || | | vmulpd %ymm13, %ymm0, %ymm0 #70.35
|
||||||
|
12616 | 1.00 | 0.00 | | | | | | || | | vmulpd %ymm13, %ymm8, %ymm8 #71.35
|
||||||
|
12617 | | | | | | 1.00 | | || | | vandpd %ymm0, %ymm7, %ymm0 #70.35
|
||||||
|
12618 | | 1.00 | | | | | | || | 3.0 | vaddpd %ymm0, %ymm12, %ymm12 #70.21
|
||||||
|
12619 | 1.00 | 0.00 | | | | | | || 3.0 | | vmulpd %ymm13, %ymm2, %ymm0 #72.35
|
||||||
|
12620 | | | | | | 1.00 | | || | | vandpd %ymm8, %ymm7, %ymm2 #71.35
|
||||||
|
12621 | | | | | | 1.00 | | || 1.0 | | vandpd %ymm0, %ymm7, %ymm7 #72.35
|
||||||
|
12622 | | 1.00 | | | | | | || | | vaddpd %ymm2, %ymm1, %ymm1 #71.21
|
||||||
|
12623 | | 1.00 | | | | | | || 3.0 | | vaddpd %ymm7, %ymm3, %ymm3 #72.21
|
||||||
|
12624 | | | | | | | | || | | # LOE rax rcx rdi r8 r14 r10d r11d r12d r13d r15d ymm1 ymm3 ymm4 ymm5 ymm6 ymm9 ymm10 ymm11 ymm12
|
||||||
|
12625 | | | | | | | | || | | ..B1.26: # Preds ..B1.25 ..B1.24
|
||||||
|
12626 | | | | | | | | || | | # Execution count [2.50e+04]
|
||||||
|
12627 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | addq $4, %r8 #59.13
|
||||||
|
12628 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpq %rdi, %r8 #59.13
|
||||||
|
12629 | | | | | | | | || | | * jb ..B1.24 # Prob 82% #59.13
|
||||||
|
|
||||||
|
14.0 16.00 14.0 5.50 5.50 5.50 5.50 14.0 10.0 75.0 3
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
12618 | 3.0 | vaddpd %ymm0, %ymm12, %ymm12 #70.21| [12618]
|
||||||
|
12622 | 3.0 | vaddpd %ymm2, %ymm1, %ymm1 #71.21| [12622]
|
||||||
|
12623 | 3.0 | vaddpd %ymm7, %ymm3, %ymm3 #72.21| [12623]
|
||||||
|
12627 | 1.0 | addq $4, %r8 #59.13| [12627]
|
12817
asm/avx2/force_aos_lt600_markers.s
Normal file
12817
asm/avx2/force_aos_lt600_markers.s
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user