11b2d4bcc1
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
81 lines
9.7 KiB
Plaintext
81 lines
9.7 KiB
Plaintext
iwia021h@testfront1:~/MD-Bench/asm$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force_aos_geq1200_markers.s
|
|
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
|
|
Analyzed file: force_aos_geq1200_markers.s
|
|
Architecture: CSX
|
|
Timestamp: 2021-04-29 15:53:50
|
|
|
|
|
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
|
* - Instruction micro-ops not bound to a port
|
|
X - No throughput/latency information for this instruction in data file
|
|
|
|
|
|
Combined Analysis Report
|
|
------------------------
|
|
Port pressure in cycles
|
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
|
-------------------------------------------------------------------------------------------------
|
|
196 | | | | | | 1.00 | | || | | vpcmpgtd %ymm3, %ymm2, %k3 #67.9
|
|
197 | 0.00 | 1.00 | 0.50 0.50 | 0.50 0.50 | | 0.00 | | || 4.0 | | vmovdqu32 (%r15,%r13,4), %ymm17{%k3}{z} #68.21
|
|
198 | 1.00 | | | | | | | || | | kmovw %k3, %r9d #67.9
|
|
199 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm17, %ymm17, %ymm18 #69.36
|
|
200 | 0.00 | 1.00 | | | | 0.00 | | || 1.0 | | vpaddd %ymm18, %ymm17, %ymm17 #69.36
|
|
201 | | | | | | | | || | | # LOE rax rcx rbx rbp rsi rdi r8 r10 r13 r15 r9d r11d r12d r14d xmm6 xmm7 xmm12 ymm2 ymm3 ymm15 ymm16 ymm17 zmm0 zmm1 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 k3
|
|
202 | | | | | | | | || | | ..B1.21: # Preds ..B1.18
|
|
203 | | | | | | | | || | | # Execution count [1.25e+01]
|
|
204 | 1.00 | | | | | | | || | | kmovw %k3, %k1 #69.36
|
|
205 | 1.00 | | | | | | | || | | kmovw %k3, %k2 #69.36
|
|
206 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm18, %zmm18, %zmm18 #69.36
|
|
207 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm19, %zmm19, %zmm19 #69.36
|
|
208 | 0.50 | | | | | 0.50 | | || | | vpxord %zmm20, %zmm20, %zmm20 #69.36
|
|
209 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || | | vgatherdpd 16(%rdi,%ymm17,8), %zmm18{%k1} #69.36
|
|
210 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || 4.0 | | vgatherdpd 8(%rdi,%ymm17,8), %zmm19{%k2} #69.36
|
|
211 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || | | vgatherdpd (%rdi,%ymm17,8), %zmm20{%k3} #69.36
|
|
212 | | | | | | | | || | | # LOE rax rcx rbx rbp rsi rdi r8 r10 r13 r15 r9d r11d r12d r14d xmm6 xmm7 xmm12 ymm2 ymm3 ymm15 ymm16 zmm0 zmm1 zmm4 zmm5 zmm8 zmm9 zmm10 zmm11 zmm13 zmm14 zmm18 zmm19 zmm20
|
|
213 | | | | | | | | || | | ..B1.22: # Preds ..B1.21
|
|
214 | | | | | | | | || | | # Execution count [2.50e+01]
|
|
215 | 0.00 | 0.16 | | | | 0.00 | 0.84 | || | | addq $8, %r13 #67.9
|
|
216 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm16, %ymm3, %ymm3 #67.9
|
|
217 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm18, %zmm4, %zmm29 #71.36
|
|
218 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm19, %zmm0, %zmm27 #70.36
|
|
219 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm20, %zmm1, %zmm26 #69.36
|
|
220 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm27, %zmm27, %zmm25 #72.49
|
|
221 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm26, %zmm26, %zmm25 #72.49
|
|
222 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm29, %zmm29, %zmm25 #72.63
|
|
223 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm25, %zmm24 #75.38
|
|
224 | | | | | | 1.00 | | || | | vcmppd $1, %zmm14, %zmm25, %k2 #74.22
|
|
225 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm24, %k0 #75.38
|
|
226 | 1.00 | | | | | | | || | | kmovw %k2, %edx #74.22
|
|
227 | 1.00 | | | | | | | || | | knotw %k0, %k1 #75.38
|
|
228 | | | | | | | | || | | * vmovaps %zmm25, %zmm17 #75.38
|
|
229 | 0.00 | 0.34 | | | | 0.00 | 0.66 | || | | andl %edx, %r9d #74.22
|
|
230 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm24, %zmm17 #75.38
|
|
231 | 1.00 | | | | | | | || | | kmovw %r9d, %k3 #78.17
|
|
232 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm17, %zmm17, %zmm18 #75.38
|
|
233 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm24, %zmm17, %zmm24{%k1} #75.38
|
|
234 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm24, %zmm18, %zmm24{%k1} #75.38
|
|
235 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm13, %zmm24, %zmm19 #76.38
|
|
236 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm10, %zmm24, %zmm21 #77.54
|
|
237 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm19, %zmm24, %zmm22 #76.44
|
|
238 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm22, %zmm24, %zmm20 #76.50
|
|
239 | 0.00 | | | | | 1.00 | | || | | vfmsub213pd %zmm5, %zmm22, %zmm24 #77.54
|
|
240 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm21, %zmm20, %zmm23 #77.61
|
|
241 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm24, %zmm23, %zmm28 #77.67
|
|
242 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm26, %zmm28, %zmm9{%k3} #78.17
|
|
243 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm27, %zmm28, %zmm8{%k3} #79.17
|
|
244 | 0.00 | | | | | 1.00 | | || 4.0 | 4.0 | vfmadd231pd %zmm29, %zmm28, %zmm11{%k3} #80.17
|
|
245 | 0.00 | 0.00 | | | | 0.00 | 1.00 | || | | cmpq %rbx, %r13 #67.9
|
|
246 | | | | | | | | || | | * jb ..B1.18 # Prob 82% #67.9
|
|
|
|
| 20.5 | 6.00 | 13.0 2.50 | 13.0 2.50 | | 20.5 | 4.00 | || 70.0 | 4 |
|
|
|
|
|
|
Loop-Carried Dependencies Analysis Report
|
|
-----------------------------------------
|
|
215 | 1.0 | addq $8, %r13 #67.9| [215]
|
|
216 | 1.0 | vpaddd %ymm16, %ymm3, %ymm3 #67.9| [216]
|
|
244 | 4.0 | vfmadd231pd %zmm29, %zmm28, %zmm11{%k3} #80.17| [244]
|
|
243 | 4.0 | vfmadd231pd %zmm27, %zmm28, %zmm8{%k3} #79.17| [243]
|
|
242 | 4.0 | vfmadd231pd %zmm26, %zmm28, %zmm9{%k3} #78.17| [242]
|
|
|