9c28ff1e9e
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
72 lines, 8.4 KiB, plaintext
iwia021h@testfront1:~/MD-Bench/asm$ /home/hpc/iwia/iwia021h/.local/bin/osaca --ignore-unknown --arch=CSX force_soa_geq1200_markers.s
|
|
Open Source Architecture Code Analyzer (OSACA) - 0.3.14
|
|
Analyzed file: force_soa_geq1200_markers.s
|
|
Architecture: CSX
|
|
Timestamp: 2021-04-29 15:54:23
|
|
|
|
|
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
|
* - Instruction micro-ops not bound to a port
|
|
X - No throughput/latency information for this instruction in data file
|
|
|
|
|
|
Combined Analysis Report
|
|
------------------------
|
|
Port pressure in cycles
|
|
| 0 - 0DV | 1 | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 || CP | LCD |
|
|
-------------------------------------------------------------------------------------------------
|
|
189 | | | | | | | | || | | # LOE rax rdx rcx rbp rsi rdi r8 r10 r11 r12 r14 r15 ebx r9d r13d xmm8 xmm9 xmm10 ymm3 ymm4 ymm17 ymm18 zmm2 zmm5 zmm6 zmm7 zmm11 zmm12 zmm13 zmm14 zmm15 zmm16 zmm19
|
|
190 | | | | | | | | || | | ..B1.18: # Preds ..B1.18 ..B1.17
|
|
191 | | | | | | | | || | | # Execution count [2.50e+01]
|
|
192 | | | | | | 1.00 | | || | | vpcmpgtd %ymm4, %ymm3, %k5 #67.9
|
|
193 | 0.00 | 1.00 | | | | 0.00 | | || | | vpaddd %ymm18, %ymm4, %ymm4 #67.9
|
|
194 | 0.00 | 1.00 | 0.50 0.50 | 0.50 0.50 | | 0.00 | | || 4.0 | | vmovdqu32 (%rcx,%r15,4), %ymm20{%k5}{z} #68.21
|
|
195 | | | | | | | | || | | * vmovaps %zmm19, %zmm22 #70.36
|
|
196 | 0.00 | 0.50 | | | | 0.00 | 0.50 | || | | addq $8, %r15 #67.9
|
|
197 | 1.00 | | | | | | | || | | kmovw %k5, %k2 #70.36
|
|
198 | | | | | | | | || | | * vmovaps %zmm19, %zmm21 #69.36
|
|
199 | 1.00 | | | | | | | || | | kmovw %k5, %k1 #69.36
|
|
200 | | | | | | | | || | | * vmovaps %zmm19, %zmm23 #71.36
|
|
201 | 1.00 | | | | | | | || | | kmovw %k5, %k3 #71.36
|
|
202 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || | | vgatherdpd (%rsi,%ymm20,8), %zmm23{%k3} #71.36
|
|
203 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || 4.0 | | vgatherdpd (%rax,%ymm20,8), %zmm22{%k2} #70.36
|
|
204 | 1.50 | 0.50 | 4.00 0.50 | 4.00 0.50 | | 0.50 | 0.50 | || | | vgatherdpd (%rdx,%ymm20,8), %zmm21{%k1} #69.36
|
|
205 | 0.50 | | | | | 0.50 | | || 4.0 | | vsubpd %zmm22, %zmm5, %zmm0 #70.36
|
|
206 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm21, %zmm2, %zmm1 #69.36
|
|
207 | 0.50 | | | | | 0.50 | | || | | vsubpd %zmm23, %zmm6, %zmm21 #71.36
|
|
208 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm0, %zmm0, %zmm20 #72.49
|
|
209 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm1, %zmm1, %zmm20 #72.49
|
|
210 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd231pd %zmm21, %zmm21, %zmm20 #72.63
|
|
211 | 2.50 | | | | | 0.50 | | || 8.0 | | vrcp14pd %zmm20, %zmm31 #75.38
|
|
212 | | | | | | 1.00 | | || | | vcmppd $1, %zmm16, %zmm20, %k6{%k5} #74.22
|
|
213 | | | | | | 1.00 | | || | | vfpclasspd $30, %zmm31, %k0 #75.38
|
|
214 | | | | | | | | || | | * vmovaps %zmm20, %zmm24 #75.38
|
|
215 | 0.50 | | 0.50 0.50 | 0.50 0.50 | | 0.50 | | || 4.0 | | vfnmadd213pd .L_2il0floatpacket.5(%rip){1to8}, %zmm31, %zmm24 #75.38
|
|
216 | 1.00 | | | | | | | || | | knotw %k0, %k4 #75.38
|
|
217 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm24, %zmm24, %zmm25 #75.38
|
|
218 | 0.50 | | | | | 0.50 | | || | | vfmadd213pd %zmm31, %zmm24, %zmm31{%k4} #75.38
|
|
219 | 0.50 | | | | | 0.50 | | || 4.0 | | vfmadd213pd %zmm31, %zmm25, %zmm31{%k4} #75.38
|
|
220 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm15, %zmm31, %zmm26 #76.38
|
|
221 | 0.50 | | | | | 0.50 | | || | | vmulpd %zmm14, %zmm31, %zmm28 #77.54
|
|
222 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm26, %zmm31, %zmm29 #76.44
|
|
223 | 0.50 | | | | | 0.50 | | || 4.0 | | vmulpd %zmm29, %zmm31, %zmm27 #76.50
|
|
224 | 0.00 | | | | | 1.00 | | || | | vfmsub213pd %zmm7, %zmm29, %zmm31 #77.54
|
|
225 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm28, %zmm27, %zmm30 #77.61
|
|
226 | 0.00 | | | | | 1.00 | | || 4.0 | | vmulpd %zmm31, %zmm30, %zmm24 #77.67
|
|
227 | 0.00 | | | | | 1.00 | | || | | vfmadd231pd %zmm1, %zmm24, %zmm13{%k6} #78.17
|
|
228 | 0.00 | | | | | 1.00 | | || | 4.0 | vfmadd231pd %zmm0, %zmm24, %zmm12{%k6} #79.17
|
|
229 | 0.00 | | | | | 1.00 | | || 4.0 | | vfmadd231pd %zmm21, %zmm24, %zmm11{%k6} #80.17
|
|
230 | 0.00 | 0.17 | | | | 0.00 | 0.83 | || | | cmpq %r14, %r15 #67.9
|
|
231 | | | | | | | | || | | * jb ..B1.18 # Prob 82% #67.9
|
|
|
|
| 18.0 | 4.17 | 13.0 2.50 | 13.0 2.50 | | 18.0 | 2.83 | || 68.0 | 4 |
|
|
|
|
|
|
Loop-Carried Dependencies Analysis Report
|
|
-----------------------------------------
|
|
193 | 1.0 | vpaddd %ymm18, %ymm4, %ymm4 #67.9| [193]
|
|
196 | 1.0 | addq $8, %r15 #67.9| [196]
|
|
228 | 4.0 | vfmadd231pd %zmm0, %zmm24, %zmm12{%k6} #79.17| [228]
|
|
227 | 4.0 | vfmadd231pd %zmm1, %zmm24, %zmm13{%k6} #78.17| [227]
|
|
229 | 4.0 | vfmadd231pd %zmm21, %zmm24, %zmm11{%k6} #80.17| [229]
|