Merge branch 'master' of github.com:RRZE-HPC/MD-Bench
This commit is contained in:
commit
f5fd3e265a
167
static_analysis/jan/analyses/gromacs-icx-avx512-dp-osaca-icx.out
Normal file
167
static_analysis/jan/analyses/gromacs-icx-avx512-dp-osaca-icx.out
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||||
|
Analyzed file: gromacs-icx-avx512-dp.s
|
||||||
|
Architecture: ICX
|
||||||
|
Timestamp: 2023-02-14 12:51:57
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
||||||
|
------------------------------------------------------------------------------------------------------------------------
|
||||||
|
2241 | | | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67
|
||||||
|
2242 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||||
|
2243 | | | | | | | | | | || | | .LBB5_12: # Parent Loop BB5_7 Depth=1
|
||||||
|
2244 | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||||
|
2245 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | movslq (%r10,%rbx,4), %rcx
|
||||||
|
2246 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || 6.0 | | leaq (%rcx,%rcx,2), %rdx
|
||||||
|
2247 | 0.00 | | | | | | 1.00 | | | || 1.0 | | shlq $6, %rdx
|
||||||
|
2248 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd (%rsi,%rdx), %zmm28 # AlignMOV convert to UnAlignMOV
|
||||||
|
2249 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 64(%rsi,%rdx), %zmm29 # AlignMOV convert to UnAlignMOV
|
||||||
|
2250 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovupd 128(%rsi,%rdx), %zmm30 # AlignMOV convert to UnAlignMOV
|
||||||
|
2251 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 16(%rsp), %zmm3 # 64-byte Reload
|
||||||
|
2252 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm3, %zmm3
|
||||||
|
2253 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubpd %zmm30, %zmm24, %zmm31
|
||||||
|
2254 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 336(%rsp), %zmm16 # 64-byte Reload
|
||||||
|
2255 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm16, %zmm16
|
||||||
|
2256 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm17
|
||||||
|
2257 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm16, %zmm16, %zmm17 # zmm17 = (zmm16 * zmm16) + zmm17
|
||||||
|
2258 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm3, %zmm3, %zmm17 # zmm17 = (zmm3 * zmm3) + zmm17
|
||||||
|
2259 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14pd %zmm17, %zmm18
|
||||||
|
2260 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm18, %zmm21, %zmm19
|
||||||
|
2261 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm19
|
||||||
|
2262 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm19
|
||||||
|
2263 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vaddpd %zmm1, %zmm19, %zmm20
|
||||||
|
2264 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm22, %zmm18
|
||||||
|
2265 | 0.75 | | | | | 0.250 | | | | || 4.0 | | vmulpd %zmm20, %zmm18, %zmm18
|
||||||
|
2266 | 1.00 | | | | | 0.000 | | | | || | | vsubpd %zmm30, %zmm25, %zmm20
|
||||||
|
2267 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | leal (%rcx,%rcx), %edx
|
||||||
|
2268 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | cmpq %rdx, %r11
|
||||||
|
2269 | 0.00 | | | | | | 1.00 | | | || | | setne %dl
|
||||||
|
2270 | 0.00 | | | | | | 1.00 | | | || | | sete %al
|
||||||
|
2271 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | addl %ecx, %ecx
|
||||||
|
2272 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | incl %ecx
|
||||||
|
2273 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | cmpq %rcx, %r11
|
||||||
|
2274 | 0.00 | | | | | | 1.00 | | | || | | sete %cl
|
||||||
|
2275 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm18, %zmm19, %zmm18
|
||||||
|
2276 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 528(%rsp), %zmm19 # 64-byte Reload
|
||||||
|
2277 | 1.00 | | | | | 0.000 | | | | || | | vsubpd %zmm28, %zmm19, %zmm19
|
||||||
|
2278 | 0.00 | | | | | | 1.00 | | | || | | setne %dil
|
||||||
|
2279 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | movl %edi, %ebp
|
||||||
|
2280 | 0.00 | | | | | | 1.00 | | | || | 1.0 | shlb $4, %bpl
|
||||||
|
2281 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | subb %al, %bpl
|
||||||
|
2282 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | addb $-17, %bpl
|
||||||
|
2283 | 1.00 | | | | | | | | | || | | kmovd %ebp, %k1
|
||||||
|
2284 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm17, %k1 {%k1}
|
||||||
|
2285 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 272(%rsp), %zmm17 # 64-byte Reload
|
||||||
|
2286 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm17, %zmm17
|
||||||
|
2287 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | leal (%rdx,%rdx), %eax
|
||||||
|
2288 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | movl %edi, %ebp
|
||||||
|
2289 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm2, %zmm18, %zmm18
|
||||||
|
2290 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14
|
||||||
|
2291 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm20, %zmm3
|
||||||
|
2292 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm17, %zmm17, %zmm3 # zmm3 = (zmm17 * zmm17) + zmm3
|
||||||
|
2293 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm19, %zmm19, %zmm3 # zmm3 = (zmm19 * zmm19) + zmm3
|
||||||
|
2294 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11
|
||||||
|
2295 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm3, %zmm16
|
||||||
|
2296 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7
|
||||||
|
2297 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm21, %zmm18
|
||||||
|
2298 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm16, %zmm18
|
||||||
|
2299 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm16, %zmm18
|
||||||
|
2300 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm1, %zmm18, %zmm31
|
||||||
|
2301 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm22, %zmm16
|
||||||
|
2302 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm31, %zmm16, %zmm16
|
||||||
|
2303 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 464(%rsp), %zmm31 # 64-byte Reload
|
||||||
|
2304 | 0.75 | | | | | 0.250 | | | | || | | vsubpd %zmm28, %zmm31, %zmm31
|
||||||
|
2305 | 0.00 | | | | | | 1.00 | | | || | 1.0 | shlb $5, %bpl
|
||||||
|
2306 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | orb %al, %bpl
|
||||||
|
2307 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | orb $-35, %bpl
|
||||||
|
2308 | 1.00 | | | | | | | | | || | | kmovd %ebp, %k1
|
||||||
|
2309 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm3, %k1 {%k1}
|
||||||
|
2310 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 208(%rsp), %zmm3 # 64-byte Reload
|
||||||
|
2311 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm3, %zmm3
|
||||||
|
2312 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm18, %zmm16
|
||||||
|
2313 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm26, %zmm18
|
||||||
|
2314 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm2, %zmm16, %zmm16
|
||||||
|
2315 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15
|
||||||
|
2316 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm18, %zmm19
|
||||||
|
2317 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm3, %zmm3, %zmm19 # zmm19 = (zmm3 * zmm3) + zmm19
|
||||||
|
2318 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm31, %zmm31, %zmm19 # zmm19 = (zmm31 * zmm31) + zmm19
|
||||||
|
2319 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10
|
||||||
|
2320 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm19, %zmm17
|
||||||
|
2321 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6
|
||||||
|
2322 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm17, %zmm21, %zmm16
|
||||||
|
2323 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm17, %zmm16
|
||||||
|
2324 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm17, %zmm16
|
||||||
|
2325 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm1, %zmm16, %zmm20
|
||||||
|
2326 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm17, %zmm22, %zmm17
|
||||||
|
2327 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm17, %zmm17
|
||||||
|
2328 | 0.75 | | | | | 0.250 | | | | || | | vmulpd %zmm17, %zmm16, %zmm16
|
||||||
|
2329 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | leal (,%rdx,4), %eax
|
||||||
|
2330 | 0.00 | | | | | | 1.00 | | | || | | shlb $6, %dil
|
||||||
|
2331 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orb %al, %dil
|
||||||
|
2332 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orb $-69, %dil
|
||||||
|
2333 | 1.00 | | | | | | | | | || | | kmovd %edi, %k1
|
||||||
|
2334 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm19, %k1 {%k1}
|
||||||
|
2335 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 400(%rsp), %zmm17 # 64-byte Reload
|
||||||
|
2336 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm17, %zmm17
|
||||||
|
2337 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm23, %zmm19
|
||||||
|
2338 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm27, %zmm20
|
||||||
|
2339 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm2, %zmm16, %zmm16
|
||||||
|
2340 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13
|
||||||
|
2341 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm20, %zmm28
|
||||||
|
2342 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm19, %zmm19, %zmm28 # zmm28 = (zmm19 * zmm19) + zmm28
|
||||||
|
2343 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm17, %zmm17, %zmm28 # zmm28 = (zmm17 * zmm17) + zmm28
|
||||||
|
2344 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9
|
||||||
|
2345 | 2.00 | | | | | 1.000 | | | | || | | vrcp14pd %zmm28, %zmm3
|
||||||
|
2346 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5
|
||||||
|
2347 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm21, %zmm16
|
||||||
|
2348 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm16, %zmm3, %zmm16
|
||||||
|
2349 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm16, %zmm3, %zmm16
|
||||||
|
2350 | 0.00 | | | | | 1.000 | | | | || | | vaddpd %zmm1, %zmm16, %zmm18
|
||||||
|
2351 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm22, %zmm3
|
||||||
|
2352 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm18, %zmm3, %zmm3
|
||||||
|
2353 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm16, %zmm3
|
||||||
|
2354 | 0.00 | | | | | | 1.00 | | | || | | shlb $3, %dl
|
||||||
|
2355 | 0.00 | | | | | | 1.00 | | | || | | shlb $7, %cl
|
||||||
|
2356 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orb %dl, %cl
|
||||||
|
2357 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | addb $-9, %cl
|
||||||
|
2358 | 1.00 | | | | | | | | | || | | kmovd %ecx, %k1
|
||||||
|
2359 | 0.00 | | | | | 1.000 | | | | || | | vcmpltpd %zmm0, %zmm28, %k1 {%k1}
|
||||||
|
2360 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm2, %zmm3, %zmm3
|
||||||
|
2361 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12
|
||||||
|
2362 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8
|
||||||
|
2363 | 0.24 | | | | | 0.760 | | | | || | | vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4
|
||||||
|
2364 | 0.00 | 1.00 | | | | -0.01 | 0.00 | | | || | | incq %rbx
|
||||||
|
2365 | 0.00 | 1.00 | | | | -0.01 | 0.00 | | | || | | cmpq %rbx, %r9
|
||||||
|
2366 | | | | | | | | | | || | | * jne .LBB5_12
|
||||||
|
2367 | | | | | | | | | | || | | # LLVM-MCA-END
|
||||||
|
|
||||||
|
44.0 15.0 5.50 5.50 5.50 5.50 43.99 15.0 71 6.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
2280 | 6.0 | shlb $4, %bpl | [2280, 2281, 2282, 2305, 2306, 2307]
|
||||||
|
2363 | 4.0 | vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4| [2363]
|
||||||
|
2362 | 4.0 | vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8| [2362]
|
||||||
|
2361 | 4.0 | vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12| [2361]
|
||||||
|
2346 | 4.0 | vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5| [2346]
|
||||||
|
2344 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9| [2344]
|
||||||
|
2340 | 4.0 | vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13| [2340]
|
||||||
|
2321 | 4.0 | vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6| [2321]
|
||||||
|
2319 | 4.0 | vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10| [2319]
|
||||||
|
2315 | 4.0 | vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15| [2315]
|
||||||
|
2296 | 4.0 | vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7| [2296]
|
||||||
|
2294 | 4.0 | vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11| [2294]
|
||||||
|
2290 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14| [2290]
|
||||||
|
2330 | 3.0 | shlb $6, %dil | [2330, 2331, 2332]
|
||||||
|
2364 | 1.0 | incq %rbx | [2364]
|
||||||
|
|
116
static_analysis/jan/analyses/gromacs-icx-avx512-sp-osaca-icx.out
Normal file
116
static_analysis/jan/analyses/gromacs-icx-avx512-sp-osaca-icx.out
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
Open Source Architecture Code Analyzer (OSACA) - 0.4.12
|
||||||
|
Analyzed file: gromacs-icx-avx512-sp.s
|
||||||
|
Architecture: ICX
|
||||||
|
Timestamp: 2023-02-14 12:51:43
|
||||||
|
|
||||||
|
|
||||||
|
P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
|
||||||
|
* - Instruction micro-ops not bound to a port
|
||||||
|
X - No throughput/latency information for this instruction in data file
|
||||||
|
|
||||||
|
|
||||||
|
Combined Analysis Report
|
||||||
|
------------------------
|
||||||
|
Port pressure in cycles
|
||||||
|
| 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD |
|
||||||
|
------------------------------------------------------------------------------------------------------------------------
|
||||||
|
1338 | | | | | | | | | | || | | # pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
|
||||||
|
1339 | | | | | | | | | | || | | # LLVM-MCA-BEGIN
|
||||||
|
1340 | | | | | | | | | | || | | .LBB2_12: # Parent Loop BB2_7 Depth=1
|
||||||
|
1341 | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2
|
||||||
|
1342 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | movslq (%r11,%rax,4), %rcx
|
||||||
|
1343 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || 6.0 | | leaq (%rcx,%rcx,2), %rdx
|
||||||
|
1344 | 0.00 | | | | | | 1.00 | | | || 1.0 | | shlq $5, %rdx
|
||||||
|
1345 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd (%rsi,%rdx), %zmm16
|
||||||
|
1346 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vbroadcastf64x4 64(%rsi,%rdx), %zmm20 # zmm20 = mem[0,1,2,3,0,1,2,3]
|
||||||
|
1347 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vbroadcastf64x4 (%rsi,%rdx), %zmm19 # zmm19 = mem[0,1,2,3,0,1,2,3]
|
||||||
|
1348 | | | | | | 1.000 | | | | || | | vshuff64x2 $238, %zmm16, %zmm16, %zmm21 # zmm21 = zmm16[4,5,6,7,4,5,6,7]
|
||||||
|
1349 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm19, %zmm6, %zmm18
|
||||||
|
1350 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm21, %zmm10, %zmm17
|
||||||
|
1351 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubps %zmm20, %zmm14, %zmm16
|
||||||
|
1352 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm16, %zmm16, %zmm22
|
||||||
|
1353 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm17, %zmm17, %zmm22 # zmm22 = (zmm17 * zmm17) + zmm22
|
||||||
|
1354 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm18, %zmm18, %zmm22 # zmm22 = (zmm18 * zmm18) + zmm22
|
||||||
|
1355 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14ps %zmm22, %zmm23
|
||||||
|
1356 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm23, %zmm26, %zmm24
|
||||||
|
1357 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm24, %zmm23, %zmm24
|
||||||
|
1358 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm24, %zmm23, %zmm24
|
||||||
|
1359 | 0.75 | | | | | 0.250 | | | | || 4.0 | | vaddps %zmm1, %zmm24, %zmm25
|
||||||
|
1360 | 1.00 | | | | | 0.000 | | | | || | | vmulps %zmm23, %zmm27, %zmm23
|
||||||
|
1361 | 1.00 | | | | | 0.000 | | | | || 4.0 | | vmulps %zmm25, %zmm23, %zmm23
|
||||||
|
1362 | 1.00 | | | | | 0.000 | | | | || 4.0 | | vmulps %zmm23, %zmm24, %zmm23
|
||||||
|
1363 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | leal (%rcx,%rcx), %edx
|
||||||
|
1364 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | xorl %edi, %edi
|
||||||
|
1365 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | xorl %ebp, %ebp
|
||||||
|
1366 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | cmpq %rdx, %r12
|
||||||
|
1367 | 0.00 | | | | | | 1.00 | | | || | | setne %dil
|
||||||
|
1368 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | leal 1(%rcx,%rcx), %ecx
|
||||||
|
1369 | 0.00 | | | | | | 1.00 | | | || | | sete %bpl
|
||||||
|
1370 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | xorl %edx, %edx
|
||||||
|
1371 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | xorl %ebx, %ebx
|
||||||
|
1372 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | cmpq %rcx, %r12
|
||||||
|
1373 | 0.00 | | | | | | 1.00 | | | || | | sete %dl
|
||||||
|
1374 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | movl $0, %ecx
|
||||||
|
1375 | 0.00 | | | | | | 1.00 | | | || | | setne %bl
|
||||||
|
1376 | 0.00 | | | | | | 1.00 | | | || | | cmovel %r8d, %ecx
|
||||||
|
1377 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | movl %ebx, %r14d
|
||||||
|
1378 | 0.00 | | | | | | 1.00 | | | || | | shll $4, %r14d
|
||||||
|
1379 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | subl %ebp, %r14d
|
||||||
|
1380 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | leal (%rcx,%rdi,2), %ecx
|
||||||
|
1381 | 0.00 | | | | | | 1.00 | | | || | | shll $8, %ecx
|
||||||
|
1382 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | addl $239, %r14d
|
||||||
|
1383 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | addl $-768, %ecx # imm = 0xFD00
|
||||||
|
1384 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orl %r14d, %ecx
|
||||||
|
1385 | 1.00 | | | | | | | | | || | | kmovd %ecx, %k2
|
||||||
|
1386 | 0.50 | | | | | 0.500 | | | | || | | vcmpltps %zmm0, %zmm22, %k2 {%k2}
|
||||||
|
1387 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm21, %zmm11, %zmm21
|
||||||
|
1388 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm20, %zmm15, %zmm20
|
||||||
|
1389 | 0.50 | | | | | 0.500 | | | | || | | vsubps %zmm19, %zmm7, %zmm19
|
||||||
|
1390 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulps %zmm2, %zmm23, %zmm22
|
||||||
|
1391 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231ps %zmm22, %zmm18, %zmm12 {%k2} # zmm12 {%k2} = (zmm18 * zmm22) + zmm12
|
||||||
|
1392 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm20, %zmm20, %zmm18
|
||||||
|
1393 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm21, %zmm21, %zmm18 # zmm18 = (zmm21 * zmm21) + zmm18
|
||||||
|
1394 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm19, %zmm19, %zmm18 # zmm18 = (zmm19 * zmm19) + zmm18
|
||||||
|
1395 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm22, %zmm17, %zmm9 {%k2} # zmm9 {%k2} = (zmm17 * zmm22) + zmm9
|
||||||
|
1396 | 2.50 | | | | | 0.500 | | | | || | | vrcp14ps %zmm18, %zmm17
|
||||||
|
1397 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231ps %zmm22, %zmm16, %zmm5 {%k2} # zmm5 {%k2} = (zmm16 * zmm22) + zmm5
|
||||||
|
1398 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm17, %zmm26, %zmm16
|
||||||
|
1399 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm16, %zmm17, %zmm16
|
||||||
|
1400 | 0.50 | | | | | 0.500 | | | | || | | vmulps %zmm16, %zmm17, %zmm16
|
||||||
|
1401 | 0.00 | | | | | 1.000 | | | | || | | vaddps %zmm1, %zmm16, %zmm22
|
||||||
|
1402 | 0.00 | | | | | 1.000 | | | | || | | vmulps %zmm17, %zmm27, %zmm17
|
||||||
|
1403 | 0.00 | | | | | 1.000 | | | | || | | vmulps %zmm22, %zmm17, %zmm17
|
||||||
|
1404 | 0.00 | | | | | 1.000 | | | | || | | vmulps %zmm17, %zmm16, %zmm16
|
||||||
|
1405 | 0.00 | | | | | | 1.00 | | | || | | shll $6, %ebx
|
||||||
|
1406 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | leal (%rbx,%rdi,4), %ecx
|
||||||
|
1407 | 0.00 | | | | | | 1.00 | | | || | | shll $7, %edx
|
||||||
|
1408 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | leal (%rdx,%rdi,8), %edx
|
||||||
|
1409 | 0.00 | | | | | | 1.00 | | | || | | shll $8, %edx
|
||||||
|
1410 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | addl %edx, %ecx
|
||||||
|
1411 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | addl $-2117, %ecx # imm = 0xF7BB
|
||||||
|
1412 | 1.00 | | | | | | | | | || | | kmovd %ecx, %k2
|
||||||
|
1413 | 0.00 | | | | | 1.000 | | | | || | | vcmpltps %zmm0, %zmm18, %k2 {%k2}
|
||||||
|
1414 | 0.00 | | | | | 1.000 | | | | || | | vmulps %zmm2, %zmm16, %zmm16
|
||||||
|
1415 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231ps %zmm16, %zmm19, %zmm13 {%k2} # zmm13 {%k2} = (zmm19 * zmm16) + zmm13
|
||||||
|
1416 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231ps %zmm16, %zmm21, %zmm8 {%k2} # zmm8 {%k2} = (zmm21 * zmm16) + zmm8
|
||||||
|
1417 | 0.24 | | | | | 0.760 | | | | || | 4.0 | vfmadd231ps %zmm16, %zmm20, %zmm4 {%k2} # zmm4 {%k2} = (zmm20 * zmm16) + zmm4
|
||||||
|
1418 | 0.00 | 1.00 | | | | -0.01 | 0.00 | | | || | | incq %rax
|
||||||
|
1419 | 0.00 | 1.00 | | | | -0.01 | 0.00 | | | || | | cmpq %rax, %r10
|
||||||
|
1420 | | | | | | | | | | || | | * jne .LBB2_12
|
||||||
|
1421 | | | | | | | | | | || | | # LLVM-MCA-END
|
||||||
|
|
||||||
|
22.5 16.5 2.00 2.00 2.00 2.00 22.49 16.5 71 4.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loop-Carried Dependencies Analysis Report
|
||||||
|
-----------------------------------------
|
||||||
|
1417 | 4.0 | vfmadd231ps %zmm16, %zmm20, %zmm4 {%k2} # zmm4 {%k2} = (zmm20 * zmm16) + zmm4| [1417]
|
||||||
|
1416 | 4.0 | vfmadd231ps %zmm16, %zmm21, %zmm8 {%k2} # zmm8 {%k2} = (zmm21 * zmm16) + zmm8| [1416]
|
||||||
|
1415 | 4.0 | vfmadd231ps %zmm16, %zmm19, %zmm13 {%k2} # zmm13 {%k2} = (zmm19 * zmm16) + zmm13| [1415]
|
||||||
|
1397 | 4.0 | vfmadd231ps %zmm22, %zmm16, %zmm5 {%k2} # zmm5 {%k2} = (zmm16 * zmm22) + zmm5| [1397]
|
||||||
|
1395 | 4.0 | vfmadd231ps %zmm22, %zmm17, %zmm9 {%k2} # zmm9 {%k2} = (zmm17 * zmm22) + zmm9| [1395]
|
||||||
|
1391 | 4.0 | vfmadd231ps %zmm22, %zmm18, %zmm12 {%k2} # zmm12 {%k2} = (zmm18 * zmm22) + zmm12| [1391]
|
||||||
|
1418 | 1.0 | incq %rax | [1418]
|
||||||
|
|
Binary file not shown.
@ -1331,11 +1331,12 @@ computeForceLJ_2xnn_full: #
|
|||||||
vxorps %xmm8, %xmm8, %xmm8
|
vxorps %xmm8, %xmm8, %xmm8
|
||||||
vxorps %xmm4, %xmm4, %xmm4
|
vxorps %xmm4, %xmm4, %xmm4
|
||||||
.p2align 4, 0x90
|
.p2align 4, 0x90
|
||||||
# LLVM-MCA-BEGIN
|
|
||||||
movl $111, %ebx # OSACA START MARKER
|
movl $111, %ebx # OSACA START MARKER
|
||||||
.byte 100 # OSACA START MARKER
|
.byte 100 # OSACA START MARKER
|
||||||
.byte 103 # OSACA START MARKER
|
.byte 103 # OSACA START MARKER
|
||||||
.byte 144 # OSACA START MARKER
|
.byte 144 # OSACA START MARKER
|
||||||
|
# pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
|
||||||
|
# LLVM-MCA-BEGIN
|
||||||
.LBB2_12: # Parent Loop BB2_7 Depth=1
|
.LBB2_12: # Parent Loop BB2_7 Depth=1
|
||||||
# => This Inner Loop Header: Depth=2
|
# => This Inner Loop Header: Depth=2
|
||||||
movslq (%r11,%rax,4), %rcx
|
movslq (%r11,%rax,4), %rcx
|
||||||
@ -1665,7 +1666,6 @@ computeForceLJ_4xn_half: #
|
|||||||
vmovups 64(%rsp), %zmm6 # 64-byte Reload
|
vmovups 64(%rsp), %zmm6 # 64-byte Reload
|
||||||
vmovups 512(%rsp), %zmm7 # 64-byte Reload
|
vmovups 512(%rsp), %zmm7 # 64-byte Reload
|
||||||
.p2align 4, 0x90
|
.p2align 4, 0x90
|
||||||
# pointer_increment=64 0f91ac4f7fe1a70d0c899f7f3e745649
|
|
||||||
.LBB4_8: # =>This Inner Loop Header: Depth=1
|
.LBB4_8: # =>This Inner Loop Header: Depth=1
|
||||||
movslq (%r11,%rdx,4), %rax
|
movslq (%r11,%rdx,4), %rax
|
||||||
movq %rax, %rsi
|
movq %rax, %rsi
|
||||||
|
Loading…
Reference in New Issue
Block a user