Open Source Architecture Code Analyzer (OSACA) - 0.4.12
Analyzed file:      gromacs-avx512-dp-ICX.s
Architecture:       CSX
Timestamp:          2023-01-03 00:07:20


 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction
 * - Instruction micro-ops not bound to a port
 X - No throughput/latency information for this instruction in data file


Combined Analysis Report
------------------------
                                      Port pressure in cycles                                       
     |  0   - 0DV  |  1   |  2   -  2D  |  3   -  3D  |  4   |  5   |  6   |  7   ||  CP  | LCD  |
--------------------------------------------------------------------------------------------------
2287 |             |      |             |             |      |      |      |      ||      |      |   .LBB5_11:                               #
2288 |             |      |             |             |      |      |      |      ||      |      |   #   Parent Loop BB5_6 Depth=1
2289 |             |      |             |             |      |      |      |      ||      |      |   # =>  This Inner Loop Header: Depth=2
2290 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||  4.0 |      |   movslq (%r12,%r14,4), %rbx
2291 |             | 1.00 |             |             |      | 0.00 |      |      ||  1.0 |      |   leaq (%rbx,%rbx,2), %rcx
2292 | 0.00        |      |             |             |      |      | 1.00 |      ||  1.0 |      |   shlq $6, %rcx
2293 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovapd (%rsi,%rcx), %zmm29
2294 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovapd 64(%rsi,%rcx), %zmm30
2295 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||  0.0 |      |   vmovapd 128(%rsi,%rcx), %zmm31
2296 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 64(%rsp), %zmm3         # 64-byte Reload
2297 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm29, %zmm3, %zmm4
2298 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 320(%rsp), %zmm3        # 64-byte Reload
2299 | 0.75        |      |             |             |      | 0.25 |      |      ||      |      |   vsubpd %zmm30, %zmm3, %zmm3
2300 |             | 1.00 |             |             |      | 0.00 |      |      ||      |      |   leal (%rbx,%rbx), %ecx
2301 | 0.00        | 1.00 |             |             |      | 0.00 | 0.00 |      ||      |      |   cmpq %rcx, %rdi
2302 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   setne %dl
2303 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   sete %cl
2304 |             | 1.00 |             |             |      |      |      |      ||      |      |   leal 1(%rbx,%rbx), %ebx
2305 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vsubpd %zmm31, %zmm25, %zmm17
2306 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm17, %zmm17, %zmm18
2307 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd231pd %zmm3, %zmm3, %zmm18 # zmm18 = (zmm3 * zmm3) + zmm18
2308 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd231pd %zmm4, %zmm4, %zmm18 # zmm18 = (zmm4 * zmm4) + zmm18
2309 | 2.75        |      |             |             |      | 0.25 |      |      ||  8.0 |      |   vrcp14pd %zmm18, %zmm19
2310 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   cmpq %rbx, %rdi
2311 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   sete %bl
2312 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   movl %ebx, %ebp
2313 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm22, %zmm19, %zmm20
2314 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm19, %zmm19, %zmm21
2315 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm21, %zmm20
2316 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 128(%rsp), %zmm21       # 64-byte Reload
2317 | 0.75        |      |             |             |      | 0.25 |      |      ||      |      |   vsubpd %zmm29, %zmm21, %zmm21
2318 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $4, %bpl
2319 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm19, %zmm1, %zmm19
2320 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm19, %zmm19
2321 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd %zmm2, %zmm20, %zmm20
2322 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vmulpd %zmm20, %zmm19, %zmm19
2323 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 256(%rsp), %zmm20       # 64-byte Reload
2324 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm30, %zmm20, %zmm20
2325 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   notb %bpl
2326 | 0.00        | 0.25 |             |             |      | 0.00 | 0.75 |      ||      |      |   subb %cl, %bpl
2327 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %ebp, %k1
2328 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm18, %k1 {%k1}
2329 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm31, %zmm26, %zmm18
2330 | 0.50        |      |             |             |      | 0.50 |      |      ||  4.0 |      |   vfmadd231pd %zmm4, %zmm19, %zmm15 {%k1} # zmm15 = (zmm19 * zmm4) + zmm15
2331 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm18, %zmm18, %zmm4
2332 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm20, %zmm4 # zmm4 = (zmm20 * zmm20) + zmm4
2333 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm21, %zmm21, %zmm4 # zmm4 = (zmm21 * zmm21) + zmm4
2334 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm3, %zmm19, %zmm12 {%k1} # zmm12 = (zmm19 * zmm3) + zmm12
2335 | 2.25        |      |             |             |      | 0.75 |      |      ||      |      |   vrcp14pd %zmm4, %zmm3
2336 |             | 1.00 |             |             |      | 0.00 |      |      ||      |      |   leal (%rdx,%rdx), %ecx
2337 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   movl %ebx, %eax
2338 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm17, %zmm19, %zmm8 {%k1} # zmm8 = (zmm19 * zmm17) + zmm8
2339 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm22, %zmm3, %zmm17
2340 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm3, %zmm19
2341 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm17, %zmm19, %zmm17
2342 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 448(%rsp), %zmm19       # 64-byte Reload
2343 | 0.75        |      |             |             |      | 0.25 |      |      ||      |      |   vsubpd %zmm29, %zmm19, %zmm19
2344 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $5, %al
2345 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm1, %zmm3
2346 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm17, %zmm3, %zmm3
2347 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd %zmm2, %zmm17, %zmm17
2348 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm17, %zmm3, %zmm3
2349 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm30, %zmm23, %zmm17
2350 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   subb %al, %cl
2351 | 0.00        | 0.25 |             |             |      | 0.00 | 0.75 |      ||      |      |   addb $-3, %cl
2352 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %ecx, %k1
2353 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm4, %k1 {%k1}
2354 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm31, %zmm27, %zmm4
2355 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm21, %zmm3, %zmm14 {%k1} # zmm14 = (zmm3 * zmm21) + zmm14
2356 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm4, %zmm4, %zmm21
2357 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm17, %zmm17, %zmm21 # zmm21 = (zmm17 * zmm17) + zmm21
2358 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm19, %zmm19, %zmm21 # zmm21 = (zmm19 * zmm19) + zmm21
2359 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm3, %zmm10 {%k1} # zmm10 = (zmm3 * zmm20) + zmm10
2360 | 2.50        |      |             |             |      | 0.50 |      |      ||      |      |   vrcp14pd %zmm21, %zmm20
2361 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm18, %zmm3, %zmm6 {%k1} # zmm6 = (zmm3 * zmm18) + zmm6
2362 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm22, %zmm20, %zmm3
2363 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm20, %zmm20, %zmm18
2364 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm18, %zmm3
2365 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm20, %zmm1, %zmm18
2366 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm18, %zmm18
2367 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vaddpd %zmm2, %zmm3, %zmm3
2368 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm3, %zmm18, %zmm3
2369 |             | 1.00 |             |             |      | 0.00 |      |      ||      |      |   leal (,%rdx,4), %eax
2370 | 0.00        | 0.75 |             |             |      | 0.00 | 0.25 |      ||      |      |   movl %ebx, %ecx
2371 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $6, %cl
2372 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   subb %cl, %al
2373 | 0.00        | 1.00 |             |             |      | 0.00 | 0.00 |      ||      |      |   addb $-5, %al
2374 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %eax, %k1
2375 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm21, %k1 {%k1}
2376 |             |      | 0.50   0.50 | 0.50   0.50 |      |      |      |      ||      |      |   vmovupd 384(%rsp), %zmm18       # 64-byte Reload
2377 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm29, %zmm18, %zmm18
2378 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm30, %zmm24, %zmm20
2379 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vsubpd %zmm31, %zmm28, %zmm21
2380 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm19, %zmm3, %zmm16 {%k1} # zmm16 = (zmm3 * zmm19) + zmm16
2381 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vmulpd %zmm21, %zmm21, %zmm19
2382 | 0.50        |      |             |             |      | 0.50 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm20, %zmm19 # zmm19 = (zmm20 * zmm20) + zmm19
2383 | 0.25        |      |             |             |      | 0.75 |      |      ||      |      |   vfmadd231pd %zmm18, %zmm18, %zmm19 # zmm19 = (zmm18 * zmm18) + zmm19
2384 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm17, %zmm3, %zmm11 {%k1} # zmm11 = (zmm3 * zmm17) + zmm11
2385 | 2.00        |      |             |             |      | 1.00 |      |      ||      |      |   vrcp14pd %zmm19, %zmm17
2386 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm4, %zmm3, %zmm7 {%k1} # zmm7 = (zmm3 * zmm4) + zmm7
2387 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm22, %zmm17, %zmm3
2388 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm17, %zmm17, %zmm4
2389 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm3, %zmm4, %zmm3
2390 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm17, %zmm1, %zmm4
2391 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm3, %zmm4, %zmm4
2392 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vaddpd %zmm2, %zmm3, %zmm3
2393 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vmulpd %zmm3, %zmm4, %zmm3
2394 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $3, %dl
2395 | 0.00        |      |             |             |      |      | 1.00 |      ||      |      |   shlb $7, %bl
2396 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   subb %bl, %dl
2397 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   addb $-9, %dl
2398 | 1.00        |      |             |             |      |      |      |      ||      |      |   kmovd %edx, %k1
2399 |             |      |             |             |      |      |      |      ||      |      | X vcmpltpd %zmm0, %zmm19, %k1 {%k1}
2400 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm18, %zmm3, %zmm13 {%k1} # zmm13 = (zmm3 * zmm18) + zmm13
2401 | 0.00        |      |             |             |      | 1.00 |      |      ||      |      |   vfmadd231pd %zmm20, %zmm3, %zmm9 {%k1} # zmm9 = (zmm3 * zmm20) + zmm9
2402 | 0.00        |      |             |             |      | 1.00 |      |      ||      |  4.0 |   vfmadd231pd %zmm21, %zmm3, %zmm5 {%k1} # zmm5 = (zmm3 * zmm21) + zmm5
2403 | 0.00        | 0.50 |             |             |      | 0.00 | 0.50 |      ||      |      |   incq %r14
2404 | 0.00        | 1.00 |             |             |      | 0.00 | 0.00 |      ||      |      |   cmpq %r14, %r11
2405 |             |      |             |             |      |      |      |      ||      |      | * jne .LBB5_11

       40.0          14.5   5.00   5.00   5.00   5.00          40.0   14.5           50.0    4.0  




Loop-Carried Dependencies Analysis Report
-----------------------------------------
2402 |  4.0 | vfmadd231pd	%zmm21, %zmm3, %zmm5 {%k1} # zmm5 = (zmm3 * zmm21) + zmm5| [2402]
2401 |  4.0 | vfmadd231pd	%zmm20, %zmm3, %zmm9 {%k1} # zmm9 = (zmm3 * zmm20) + zmm9| [2401]
2400 |  4.0 | vfmadd231pd	%zmm18, %zmm3, %zmm13 {%k1} # zmm13 = (zmm3 * zmm18) + zmm13| [2400]
2386 |  4.0 | vfmadd231pd	%zmm4, %zmm3, %zmm7 {%k1} # zmm7 = (zmm3 * zmm4) + zmm7| [2386]
2384 |  4.0 | vfmadd231pd	%zmm17, %zmm3, %zmm11 {%k1} # zmm11 = (zmm3 * zmm17) + zmm11| [2384]
2380 |  4.0 | vfmadd231pd	%zmm19, %zmm3, %zmm16 {%k1} # zmm16 = (zmm3 * zmm19) + zmm16| [2380]
2361 |  4.0 | vfmadd231pd	%zmm18, %zmm3, %zmm6 {%k1} # zmm6 = (zmm3 * zmm18) + zmm6| [2361]
2359 |  4.0 | vfmadd231pd	%zmm20, %zmm3, %zmm10 {%k1} # zmm10 = (zmm3 * zmm20) + zmm10| [2359]
2355 |  4.0 | vfmadd231pd	%zmm21, %zmm3, %zmm14 {%k1} # zmm14 = (zmm3 * zmm21) + zmm14| [2355]
2338 |  4.0 | vfmadd231pd	%zmm17, %zmm19, %zmm8 {%k1} # zmm8 = (zmm19 * zmm17) + zmm8| [2338]
2334 |  4.0 | vfmadd231pd	%zmm3, %zmm19, %zmm12 {%k1} # zmm12 = (zmm19 * zmm3) + zmm12| [2334]
2330 |  4.0 | vfmadd231pd	%zmm4, %zmm19, %zmm15 {%k1} # zmm15 = (zmm19 * zmm4) + zmm15| [2330]
2394 |  3.0 | shlb	$3, %dl                        | [2394, 2396, 2397]
2318 |  3.0 | shlb	$4, %bpl                       | [2318, 2325, 2326]
2403 |  1.0 | incq	%r14                           | [2403]

