Open Source Architecture Code Analyzer (OSACA) - 0.4.12 Analyzed file: gromacs-icx-avx512-dp.s Architecture: ICX Timestamp: 2023-02-14 12:51:57 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction * - Instruction micro-ops not bound to a port X - No throughput/latency information for this instruction in data file Combined Analysis Report ------------------------ Port pressure in cycles | 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD | ------------------------------------------------------------------------------------------------------------------------ 2241 | | | | | | | | | | || | | # pointer_increment=64 da67166e5736661e6b03ea29ee7bfd67 2242 | | | | | | | | | | || | | # LLVM-MCA-BEGIN 2243 | | | | | | | | | | || | | .LBB5_12: # Parent Loop BB5_7 Depth=1 2244 | | | | | | | | | | || | | # => This Inner Loop Header: Depth=2 2245 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | movslq (%r10,%rbx,4), %rcx 2246 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || 6.0 | | leaq (%rcx,%rcx,2), %rdx 2247 | 0.00 | | | | | | 1.00 | | | || 1.0 | | shlq $6, %rdx 2248 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd (%rsi,%rdx), %zmm28 # AlignMOV convert to UnAlignMOV 2249 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 64(%rsi,%rdx), %zmm29 # AlignMOV convert to UnAlignMOV 2250 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovupd 128(%rsi,%rdx), %zmm30 # AlignMOV convert to UnAlignMOV 2251 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 16(%rsp), %zmm3 # 64-byte Reload 2252 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm3, %zmm3 2253 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vsubpd %zmm30, %zmm24, %zmm31 2254 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 336(%rsp), %zmm16 # 64-byte Reload 2255 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm16, %zmm16 2256 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm31, %zmm31, %zmm17 2257 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm16, %zmm16, %zmm17 # zmm17 = (zmm16 * zmm16) + zmm17 2258 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm3, %zmm3, %zmm17 # zmm17 = (zmm3 * zmm3) + zmm17 2259 | 2.50 | | | | | 0.500 | | | | || 6.0 | | vrcp14pd %zmm17, %zmm18 2260 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm18, %zmm21, %zmm19 2261 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm19 2262 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm19, %zmm18, %zmm19 2263 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vaddpd %zmm1, %zmm19, %zmm20 2264 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm22, %zmm18 2265 | 0.75 | | | | | 0.250 | | | | || 4.0 | | vmulpd %zmm20, %zmm18, %zmm18 2266 | 1.00 | | | | | 0.000 | | | | || | | vsubpd %zmm30, %zmm25, %zmm20 2267 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | leal (%rcx,%rcx), %edx 2268 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | cmpq %rdx, %r11 2269 | 0.00 | | | | | | 1.00 | | | || | | setne %dl 2270 | 0.00 | | | | | | 1.00 | | | || | | sete %al 2271 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | addl %ecx, %ecx 2272 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | incl %ecx 2273 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | cmpq %rcx, %r11 2274 | 0.00 | | | | | | 1.00 | | | || | | sete %cl 2275 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm18, %zmm19, %zmm18 2276 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 528(%rsp), %zmm19 # 64-byte Reload 2277 | 1.00 | | | | | 0.000 | | | | || | | vsubpd %zmm28, %zmm19, %zmm19 2278 | 0.00 | | | | | | 1.00 | | | || | | setne %dil 2279 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | movl %edi, %ebp 2280 | 0.00 | | | | | | 1.00 | | | || | 1.0 | shlb $4, %bpl 2281 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | subb %al, %bpl 2282 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | addb $-17, %bpl 2283 | 1.00 | | | | | | | | | || | | kmovd %ebp, %k1 2284 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm17, %k1 {%k1} 2285 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 272(%rsp), %zmm17 # 64-byte Reload 2286 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm17, %zmm17 2287 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | | leal (%rdx,%rdx), %eax 2288 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | movl %edi, %ebp 2289 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vmulpd %zmm2, %zmm18, %zmm18 2290 | 0.50 | | | | | 0.500 | | | | || 4.0 | | vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14 2291 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm20, %zmm3 2292 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm17, %zmm17, %zmm3 # zmm3 = (zmm17 * zmm17) + zmm3 2293 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm19, %zmm19, %zmm3 # zmm3 = (zmm19 * zmm19) + zmm3 2294 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11 2295 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm3, %zmm16 2296 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7 2297 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm21, %zmm18 2298 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm16, %zmm18 2299 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm16, %zmm18 2300 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm1, %zmm18, %zmm31 2301 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm22, %zmm16 2302 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm31, %zmm16, %zmm16 2303 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 464(%rsp), %zmm31 # 64-byte Reload 2304 | 0.75 | | | | | 0.250 | | | | || | | vsubpd %zmm28, %zmm31, %zmm31 2305 | 0.00 | | | | | | 1.00 | | | || | 1.0 | shlb $5, %bpl 2306 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | orb %al, %bpl 2307 | 0.00 | 0.50 | | | | 0.000 | 0.50 | | | || | 1.0 | orb $-35, %bpl 2308 | 1.00 | | | | | | | | | || | | kmovd %ebp, %k1 2309 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm3, %k1 {%k1} 2310 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 208(%rsp), %zmm3 # 64-byte Reload 2311 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm3, %zmm3 2312 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm18, %zmm16 2313 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm26, %zmm18 2314 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm2, %zmm16, %zmm16 2315 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15 2316 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm18, %zmm18, %zmm19 2317 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm3, %zmm3, %zmm19 # zmm19 = (zmm3 * zmm3) + zmm19 2318 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm31, %zmm31, %zmm19 # zmm19 = (zmm31 * zmm31) + zmm19 2319 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10 2320 | 2.50 | | | | | 0.500 | | | | || | | vrcp14pd %zmm19, %zmm17 2321 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6 2322 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm17, %zmm21, %zmm16 2323 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm17, %zmm16 2324 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm16, %zmm17, %zmm16 2325 | 0.50 | | | | | 0.500 | | | | || | | vaddpd %zmm1, %zmm16, %zmm20 2326 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm17, %zmm22, %zmm17 2327 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm17, %zmm17 2328 | 0.75 | | | | | 0.250 | | | | || | | vmulpd %zmm17, %zmm16, %zmm16 2329 | 0.00 | 0.75 | | | | 0.000 | 0.25 | | | || | | leal (,%rdx,4), %eax 2330 | 0.00 | | | | | | 1.00 | | | || | | shlb $6, %dil 2331 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orb %al, %dil 2332 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orb $-69, %dil 2333 | 1.00 | | | | | | | | | || | | kmovd %edi, %k1 2334 | 0.50 | | | | | 0.500 | | | | || | | vcmpltpd %zmm0, %zmm19, %k1 {%k1} 2335 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovupd 400(%rsp), %zmm17 # 64-byte Reload 2336 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm28, %zmm17, %zmm17 2337 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm29, %zmm23, %zmm19 2338 | 0.50 | | | | | 0.500 | | | | || | | vsubpd %zmm30, %zmm27, %zmm20 2339 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm2, %zmm16, %zmm16 2340 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13 2341 | 0.50 | | | | | 0.500 | | | | || | | vmulpd %zmm20, %zmm20, %zmm28 2342 | 0.50 | | | | | 0.500 | | | | || | | vfmadd231pd %zmm19, %zmm19, %zmm28 # zmm28 = (zmm19 * zmm19) + zmm28 2343 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm17, %zmm17, %zmm28 # zmm28 = (zmm17 * zmm17) + zmm28 2344 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9 2345 | 2.00 | | | | | 1.000 | | | | || | | vrcp14pd %zmm28, %zmm3 2346 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5 2347 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm21, %zmm16 2348 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm16, %zmm3, %zmm16 2349 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm16, %zmm3, %zmm16 2350 | 0.00 | | | | | 1.000 | | | | || | | vaddpd %zmm1, %zmm16, %zmm18 2351 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm22, %zmm3 2352 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm18, %zmm3, %zmm3 2353 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm3, %zmm16, %zmm3 2354 | 0.00 | | | | | | 1.00 | | | || | | shlb $3, %dl 2355 | 0.00 | | | | | | 1.00 | | | || | | shlb $7, %cl 2356 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | orb %dl, %cl 2357 | 0.00 | 1.00 | | | | 0.000 | 0.00 | | | || | | addb $-9, %cl 2358 | 1.00 | | | | | | | | | || | | kmovd %ecx, %k1 2359 | 0.00 | | | | | 1.000 | | | | || | | vcmpltpd %zmm0, %zmm28, %k1 {%k1} 2360 | 0.00 | | | | | 1.000 | | | | || | | vmulpd %zmm2, %zmm3, %zmm3 2361 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12 2362 | 0.00 | | | | | 1.000 | | | | || | | vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8 2363 | 0.24 | | | | | 0.760 | | | | || | | vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4 2364 | 0.00 | 1.00 | | | | -0.01 | 0.00 | | | || | | incq %rbx 2365 | 0.00 | 1.00 | | | | -0.01 | 0.00 | | | || | | cmpq %rbx, %r9 2366 | | | | | | | | | | || | | * jne .LBB5_12 2367 | | | | | | | | | | || | | # LLVM-MCA-END 44.0 15.0 5.50 5.50 5.50 5.50 43.99 15.0 71 6.0 Loop-Carried Dependencies Analysis Report ----------------------------------------- 2280 | 6.0 | shlb $4, %bpl | [2280, 2281, 2282, 2305, 2306, 2307] 2363 | 4.0 | vfmadd231pd %zmm3, %zmm20, %zmm4 {%k1} # zmm4 {%k1} = (zmm20 * zmm3) + zmm4| [2363] 2362 | 4.0 | vfmadd231pd %zmm3, %zmm19, %zmm8 {%k1} # zmm8 {%k1} = (zmm19 * zmm3) + zmm8| [2362] 2361 | 4.0 | vfmadd231pd %zmm3, %zmm17, %zmm12 {%k1} # zmm12 {%k1} = (zmm17 * zmm3) + zmm12| [2361] 2346 | 4.0 | vfmadd231pd %zmm16, %zmm18, %zmm5 {%k1} # zmm5 {%k1} = (zmm18 * zmm16) + zmm5| [2346] 2344 | 4.0 | vfmadd231pd %zmm16, %zmm3, %zmm9 {%k1} # zmm9 {%k1} = (zmm3 * zmm16) + zmm9| [2344] 2340 | 4.0 | vfmadd231pd %zmm16, %zmm31, %zmm13 {%k1} # zmm13 {%k1} = (zmm31 * zmm16) + zmm13| [2340] 2321 | 4.0 | vfmadd231pd %zmm16, %zmm20, %zmm6 {%k1} # zmm6 {%k1} = (zmm20 * zmm16) + zmm6| [2321] 2319 | 4.0 | vfmadd231pd %zmm16, %zmm17, %zmm10 {%k1} # zmm10 {%k1} = (zmm17 * zmm16) + zmm10| [2319] 2315 | 4.0 | vfmadd231pd %zmm16, %zmm19, %zmm15 {%k1} # zmm15 {%k1} = (zmm19 * zmm16) + zmm15| [2315] 2296 | 4.0 | vfmadd231pd %zmm18, %zmm31, %zmm7 {%k1} # zmm7 {%k1} = (zmm31 * zmm18) + zmm7| [2296] 2294 | 4.0 | vfmadd231pd %zmm18, %zmm16, %zmm11 {%k1} # zmm11 {%k1} = (zmm16 * zmm18) + zmm11| [2294] 2290 | 4.0 | vfmadd231pd %zmm18, %zmm3, %zmm14 {%k1} # zmm14 {%k1} = (zmm3 * zmm18) + zmm14| [2290] 2330 | 3.0 | shlb $6, %dil | [2330, 2331, 2332] 2364 | 1.0 | incq %rbx | [2364]