Open Source Architecture Code Analyzer (OSACA) - 0.4.12 Analyzed file: lammps-icc-avx2.s Architecture: ICX Timestamp: 2023-02-10 16:29:48 P - Throughput of LOAD operation can be hidden behind a past or future STORE instruction * - Instruction micro-ops not bound to a port X - No throughput/latency information for this instruction in data file Combined Analysis Report ------------------------ Port pressure in cycles | 0 - 0DV | 1 - 1DV | 2 - 2D | 3 - 3D | 4 | 5 | 6 | 7 | 8 | 9 || CP | LCD | ----------------------------------------------------------------------------------------------------------------------- 256 | | | | | | | | | | || | | # pointer_increment=32 724d27eafcb27eabca1528ddfdbdba3e 257 | | | | | | | | | | || | | # LLVM-MCA-BEGIN 258 | | | | | | | | | | || | | ..B1.22: # Preds ..B1.24 ..B1.21 259 | | | | | | | | | | || | | # Execution count [2.50e+01] 260 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovdqu (%rbx,%rdx,4), %xmm0 #60.21 261 | 1.00 | | | | | | | | | || 1.0 | | vmovq %xmm0, %rcx #60.21 262 | | 0.50 | | | | 0.50 | | | | || | | vpunpckhqdq %xmm0, %xmm0, %xmm2 #60.21 263 | 1.00 | | | | | | | | | || | | vmovq %xmm2, %r15 #60.21 264 | 0.37 | 0.00 | | | | 0.25 | 0.38 | | | || 1.0 | | movl %ecx, %r8d #60.21 265 | 0.50 | | | | | | 0.50 | | | || | | shrq $32, %rcx #60.21 266 | 0.13 | 0.00 | | | | 0.00 | 0.87 | | | || | | lea (%rcx,%rcx,2), %r14d #61.36 267 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || 6.0 | | lea (%r8,%r8,2), %r8d #61.36 268 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || 1.0 | | movslq %r8d, %rcx #61.36 269 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movslq %r14d, %r8 #61.36 270 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movl %r15d, %r14d #60.21 271 | 0.00 | | | | | | 1.00 | | | || | | shrq $32, %r15 #60.21 272 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || 5.0 | | vmovups (%r11,%rcx,8), %xmm7 #61.36 273 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovups (%r11,%r8,8), %xmm6 #61.36 274 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovq 16(%r11,%rcx,8), %xmm14 #61.36 275 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | lea (%r14,%r14,2), %r14d #61.36 276 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movslq %r14d, %r14 #61.36 277 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | lea (%r15,%r15,2), %r15d #61.36 278 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | movslq %r15d, %r15 #61.36 279 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || | | vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 #61.36 280 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || 3.0 | | vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 #61.36 281 | | | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmovq 16(%r11,%r14,8), %xmm0 #61.36 282 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || | | vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 #61.36 283 | | | 0.50 0.50 | 0.50 0.50 | | 1.00 | | | | || | | vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 #61.36 284 | | | | | | 1.00 | | | | || | | vunpcklpd %ymm6, %ymm1, %ymm14 #61.36 285 | | | | | | 1.00 | | | | || 1.0 | | vunpckhpd %ymm6, %ymm1, %ymm1 #61.36 286 | 0.50 | 0.50 | | | | | | | | || | | vsubpd %ymm14, %ymm10, %ymm6 #61.36 287 | | | | | | 1.00 | | | | || | | vinsertf128 $1, %xmm2, %ymm15, %ymm7 #61.36 288 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vsubpd %ymm1, %ymm9, %ymm2 #62.36 289 | 0.50 | 0.50 | | | | | | | | || | | vsubpd %ymm7, %ymm8, %ymm0 #63.36 290 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm2, %ymm2, %ymm14 #64.49 291 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vfmadd231pd %ymm6, %ymm6, %ymm14 #64.49 292 | 0.75 | 0.25 | | | | | | | | || 4.0 | | vfmadd231pd %ymm0, %ymm0, %ymm14 #64.63 293 | 0.00 | | | | | 1.00 | | | | || | | vcmpltpd %ymm5, %ymm14, %ymm1 #74.22 294 | 0.50 | 0.50 | | | | | | | | || | | vpcmpeqd %ymm7, %ymm7, %ymm7 #74.22 295 | 1.00 | | | | | 1.00 | | | | || | | vptest %ymm7, %ymm1 #74.22 296 | | | | | | | | | | || | | #je ..B1.24 # Prob 50% #74.22 297 | | | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14 298 | | | | | | | | | | || | | ..B1.23: # Preds ..B1.22 299 | | | | | | | | | | || | | # Execution count [1.25e+01] 300 | 1.00 8.00 | | | | | | | | | || 13.0 | | vdivpd %ymm14, %ymm4, %ymm7 #75.39 301 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | | | || 4.0 | | vmulpd 96(%rsp), %ymm7, %ymm14 #76.38[spill] 302 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm14 #76.44 303 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm14, %ymm7, %ymm15 #76.50 304 | 0.50 | 0.50 | | | | | | | | || | | vfmsub213pd %ymm3, %ymm7, %ymm14 #77.55 305 | 0.50 | 0.50 | 0.50 0.50 | 0.50 0.50 | | | | | | || | | vmulpd 64(%rsp), %ymm7, %ymm7 #77.55[spill] 306 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm7, %ymm15, %ymm15 #77.64 307 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm14, %ymm15, %ymm7 #77.70 308 | 0.50 | 0.50 | | | | | | | | || 4.0 | | vmulpd %ymm7, %ymm6, %ymm6 #78.31 309 | 0.00 | 1.00 | | | | | | | | || | | vmulpd %ymm7, %ymm2, %ymm2 #79.31 310 | 0.00 | 0.00 | | | | 1.00 | | | | || 1.0 | | vandpd %ymm6, %ymm1, %ymm6 #78.31 311 | 0.00 | 1.00 | | | | | | | | || 4.0 | | vaddpd %ymm6, %ymm13, %ymm13 #78.17 312 | 0.00 | 1.00 | | | | | | | | || | | vmulpd %ymm7, %ymm0, %ymm6 #80.31 313 | 0.00 | 0.00 | | | | 1.00 | | | | || | | vandpd %ymm2, %ymm1, %ymm0 #79.31 314 | 0.00 | 0.00 | | | | 1.00 | | | | || | | vandpd %ymm6, %ymm1, %ymm1 #80.31 315 | 0.00 | 1.00 | | | | | | | | || | | vaddpd %ymm0, %ymm12, %ymm12 #79.17 316 | 0.00 | 1.00 | | | | | | | | || | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17 317 | | | | | | | | | | || | | # LOE rax rdx rbx rsi rdi r9 r10 r11 r12 r13d ymm3 ymm4 ymm5 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 318 | | | | | | | | | | || | | ..B1.24: # Preds ..B1.23 ..B1.22 319 | | | | | | | | | | || | | # Execution count [2.50e+01] 320 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | addq $4, %rdx #59.9 321 | 0.00 | 0.00 | | | | 0.00 | 1.00 | | | || | | cmpq %rsi, %rdx #59.9 322 | | | | | | | | | | || | | * jb ..B1.22 # Prob 82% #59.9 323 | | | | | | | | | | || | | # LLVM-MCA-END 12.8 8.00 12.8 5.50 5.50 5.50 5.50 12.8 12.8 81 4.0 Loop-Carried Dependencies Analysis Report ----------------------------------------- 316 | 4.0 | vaddpd %ymm1, %ymm11, %ymm11 #80.17| [316] 315 | 4.0 | vaddpd %ymm0, %ymm12, %ymm12 #79.17| [315] 311 | 4.0 | vaddpd %ymm6, %ymm13, %ymm13 #78.17| [311] 320 | 1.0 | addq $4, %rdx #59.9| [320]