[0] Code Region Iterations: 100 Instructions: 5600 Total Cycles: 2306 Total uOps: 6300 Dispatch Width: 6 uOps Per Cycle: 2.73 IPC: 2.43 Block RThroughput: 10.5 Instruction Info: [1]: #uOps [2]: Latency [3]: RThroughput [4]: MayLoad [5]: MayStore [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: 1 6 0.50 * vmovdqu (%rbx,%rdx,4), %xmm0 1 2 1.00 vmovq %xmm0, %rcx 1 1 0.50 vpunpckhqdq %xmm0, %xmm0, %xmm2 1 2 1.00 vmovq %xmm2, %r15 1 1 0.25 movl %ecx, %r8d 1 1 0.50 shrq $32, %rcx 1 1 0.50 leal (%rcx,%rcx,2), %r14d 1 1 0.50 leal (%r8,%r8,2), %r8d 1 1 0.25 movslq %r8d, %rcx 1 1 0.25 movslq %r14d, %r8 1 1 0.25 movl %r15d, %r14d 1 1 0.50 shrq $32, %r15 1 6 0.50 * vmovups (%r11,%rcx,8), %xmm7 1 6 0.50 * vmovups (%r11,%r8,8), %xmm6 1 5 0.50 * vmovq 16(%r11,%rcx,8), %xmm14 1 1 0.50 leal (%r14,%r14,2), %r14d 1 1 0.25 movslq %r14d, %r14 1 1 0.50 leal (%r15,%r15,2), %r15d 1 1 0.25 movslq %r15d, %r15 2 6 1.00 * vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 2 7 0.50 * vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 1 5 0.50 * vmovq 16(%r11,%r14,8), %xmm0 2 7 0.50 * vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 2 6 1.00 * vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 1 1 1.00 vunpcklpd %ymm6, %ymm1, %ymm14 1 1 1.00 vunpckhpd %ymm6, %ymm1, %ymm1 1 4 0.50 vsubpd %ymm14, %ymm10, %ymm6 1 3 1.00 vinsertf128 $1, %xmm2, %ymm15, %ymm7 1 4 0.50 vsubpd %ymm1, %ymm9, %ymm2 1 4 0.50 vsubpd %ymm7, %ymm8, %ymm0 1 4 0.50 vmulpd %ymm2, %ymm2, %ymm14 1 4 0.50 vfmadd231pd %ymm6, %ymm6, %ymm14 1 4 0.50 vfmadd231pd %ymm0, %ymm0, %ymm14 1 4 0.50 vcmpltpd %ymm5, %ymm14, %ymm1 1 1 0.50 vpcmpeqd %ymm7, %ymm7, %ymm7 2 3 1.00 vptest %ymm7, %ymm1 1 14 5.00 vdivpd %ymm14, %ymm4, %ymm7 2 11 0.50 * vmulpd 96(%rsp), %ymm7, %ymm14 1 4 0.50 vmulpd %ymm14, %ymm7, %ymm14 1 4 0.50 vmulpd %ymm14, %ymm7, %ymm15 1 4 0.50 vfmsub213pd %ymm3, %ymm7, %ymm14 2 11 0.50 * vmulpd 64(%rsp), %ymm7, %ymm7 1 4 0.50 vmulpd %ymm7, %ymm15, %ymm15 1 4 0.50 vmulpd %ymm14, %ymm15, %ymm7 1 4 0.50 vmulpd %ymm7, %ymm6, %ymm6 1 4 0.50 vmulpd %ymm7, %ymm2, %ymm2 1 1 0.33 vandpd %ymm6, %ymm1, %ymm6 1 4 0.50 vaddpd %ymm6, %ymm13, %ymm13 1 4 0.50 vmulpd %ymm7, %ymm0, %ymm6 1 1 0.33 vandpd %ymm2, %ymm1, %ymm0 1 1 0.33 vandpd %ymm6, %ymm1, %ymm1 1 4 0.50 vaddpd %ymm0, %ymm12, %ymm12 1 4 0.50 vaddpd %ymm1, %ymm11, %ymm11 1 1 0.25 addq $4, %rdx 1 1 0.25 cmpq %rsi, %rdx 1 1 0.50 jb ..B1.22 Resources: [0] - ICXDivider [1] - ICXFPDivider [2] - ICXPort0 [3] - ICXPort1 [4] - ICXPort2 [5] - ICXPort3 [6] - ICXPort4 [7] - ICXPort5 [8] - ICXPort6 [9] - ICXPort7 [10] - ICXPort8 [11] - ICXPort9 Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] - 5.00 15.12 15.03 5.50 5.50 - 13.45 8.40 - - - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: - - - - 0.50 0.50 - - - - - - vmovdqu (%rbx,%rdx,4), %xmm0 - - 1.00 - - - - - - - - - vmovq %xmm0, %rcx - - - 0.46 - - - 0.54 - - - - vpunpckhqdq %xmm0, %xmm0, %xmm2 - - 1.00 - - - - - - - - - vmovq %xmm2, %r15 - - - - - - - - 1.00 - - - movl %ecx, %r8d - - 0.96 - - - - - 0.04 - - - shrq $32, %rcx - - - 0.01 - - - 0.99 - - - - leal (%rcx,%rcx,2), %r14d - - - 0.03 - - - 0.97 - - - - leal (%r8,%r8,2), %r8d - - 0.48 0.01 - - - - 0.51 - - - movslq %r8d, %rcx - - 0.02 0.02 - - - 0.01 0.95 - - - movslq %r14d, %r8 - - 0.02 - - - - - 0.98 - - - movl %r15d, %r14d - - 0.52 - - - - - 0.48 - - - shrq $32, %r15 - - - - 0.49 0.51 - - - - - - vmovups (%r11,%rcx,8), %xmm7 - - - - 0.49 0.51 - - - - - - vmovups (%r11,%r8,8), %xmm6 - - - - 0.52 0.48 - - - - - - vmovq 16(%r11,%rcx,8), %xmm14 - - - 0.47 - - - 0.53 - - - - leal (%r14,%r14,2), %r14d - - 0.01 0.01 - - - 0.01 0.97 - - - movslq %r14d, %r14 - - - 0.04 - - - 0.96 - - - - leal (%r15,%r15,2), %r15d - - 0.48 - - - - 0.01 0.51 - - - movslq %r15d, %r15 - - - - 0.51 0.49 - 1.00 - - - - vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 - - 0.02 0.01 0.95 0.05 - 0.97 - - - - vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 - - - - 0.05 0.95 - - - - - - vmovq 16(%r11,%r14,8), %xmm0 - - 0.02 0.49 0.49 0.51 - 0.49 - - - - vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 - - - - 0.50 0.50 - 1.00 - - - - vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 - - - - - - - 1.00 - - - - vunpcklpd %ymm6, %ymm1, %ymm14 - - - - - - - 1.00 - - - - vunpckhpd %ymm6, %ymm1, %ymm1 - - 0.47 0.53 - - - - - - - - vsubpd %ymm14, %ymm10, %ymm6 - - - - - - - 1.00 - - - - vinsertf128 $1, %xmm2, %ymm15, %ymm7 - - 0.50 0.50 - - - - - - - - vsubpd %ymm1, %ymm9, %ymm2 - - 0.94 0.06 - - - - - - - - vsubpd %ymm7, %ymm8, %ymm0 - - 0.06 0.94 - - - - - - - - vmulpd %ymm2, %ymm2, %ymm14 - - 0.04 0.96 - - - - - - - - vfmadd231pd %ymm6, %ymm6, %ymm14 - - 0.95 0.05 - - - - - - - - vfmadd231pd %ymm0, %ymm0, %ymm14 - - 0.02 0.98 - - - - - - - - vcmpltpd %ymm5, %ymm14, %ymm1 - - 0.05 0.95 - - - - - - - - vpcmpeqd %ymm7, %ymm7, %ymm7 - - 1.00 - - - - 1.00 - - - - vptest %ymm7, %ymm1 - 5.00 1.00 - - - - - - - - - vdivpd %ymm14, %ymm4, %ymm7 - - 0.51 0.49 0.49 0.51 - - - - - - vmulpd 96(%rsp), %ymm7, %ymm14 - - 0.04 0.96 - - - - - - - - vmulpd %ymm14, %ymm7, %ymm14 - - 0.01 0.99 - - - - - - - - vmulpd %ymm14, %ymm7, %ymm15 - - 0.99 0.01 - - - - - - - - vfmsub213pd %ymm3, %ymm7, %ymm14 - - 0.49 0.51 0.51 0.49 - - - - - - vmulpd 64(%rsp), %ymm7, %ymm7 - - 0.01 0.99 - - - - - - - - vmulpd %ymm7, %ymm15, %ymm15 - - 0.01 0.99 - - - - - - - - vmulpd %ymm14, %ymm15, %ymm7 - - 0.48 0.52 - - - - - - - - vmulpd %ymm7, %ymm6, %ymm6 - - 0.52 0.48 - - - - - - - - vmulpd %ymm7, %ymm2, %ymm2 - - 0.46 0.02 - - - 0.52 - - - - vandpd %ymm6, %ymm1, %ymm6 - - 0.49 0.51 - - - - - - - - vaddpd %ymm6, %ymm13, %ymm13 - - 0.48 0.52 - - - - - - - - vmulpd %ymm7, %ymm0, %ymm6 - - 0.02 0.52 - - - 0.46 - - - - vandpd %ymm2, %ymm1, %ymm0 - - 0.02 - - - - 0.98 - - - - vandpd %ymm6, %ymm1, %ymm1 - - 0.49 0.51 - - - - - - - - vaddpd %ymm0, %ymm12, %ymm12 - - 0.51 0.49 - - - - - - - - vaddpd %ymm1, %ymm11, %ymm11 - - 0.01 - - - - - 0.99 - - - addq $4, %rdx - - 0.01 - - - - 0.01 0.98 - - - cmpq %rsi, %rdx - - 0.01 - - - - - 0.99 - - - jb ..B1.22