[0] Code Region Iterations: 100 Instructions: 5600 Total Cycles: 2352 Total uOps: 6300 Dispatch Width: 6 uOps Per Cycle: 2.68 IPC: 2.38 Block RThroughput: 10.5 Instruction Info: [1]: #uOps [2]: Latency [3]: RThroughput [4]: MayLoad [5]: MayStore [6]: HasSideEffects (U) [1] [2] [3] [4] [5] [6] Instructions: 1 6 0.50 * vmovdqu (%rbx,%rdx,4), %xmm0 1 2 1.00 vmovq %xmm0, %rcx 1 1 1.00 vpunpckhqdq %xmm0, %xmm0, %xmm2 1 2 1.00 vmovq %xmm2, %r15 1 1 0.25 movl %ecx, %r8d 1 1 0.50 shrq $32, %rcx 1 1 0.50 leal (%rcx,%rcx,2), %r14d 1 1 0.50 leal (%r8,%r8,2), %r8d 1 1 0.25 movslq %r8d, %rcx 1 1 0.25 movslq %r14d, %r8 1 1 0.25 movl %r15d, %r14d 1 1 0.50 shrq $32, %r15 1 6 0.50 * vmovups (%r11,%rcx,8), %xmm7 1 6 0.50 * vmovups (%r11,%r8,8), %xmm6 1 5 0.50 * vmovq 16(%r11,%rcx,8), %xmm14 1 1 0.50 leal (%r14,%r14,2), %r14d 1 1 0.25 movslq %r14d, %r14 1 1 0.50 leal (%r15,%r15,2), %r15d 1 1 0.25 movslq %r15d, %r15 2 6 1.00 * vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 2 7 0.50 * vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 1 5 0.50 * vmovq 16(%r11,%r14,8), %xmm0 2 7 0.50 * vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 2 6 1.00 * vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 1 1 1.00 vunpcklpd %ymm6, %ymm1, %ymm14 1 1 1.00 vunpckhpd %ymm6, %ymm1, %ymm1 1 4 0.50 vsubpd %ymm14, %ymm10, %ymm6 1 3 1.00 vinsertf128 $1, %xmm2, %ymm15, %ymm7 1 4 0.50 vsubpd %ymm1, %ymm9, %ymm2 1 4 0.50 vsubpd %ymm7, %ymm8, %ymm0 1 4 0.50 vmulpd %ymm2, %ymm2, %ymm14 1 4 0.50 vfmadd231pd %ymm6, %ymm6, %ymm14 1 4 0.50 vfmadd231pd %ymm0, %ymm0, %ymm14 1 4 0.50 vcmpltpd %ymm5, %ymm14, %ymm1 1 1 0.50 vpcmpeqd %ymm7, %ymm7, %ymm7 2 3 1.00 vptest %ymm7, %ymm1 1 14 5.00 vdivpd %ymm14, %ymm4, %ymm7 2 11 0.50 * vmulpd 96(%rsp), %ymm7, %ymm14 1 4 0.50 vmulpd %ymm14, %ymm7, %ymm14 1 4 0.50 vmulpd %ymm14, %ymm7, %ymm15 1 4 0.50 vfmsub213pd %ymm3, %ymm7, %ymm14 2 11 0.50 * vmulpd 64(%rsp), %ymm7, %ymm7 1 4 0.50 vmulpd %ymm7, %ymm15, %ymm15 1 4 0.50 vmulpd %ymm14, %ymm15, %ymm7 1 4 0.50 vmulpd %ymm7, %ymm6, %ymm6 1 4 0.50 vmulpd %ymm7, %ymm2, %ymm2 1 1 0.33 vandpd %ymm6, %ymm1, %ymm6 1 4 0.50 vaddpd %ymm6, %ymm13, %ymm13 1 4 0.50 vmulpd %ymm7, %ymm0, %ymm6 1 1 0.33 vandpd %ymm2, %ymm1, %ymm0 1 1 0.33 vandpd %ymm6, %ymm1, %ymm1 1 4 0.50 vaddpd %ymm0, %ymm12, %ymm12 1 4 0.50 vaddpd %ymm1, %ymm11, %ymm11 1 1 0.25 addq $4, %rdx 1 1 0.25 cmpq %rsi, %rdx 1 1 0.50 jb ..B1.22 Resources: [0] - SKXDivider [1] - SKXFPDivider [2] - SKXPort0 [3] - SKXPort1 [4] - SKXPort2 [5] - SKXPort3 [6] - SKXPort4 [7] - SKXPort5 [8] - SKXPort6 [9] - SKXPort7 Resource pressure per iteration: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] - 5.00 16.00 14.12 5.50 5.50 - 13.47 8.41 - Resource pressure by instruction: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: - - - - 0.50 0.50 - - - - vmovdqu (%rbx,%rdx,4), %xmm0 - - 1.00 - - - - - - - vmovq %xmm0, %rcx - - - - - - - 1.00 - - vpunpckhqdq %xmm0, %xmm0, %xmm2 - - 1.00 - - - - - - - vmovq %xmm2, %r15 - - - - - - - - 1.00 - movl %ecx, %r8d - - 0.06 - - - - - 0.94 - shrq $32, %rcx - - - 0.02 - - - 0.98 - - leal (%rcx,%rcx,2), %r14d - - - 0.02 - - - 0.98 - - leal (%r8,%r8,2), %r8d - - 0.47 0.02 - - - - 0.51 - movslq %r8d, %rcx - - 0.46 0.02 - - - 0.01 0.51 - movslq %r14d, %r8 - - 0.03 0.01 - - - 0.45 0.51 - movl %r15d, %r14d - - 0.51 - - - - - 0.49 - shrq $32, %r15 - - - - 0.49 0.51 - - - - vmovups (%r11,%rcx,8), %xmm7 - - - - 0.49 0.51 - - - - vmovups (%r11,%r8,8), %xmm6 - - - - 0.52 0.48 - - - - vmovq 16(%r11,%rcx,8), %xmm14 - - - 0.02 - - - 0.98 - - leal (%r14,%r14,2), %r14d - - 0.01 0.01 - - - 0.01 0.97 - movslq %r14d, %r14 - - - 0.03 - - - 0.97 - - leal (%r15,%r15,2), %r15d - - 0.04 - - - - - 0.96 - movslq %r15d, %r15 - - - - 0.07 0.93 - 1.00 - - vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15 - - 0.03 0.46 0.49 0.51 - 0.51 - - vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 - - - - 0.51 0.49 - - - - vmovq 16(%r11,%r14,8), %xmm0 - - 0.47 0.02 0.93 0.07 - 0.51 - - vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 - - - - 0.50 0.50 - 1.00 - - vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2 - - - - - - - 1.00 - - vunpcklpd %ymm6, %ymm1, %ymm14 - - - - - - - 1.00 - - vunpckhpd %ymm6, %ymm1, %ymm1 - - 0.01 0.99 - - - - - - vsubpd %ymm14, %ymm10, %ymm6 - - - - - - - 1.00 - - vinsertf128 $1, %xmm2, %ymm15, %ymm7 - - 0.96 0.04 - - - - - - vsubpd %ymm1, %ymm9, %ymm2 - - 0.49 0.51 - - - - - - vsubpd %ymm7, %ymm8, %ymm0 - - 0.48 0.52 - - - - - - vmulpd %ymm2, %ymm2, %ymm14 - - 0.03 0.97 - - - - - - vfmadd231pd %ymm6, %ymm6, %ymm14 - - 0.94 0.06 - - - - - - vfmadd231pd %ymm0, %ymm0, %ymm14 - - 0.47 0.53 - - - - - - vcmpltpd %ymm5, %ymm14, %ymm1 - - 0.96 0.04 - - - - - - vpcmpeqd %ymm7, %ymm7, %ymm7 - - 1.00 - - - - 1.00 - - vptest %ymm7, %ymm1 - 5.00 1.00 - - - - - - - vdivpd %ymm14, %ymm4, %ymm7 - - 0.93 0.07 0.49 0.51 - - - - vmulpd 96(%rsp), %ymm7, %ymm14 - - 0.05 0.95 - - - - - - vmulpd %ymm14, %ymm7, %ymm14 - - 0.02 0.98 - - - - - - vmulpd %ymm14, %ymm7, %ymm15 - - 0.98 0.02 - - - - - - vfmsub213pd %ymm3, %ymm7, %ymm14 - - 0.07 0.93 0.51 0.49 - - - - vmulpd 64(%rsp), %ymm7, %ymm7 - - 0.01 0.99 - - - - - - vmulpd %ymm7, %ymm15, %ymm15 - - 0.01 0.99 - - - - - - vmulpd %ymm14, %ymm15, %ymm7 - - 0.03 0.97 - - - - - - vmulpd %ymm7, %ymm6, %ymm6 - - 0.97 0.03 - - - - - - vmulpd %ymm7, %ymm2, %ymm2 - - 0.03 0.90 - - - 0.07 - - vandpd %ymm6, %ymm1, %ymm6 - - 0.06 0.94 - - - - - - vaddpd %ymm6, %ymm13, %ymm13 - - 0.03 0.97 - - - - - - vmulpd %ymm7, %ymm0, %ymm6 - - 0.46 0.08 - - - 0.46 - - vandpd %ymm2, %ymm1, %ymm0 - - 0.47 0.01 - - - 0.52 - - vandpd %ymm6, %ymm1, %ymm1 - - 0.48 0.52 - - - - - - vaddpd %ymm0, %ymm12, %ymm12 - - 0.52 0.48 - - - - - - vaddpd %ymm1, %ymm11, %ymm11 - - 0.01 - - - - - 0.99 - addq $4, %rdx - - - - - - - 0.02 0.98 - cmpq %rsi, %rdx - - 0.45 - - - - - 0.55 - jb ..B1.22