159 lines
11 KiB
Plaintext
[0] Code Region

Iterations:        100
Instructions:      5600
Total Cycles:      2306
Total uOps:        6300

Dispatch Width:    6
uOps Per Cycle:    2.73
IPC:               2.43
Block RThroughput: 10.5


Instruction Info:
[1]: #uOps
[2]: Latency
[3]: RThroughput
[4]: MayLoad
[5]: MayStore
[6]: HasSideEffects (U)

[1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 1      6     0.50    *                   vmovdqu (%rbx,%rdx,4), %xmm0
 1      2     1.00                        vmovq %xmm0, %rcx
 1      1     0.50                        vpunpckhqdq %xmm0, %xmm0, %xmm2
 1      2     1.00                        vmovq %xmm2, %r15
 1      1     0.25                        movl %ecx, %r8d
 1      1     0.50                        shrq $32, %rcx
 1      1     0.50                        leal (%rcx,%rcx,2), %r14d
 1      1     0.50                        leal (%r8,%r8,2), %r8d
 1      1     0.25                        movslq %r8d, %rcx
 1      1     0.25                        movslq %r14d, %r8
 1      1     0.25                        movl %r15d, %r14d
 1      1     0.50                        shrq $32, %r15
 1      6     0.50    *                   vmovups (%r11,%rcx,8), %xmm7
 1      6     0.50    *                   vmovups (%r11,%r8,8), %xmm6
 1      5     0.50    *                   vmovq 16(%r11,%rcx,8), %xmm14
 1      1     0.50                        leal (%r14,%r14,2), %r14d
 1      1     0.25                        movslq %r14d, %r14
 1      1     0.50                        leal (%r15,%r15,2), %r15d
 1      1     0.25                        movslq %r15d, %r15
 2      6     1.00    *                   vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15
 2      7     0.50    *                   vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1
 1      5     0.50    *                   vmovq 16(%r11,%r14,8), %xmm0
 2      7     0.50    *                   vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6
 2      6     1.00    *                   vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2
 1      1     1.00                        vunpcklpd %ymm6, %ymm1, %ymm14
 1      1     1.00                        vunpckhpd %ymm6, %ymm1, %ymm1
 1      4     0.50                        vsubpd %ymm14, %ymm10, %ymm6
 1      3     1.00                        vinsertf128 $1, %xmm2, %ymm15, %ymm7
 1      4     0.50                        vsubpd %ymm1, %ymm9, %ymm2
 1      4     0.50                        vsubpd %ymm7, %ymm8, %ymm0
 1      4     0.50                        vmulpd %ymm2, %ymm2, %ymm14
 1      4     0.50                        vfmadd231pd %ymm6, %ymm6, %ymm14
 1      4     0.50                        vfmadd231pd %ymm0, %ymm0, %ymm14
 1      4     0.50                        vcmpltpd %ymm5, %ymm14, %ymm1
 1      1     0.50                        vpcmpeqd %ymm7, %ymm7, %ymm7
 2      3     1.00                        vptest %ymm7, %ymm1
 1      14    5.00                        vdivpd %ymm14, %ymm4, %ymm7
 2      11    0.50    *                   vmulpd 96(%rsp), %ymm7, %ymm14
 1      4     0.50                        vmulpd %ymm14, %ymm7, %ymm14
 1      4     0.50                        vmulpd %ymm14, %ymm7, %ymm15
 1      4     0.50                        vfmsub213pd %ymm3, %ymm7, %ymm14
 2      11    0.50    *                   vmulpd 64(%rsp), %ymm7, %ymm7
 1      4     0.50                        vmulpd %ymm7, %ymm15, %ymm15
 1      4     0.50                        vmulpd %ymm14, %ymm15, %ymm7
 1      4     0.50                        vmulpd %ymm7, %ymm6, %ymm6
 1      4     0.50                        vmulpd %ymm7, %ymm2, %ymm2
 1      1     0.33                        vandpd %ymm6, %ymm1, %ymm6
 1      4     0.50                        vaddpd %ymm6, %ymm13, %ymm13
 1      4     0.50                        vmulpd %ymm7, %ymm0, %ymm6
 1      1     0.33                        vandpd %ymm2, %ymm1, %ymm0
 1      1     0.33                        vandpd %ymm6, %ymm1, %ymm1
 1      4     0.50                        vaddpd %ymm0, %ymm12, %ymm12
 1      4     0.50                        vaddpd %ymm1, %ymm11, %ymm11
 1      1     0.25                        addq $4, %rdx
 1      1     0.25                        cmpq %rsi, %rdx
 1      1     0.50                        jb ..B1.22


Resources:
[0]   - ICXDivider
[1]   - ICXFPDivider
[2]   - ICXPort0
[3]   - ICXPort1
[4]   - ICXPort2
[5]   - ICXPort3
[6]   - ICXPort4
[7]   - ICXPort5
[8]   - ICXPort6
[9]   - ICXPort7
[10]  - ICXPort8
[11]  - ICXPort9


Resource pressure per iteration:
[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
 -     5.00   15.12  15.03  5.50   5.50    -     13.45  8.40    -      -      -

Resource pressure by instruction:
[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
 -      -      -      -     0.50   0.50    -      -      -      -      -      -     vmovdqu (%rbx,%rdx,4), %xmm0
 -      -     1.00    -      -      -      -      -      -      -      -      -     vmovq %xmm0, %rcx
 -      -      -     0.46    -      -      -     0.54    -      -      -      -     vpunpckhqdq %xmm0, %xmm0, %xmm2
 -      -     1.00    -      -      -      -      -      -      -      -      -     vmovq %xmm2, %r15
 -      -      -      -      -      -      -      -     1.00    -      -      -     movl %ecx, %r8d
 -      -     0.96    -      -      -      -      -     0.04    -      -      -     shrq $32, %rcx
 -      -      -     0.01    -      -      -     0.99    -      -      -      -     leal (%rcx,%rcx,2), %r14d
 -      -      -     0.03    -      -      -     0.97    -      -      -      -     leal (%r8,%r8,2), %r8d
 -      -     0.48   0.01    -      -      -      -     0.51    -      -      -     movslq %r8d, %rcx
 -      -     0.02   0.02    -      -      -     0.01   0.95    -      -      -     movslq %r14d, %r8
 -      -     0.02    -      -      -      -      -     0.98    -      -      -     movl %r15d, %r14d
 -      -     0.52    -      -      -      -      -     0.48    -      -      -     shrq $32, %r15
 -      -      -      -     0.49   0.51    -      -      -      -      -      -     vmovups (%r11,%rcx,8), %xmm7
 -      -      -      -     0.49   0.51    -      -      -      -      -      -     vmovups (%r11,%r8,8), %xmm6
 -      -      -      -     0.52   0.48    -      -      -      -      -      -     vmovq 16(%r11,%rcx,8), %xmm14
 -      -      -     0.47    -      -      -     0.53    -      -      -      -     leal (%r14,%r14,2), %r14d
 -      -     0.01   0.01    -      -      -     0.01   0.97    -      -      -     movslq %r14d, %r14
 -      -      -     0.04    -      -      -     0.96    -      -      -      -     leal (%r15,%r15,2), %r15d
 -      -     0.48    -      -      -      -     0.01   0.51    -      -      -     movslq %r15d, %r15
 -      -      -      -     0.51   0.49    -     1.00    -      -      -      -     vmovhpd 16(%r11,%r8,8), %xmm14, %xmm15
 -      -     0.02   0.01   0.95   0.05    -     0.97    -      -      -      -     vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1
 -      -      -      -     0.05   0.95    -      -      -      -      -      -     vmovq 16(%r11,%r14,8), %xmm0
 -      -     0.02   0.49   0.49   0.51    -     0.49    -      -      -      -     vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6
 -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vmovhpd 16(%r11,%r15,8), %xmm0, %xmm2
 -      -      -      -      -      -      -     1.00    -      -      -      -     vunpcklpd %ymm6, %ymm1, %ymm14
 -      -      -      -      -      -      -     1.00    -      -      -      -     vunpckhpd %ymm6, %ymm1, %ymm1
 -      -     0.47   0.53    -      -      -      -      -      -      -      -     vsubpd %ymm14, %ymm10, %ymm6
 -      -      -      -      -      -      -     1.00    -      -      -      -     vinsertf128 $1, %xmm2, %ymm15, %ymm7
 -      -     0.50   0.50    -      -      -      -      -      -      -      -     vsubpd %ymm1, %ymm9, %ymm2
 -      -     0.94   0.06    -      -      -      -      -      -      -      -     vsubpd %ymm7, %ymm8, %ymm0
 -      -     0.06   0.94    -      -      -      -      -      -      -      -     vmulpd %ymm2, %ymm2, %ymm14
 -      -     0.04   0.96    -      -      -      -      -      -      -      -     vfmadd231pd %ymm6, %ymm6, %ymm14
 -      -     0.95   0.05    -      -      -      -      -      -      -      -     vfmadd231pd %ymm0, %ymm0, %ymm14
 -      -     0.02   0.98    -      -      -      -      -      -      -      -     vcmpltpd %ymm5, %ymm14, %ymm1
 -      -     0.05   0.95    -      -      -      -      -      -      -      -     vpcmpeqd %ymm7, %ymm7, %ymm7
 -      -     1.00    -      -      -      -     1.00    -      -      -      -     vptest %ymm7, %ymm1
 -     5.00   1.00    -      -      -      -      -      -      -      -      -     vdivpd %ymm14, %ymm4, %ymm7
 -      -     0.51   0.49   0.49   0.51    -      -      -      -      -      -     vmulpd 96(%rsp), %ymm7, %ymm14
 -      -     0.04   0.96    -      -      -      -      -      -      -      -     vmulpd %ymm14, %ymm7, %ymm14
 -      -     0.01   0.99    -      -      -      -      -      -      -      -     vmulpd %ymm14, %ymm7, %ymm15
 -      -     0.99   0.01    -      -      -      -      -      -      -      -     vfmsub213pd %ymm3, %ymm7, %ymm14
 -      -     0.49   0.51   0.51   0.49    -      -      -      -      -      -     vmulpd 64(%rsp), %ymm7, %ymm7
 -      -     0.01   0.99    -      -      -      -      -      -      -      -     vmulpd %ymm7, %ymm15, %ymm15
 -      -     0.01   0.99    -      -      -      -      -      -      -      -     vmulpd %ymm14, %ymm15, %ymm7
 -      -     0.48   0.52    -      -      -      -      -      -      -      -     vmulpd %ymm7, %ymm6, %ymm6
 -      -     0.52   0.48    -      -      -      -      -      -      -      -     vmulpd %ymm7, %ymm2, %ymm2
 -      -     0.46   0.02    -      -      -     0.52    -      -      -      -     vandpd %ymm6, %ymm1, %ymm6
 -      -     0.49   0.51    -      -      -      -      -      -      -      -     vaddpd %ymm6, %ymm13, %ymm13
 -      -     0.48   0.52    -      -      -      -      -      -      -      -     vmulpd %ymm7, %ymm0, %ymm6
 -      -     0.02   0.52    -      -      -     0.46    -      -      -      -     vandpd %ymm2, %ymm1, %ymm0
 -      -     0.02    -      -      -      -     0.98    -      -      -      -     vandpd %ymm6, %ymm1, %ymm1
 -      -     0.49   0.51    -      -      -      -      -      -      -      -     vaddpd %ymm0, %ymm12, %ymm12
 -      -     0.51   0.49    -      -      -      -      -      -      -      -     vaddpd %ymm1, %ymm11, %ymm11
 -      -     0.01    -      -      -      -      -     0.99    -      -      -     addq $4, %rdx
 -      -     0.01    -      -      -      -     0.01   0.98    -      -      -     cmpq %rsi, %rdx
 -      -     0.01    -      -      -      -      -     0.99    -      -      -     jb ..B1.22