From 4e0390404a16a0f7560667cf652eb733ffe7ec68 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Wed, 6 Oct 2021 16:08:46 +0200 Subject: [PATCH] Add correct version (without traces) of CACHES_PMC results with and without force calculation on casclakesp2 Signed-off-by: Rafael Ravedutti --- asm/unused/force-mem-only-with-likwid.s | 1153 +++++++++-------------- 1 file changed, 425 insertions(+), 728 deletions(-) diff --git a/asm/unused/force-mem-only-with-likwid.s b/asm/unused/force-mem-only-with-likwid.s index 2872943..753b804 100644 --- a/asm/unused/force-mem-only-with-likwid.s +++ b/asm/unused/force-mem-only-with-likwid.s @@ -1,7 +1,7 @@ # mark_description "Intel(R) C Intel(R) 64 Compiler for applications running on Intel(R) 64, Version 19.0.5.281 Build 20190815"; -# mark_description "-I/mnt/opt/likwid-5.1.1/include -I./src/includes -S -D_GNU_SOURCE -DLIKWID_PERFMON -DAOS -DPRECISION=2 -DNEI"; -# mark_description "GHBORS_LOOP_RUNS=1 -DINDEX_TRACER -DVECTOR_WIDTH=8 -DALIGNMENT=64 -restrict -Ofast -xCORE-AVX512 -qopt-zmm-u"; -# mark_description "sage=high -o ICC/force.s"; +# mark_description "-I/mnt/opt/likwid-5.2-dev/include -I./src/includes -S -D_GNU_SOURCE -DLIKWID_PERFMON -DAOS -DPRECISION=2 -DN"; +# mark_description "EIGHBORS_LOOP_RUNS=1 -DVECTOR_WIDTH=8 -DALIGNMENT=64 -restrict -Ofast -xCORE-AVX512 -qopt-zmm-usage=high -o "; +# mark_description "ICC/force.s"; .file "force.c" .text ..TXTST0: @@ -36,744 +36,483 @@ computeForce: pushq %r14 #121.112 pushq %r15 #121.112 pushq %rbx #121.112 - subq $536, %rsp #121.112 - movl %r8d, %eax #123.5 - movq %rdx, 384(%rsp) #121.112[spill] + subq $88, %rsp #121.112 + xorl %eax, %eax #124.16 .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x22 - movq %rsi, %r14 #121.112 - cltd #123.5 - idivl %r9d #123.5 - movl %edx, %ebx #123.5 - movq %rdi, %r12 #121.112 - testl %ebx, %ebx #123.5 - jne ..B1.5 # Prob 78% #123.5 - # LOE r12 r13 r14 ebx r8d -..B1.2: # Preds ..B1.1 - # Execution count [2.20e-01] - movl $128, %esi #123.5 - lea (%rsp), %rdi #123.5 - movl $.L_2__STRING.0, %edx #123.5 - movl %r8d, %ecx #123.5 - xorl %eax, %eax #123.5 -# snprintf(char *__restrict__, size_t, const char *__restrict__, ...) - call snprintf #123.5 - # LOE r12 r14 ebx -..B1.3: # Preds ..B1.2 - # Execution count [2.20e-01] - movl $.L_2__STRING.1, %esi #123.5 - lea (%rsp), %rdi #123.5 -# fopen(const char *__restrict__, const char *__restrict__) - call fopen #123.5 - # LOE rax r12 r14 ebx -..B1.75: # Preds ..B1.3 - # Execution count [2.20e-01] - movq %rax, %r13 #123.5 - # LOE r12 r13 r14 ebx -..B1.5: # Preds ..B1.75 ..B1.1 - # Execution count [1.00e+00] - xorl %eax, %eax #124.16 -..___tag_value_computeForce.12: + movq %rdx, %r15 #121.112 + movq %rsi, %r12 #121.112 + movq %rdi, %rbx #121.112 +..___tag_value_computeForce.11: # getTimeStamp() call getTimeStamp #124.16 -..___tag_value_computeForce.13: - # LOE r12 r13 r14 ebx xmm0 -..B1.76: # Preds ..B1.5 +..___tag_value_computeForce.12: + # LOE rbx r12 r15 xmm0 +..B1.51: # Preds ..B1.1 # Execution count [1.00e+00] - vmovsd %xmm0, 160(%rsp) #124.16[spill] - # LOE r12 r13 r14 ebx -..B1.6: # Preds ..B1.76 + vmovsd %xmm0, 24(%rsp) #124.16[spill] + # LOE rbx r12 r15 +..B1.2: # Preds ..B1.51 # Execution count [1.00e+00] - movl 4(%r14), %edx #125.18 - movq 64(%r14), %r15 #127.20 - movq 72(%r14), %r10 #127.45 - movq 80(%r14), %r9 #127.70 - vmovsd 72(%r12), %xmm2 #129.27 - vmovsd 8(%r12), %xmm1 #130.23 - vmovsd (%r12), %xmm0 #131.24 - testl %edx, %edx #134.24 - jle ..B1.15 # Prob 50% #134.24 - # LOE r9 r10 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.7: # Preds ..B1.6 + movl 4(%r12), %r13d #125.18 + movq 64(%r12), %r9 #127.20 + movq 72(%r12), %r14 #127.45 + movq 80(%r12), %r8 #127.70 + vmovsd 72(%rbx), %xmm2 #129.27 + vmovsd 8(%rbx), %xmm1 #130.23 + vmovsd (%rbx), %xmm0 #131.24 + testl %r13d, %r13d #134.24 + jle ..B1.43 # Prob 50% #134.24 + # LOE r8 r9 r12 r14 r15 r13d xmm0 xmm1 xmm2 +..B1.3: # Preds ..B1.2 # Execution count [1.00e+00] - movl %edx, %eax #134.5 - xorl %r8d, %r8d #134.5 - movl $1, %r11d #134.5 - xorl %esi, %esi #134.5 - shrl $1, %eax #134.5 - je ..B1.11 # Prob 9% #134.5 - # LOE rax rsi r8 r9 r10 r13 r14 r15 edx ebx r11d xmm0 xmm1 xmm2 + xorl %ebx, %ebx #134.5 + movl %r13d, %edx #134.5 + xorl %ecx, %ecx #134.5 + movl $1, %esi #134.5 + xorl %eax, %eax #135.17 + shrl $1, %edx #134.5 + je ..B1.7 # Prob 9% #134.5 + # LOE rax rdx rcx rbx r8 r9 r12 r14 r15 esi r13d xmm0 xmm1 xmm2 +..B1.5: # Preds ..B1.3 ..B1.5 + # Execution count [2.50e+00] + movq %rax, (%rcx,%r9) #135.9 + incq %rbx #134.5 + movq %rax, (%rcx,%r14) #136.9 + movq %rax, (%rcx,%r8) #137.9 + movq %rax, 8(%rcx,%r9) #135.9 + movq %rax, 8(%rcx,%r14) #136.9 + movq %rax, 8(%rcx,%r8) #137.9 + addq $16, %rcx #134.5 + cmpq %rdx, %rbx #134.5 + jb ..B1.5 # Prob 63% #134.5 + # LOE rax rdx rcx rbx r8 r9 r12 r14 r15 r13d xmm0 xmm1 xmm2 +..B1.6: # Preds ..B1.5 + # Execution count [9.00e-01] + lea 1(%rbx,%rbx), %esi #135.9 + # LOE rax r8 r9 r12 r14 r15 esi r13d xmm0 xmm1 xmm2 +..B1.7: # Preds ..B1.3 ..B1.6 + # Execution count [1.00e+00] + lea -1(%rsi), %edx #134.5 + cmpl %r13d, %edx #134.5 + jae ..B1.9 # Prob 9% #134.5 + # LOE rax r8 r9 r12 r14 r15 esi r13d xmm0 xmm1 xmm2 ..B1.8: # Preds ..B1.7 # Execution count [9.00e-01] - xorl %r11d, %r11d #134.5 - # LOE rax rsi r8 r9 r10 r11 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.9: # Preds ..B1.9 ..B1.8 - # Execution count [2.50e+00] - movq %r11, (%rsi,%r15) #135.9 - incq %r8 #134.5 - movq %r11, (%rsi,%r10) #136.9 - movq %r11, (%rsi,%r9) #137.9 - movq %r11, 8(%rsi,%r15) #135.9 - movq %r11, 8(%rsi,%r10) #136.9 - movq %r11, 8(%rsi,%r9) #137.9 - addq $16, %rsi #134.5 - cmpq %rax, %r8 #134.5 - jb ..B1.9 # Prob 63% #134.5 - # LOE rax rsi r8 r9 r10 r11 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 + movslq %esi, %rsi #134.5 + movq %rax, -8(%r9,%rsi,8) #135.9 + movq %rax, -8(%r14,%rsi,8) #136.9 + movq %rax, -8(%r8,%rsi,8) #137.9 + # LOE r8 r9 r12 r14 r15 r13d xmm0 xmm1 xmm2 +..B1.9: # Preds ..B1.7 ..B1.8 + # Execution count [5.00e-01] + movl $.L_2__STRING.0, %edi #141.5 + movq %r8, 32(%rsp) #141.5[spill] + movq %r9, 80(%rsp) #141.5[spill] + vmovsd %xmm2, (%rsp) #141.5[spill] + vmovsd %xmm1, 8(%rsp) #141.5[spill] + vmovsd %xmm0, 16(%rsp) #141.5[spill] +..___tag_value_computeForce.18: +# likwid_markerStartRegion(const char *) + call likwid_markerStartRegion #141.5 +..___tag_value_computeForce.19: + # LOE r12 r14 r15 r13d ..B1.10: # Preds ..B1.9 # Execution count [9.00e-01] - lea 1(%r8,%r8), %r11d #135.9 - # LOE r9 r10 r13 r14 r15 edx ebx r11d xmm0 xmm1 xmm2 -..B1.11: # Preds ..B1.10 ..B1.7 - # Execution count [1.00e+00] - lea -1(%r11), %eax #134.5 - cmpl %edx, %eax #134.5 - jae ..B1.13 # Prob 9% #134.5 - # LOE r9 r10 r13 r14 r15 edx ebx r11d xmm0 xmm1 xmm2 -..B1.12: # Preds ..B1.11 - # Execution count [9.00e-01] - movslq %r11d, %r11 #134.5 - xorl %eax, %eax #135.9 - movq %rax, -8(%r15,%r11,8) #135.9 - movq %rax, -8(%r10,%r11,8) #136.9 - movq %rax, -8(%r9,%r11,8) #137.9 - # LOE r9 r10 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.13: # Preds ..B1.11 ..B1.12 - # Execution count [5.00e-01] - testl %ebx, %ebx #140.5 - je ..B1.17 # Prob 22% #140.5 - # LOE r9 r10 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.14: # Preds ..B1.13 - # Execution count [3.90e-01] - movl $.L_2__STRING.3, %edi #141.5 - movl %edx, 128(%rsp) #141.5[spill] - movq %r9, 400(%rsp) #141.5[spill] - movq %r10, 392(%rsp) #141.5[spill] - vmovsd %xmm2, 136(%rsp) #141.5[spill] - vmovsd %xmm1, 144(%rsp) #141.5[spill] - vmovsd %xmm0, 152(%rsp) #141.5[spill] -..___tag_value_computeForce.20: -# likwid_markerStartRegion(const char *) - call likwid_markerStartRegion #141.5 -..___tag_value_computeForce.21: - # LOE r13 r14 r15 ebx -..B1.79: # Preds ..B1.14 - # Execution count [3.90e-01] - movq 392(%rsp), %r10 #[spill] - movq 400(%rsp), %r9 #[spill] - vmovsd 152(%rsp), %xmm0 #[spill] - vmovsd 144(%rsp), %xmm1 #[spill] - vmovsd 136(%rsp), %xmm2 #[spill] - movl 128(%rsp), %edx #[spill] - jmp ..B1.20 # Prob 100% # - # LOE r9 r10 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.15: # Preds ..B1.6 - # Execution count [5.00e-01] - testl %ebx, %ebx #140.5 - je ..B1.17 # Prob 22% #140.5 - # LOE r9 r10 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.16: # Preds ..B1.15 - # Execution count [3.90e-01] - movl $.L_2__STRING.3, %edi #141.5 -..___tag_value_computeForce.29: -# likwid_markerStartRegion(const char *) - call likwid_markerStartRegion #141.5 -..___tag_value_computeForce.30: - jmp ..B1.66 # Prob 100% #141.5 - # LOE r13 ebx -..B1.17: # Preds ..B1.15 ..B1.13 - # Execution count [2.20e-01] - movq 384(%rsp), %r11 #140.5[spill] - movq %r13, %rdi #140.5 - movl $.L_2__STRING.2, %esi #140.5 - xorl %eax, %eax #140.5 - movl 8(%r14), %ecx #140.5 - movl 16(%r11), %r8d #140.5 - movl %edx, 128(%rsp) #140.5[spill] - movq %r9, 400(%rsp) #140.5[spill] - movq %r10, 392(%rsp) #140.5[spill] - vmovsd %xmm2, 136(%rsp) #140.5[spill] - vmovsd %xmm1, 144(%rsp) #140.5[spill] - vmovsd %xmm0, 152(%rsp) #140.5[spill] -# fprintf(FILE *__restrict__, const char *__restrict__, ...) - call fprintf #140.5 - # LOE r13 r14 r15 ebx -..B1.18: # Preds ..B1.17 - # Execution count [2.20e-01] - movl $.L_2__STRING.3, %edi #141.5 - vmovsd 152(%rsp), %xmm0 #[spill] - vmovsd 144(%rsp), %xmm1 #[spill] - vmovsd 136(%rsp), %xmm2 #[spill] - vmovsd %xmm2, 136(%rsp) #141.5[spill] - vmovsd %xmm1, 144(%rsp) #141.5[spill] - vmovsd %xmm0, 152(%rsp) #141.5[spill] -..___tag_value_computeForce.43: -# likwid_markerStartRegion(const char *) - call likwid_markerStartRegion #141.5 -..___tag_value_computeForce.44: - # LOE r13 r14 r15 ebx -..B1.19: # Preds ..B1.18 - # Execution count [2.20e-01] - movl 128(%rsp), %edx #[spill] - movq 392(%rsp), %r10 #[spill] - movq 400(%rsp), %r9 #[spill] - vmovsd 152(%rsp), %xmm0 #[spill] - vmovsd 144(%rsp), %xmm1 #[spill] - vmovsd 136(%rsp), %xmm2 #[spill] - testl %edx, %edx #143.24 - jle ..B1.66 # Prob 9% #143.24 - # LOE r9 r10 r13 r14 r15 edx ebx xmm0 xmm1 xmm2 -..B1.20: # Preds ..B1.19 ..B1.79 - # Execution count [9.00e-01] - vmulsd %xmm2, %xmm2, %xmm2 #129.45 - movq %r10, %rax #140.5 - vmovdqu .L_2il0floatpacket.0(%rip), %ymm5 #173.13 + vmovsd 16(%rsp), %xmm0 #[spill] + xorl %esi, %esi #143.15 + vmovsd (%rsp), %xmm2 #[spill] + xorl %eax, %eax #143.5 + vmulsd %xmm2, %xmm2, %xmm13 #129.45 + xorl %edi, %edi #143.5 + vmovdqu32 .L_2il0floatpacket.0(%rip), %ymm16 #173.13 vmulsd .L_2il0floatpacket.3(%rip), %xmm0, %xmm0 #197.45 - vbroadcastsd %xmm2, %zmm3 #129.25 - vbroadcastsd %xmm1, %zmm2 #130.21 - vbroadcastsd %xmm0, %zmm1 #197.45 - vmovups .L_2il0floatpacket.4(%rip), %zmm0 #197.58 - vmovups %zmm2, 256(%rsp) #140.5[spill] - vmovups %zmm1, 320(%rsp) #140.5[spill] - vmovups %zmm3, 192(%rsp) #140.5[spill] - movslq %edx, %rdx #143.5 - xorl %r12d, %r12d #140.5 - movq %rdx, 440(%rsp) #140.5[spill] - movl %ebx, %ecx #140.5 - movq %r13, 128(%rsp) #140.5[spill] - movq %r9, %rbx #140.5 - movq 384(%rsp), %r9 #140.5[spill] - movq %r14, %r10 #140.5 - movq %rax, %r14 #140.5 - # LOE rbx r9 r10 r12 r14 r15 ecx -..B1.21: # Preds ..B1.64 ..B1.20 + vmovdqu .L_2il0floatpacket.1(%rip), %ymm15 #173.13 + vmovups .L_2il0floatpacket.4(%rip), %zmm5 #197.58 + vmovsd 8(%rsp), %xmm1 #[spill] + vbroadcastsd %xmm13, %zmm14 #129.25 + vbroadcastsd %xmm1, %zmm13 #130.21 + vbroadcastsd %xmm0, %zmm9 #197.45 + movslq %r13d, %r13 #143.5 + movq 24(%r15), %r10 #145.25 + movslq 16(%r15), %rdx #144.43 + movq 8(%r15), %rcx #144.19 + movq 32(%rsp), %r8 #[spill] + movq 16(%r12), %rbx #146.25 + shlq $2, %rdx #126.5 + movq %r13, 64(%rsp) #143.5[spill] + movq %r10, 72(%rsp) #143.5[spill] + # LOE rax rdx rcx rbx rsi rdi r8 r14 ymm15 ymm16 zmm5 zmm9 zmm13 zmm14 +..B1.11: # Preds ..B1.41 ..B1.10 # Execution count [5.00e+00] - movl %r12d, %edx #144.9 - movl 16(%r9), %r8d #144.43 - vxorpd %xmm18, %xmm18, %xmm18 #149.22 - imull %edx, %r8d #144.43 - vmovapd %xmm18, %xmm12 #150.22 - vmovapd %xmm12, %xmm4 #151.22 - movslq %r8d, %r8 #144.19 - lea (%rdx,%rdx,2), %eax #146.25 - movq 8(%r9), %rdi #144.19 - movslq %eax, %rax #146.25 - movq 24(%r9), %r13 #145.25 - lea (%rdi,%r8,4), %r8 #144.19 - movq 16(%r10), %rdi #146.25 - movl (%r13,%r12,4), %r13d #145.25 - vmovsd (%rdi,%rax,8), %xmm1 #146.25 - vmovsd 8(%rdi,%rax,8), %xmm15 #147.25 - vmovsd 16(%rdi,%rax,8), %xmm9 #148.25 - testl %ecx, %ecx #156.9 - je ..B1.23 # Prob 22% #156.9 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 edx ecx r13d xmm1 xmm4 xmm9 xmm12 xmm15 xmm18 -..B1.22: # Preds ..B1.21 - # Execution count [2.50e+00] - testl %r13d, %r13d #173.32 - jg ..B1.35 # Prob 50% #173.32 - jmp ..B1.64 # Prob 100% #173.32 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 ecx r13d xmm1 xmm4 xmm9 xmm12 xmm15 xmm18 -..B1.23: # Preds ..B1.21 - # Execution count [1.10e+00] - movl $.L_2__STRING.4, %esi #156.9 - xorl %eax, %eax #156.9 - movq 128(%rsp), %rdi #156.9[spill] - movq %r8, 448(%rsp) #156.9[spill] - movl %ecx, 416(%rsp) #156.9[spill] - movq %r10, 408(%rsp) #156.9[spill] - movq %r9, 384(%rsp) #156.9[spill] - vzeroupper #156.9 - vmovsd %xmm18, 168(%rsp) #156.9[spill] - vmovsd %xmm12, 176(%rsp) #156.9[spill] - vmovsd %xmm4, 184(%rsp) #156.9[spill] - vmovsd %xmm9, 152(%rsp) #156.9[spill] - vmovsd %xmm15, 144(%rsp) #156.9[spill] - vmovsd %xmm1, 136(%rsp) #156.9[spill] -# fprintf(FILE *__restrict__, const char *__restrict__, ...) - call fprintf #156.9 - # LOE rbx r12 r14 r15 r13d -..B1.24: # Preds ..B1.23 - # Execution count [2.50e+00] - xorl %edx, %edx #170.13 - movq 384(%rsp), %r9 #[spill] - xorl %eax, %eax #170.13 - movq 408(%rsp), %r10 #[spill] - movl 416(%rsp), %ecx #[spill] - movq 448(%rsp), %r8 #[spill] - vmovsd 136(%rsp), %xmm1 #[spill] - vmovsd 144(%rsp), %xmm15 #[spill] - vmovsd 152(%rsp), %xmm9 #[spill] - vmovsd 184(%rsp), %xmm4 #[spill] - vmovsd 176(%rsp), %xmm12 #[spill] - vmovsd 168(%rsp), %xmm18 #[spill] - testl %r13d, %r13d #170.13 - jle ..B1.64 # Prob 10% #170.13 - # LOE rax rdx rbx r8 r9 r10 r12 r14 r15 ecx r13d xmm1 xmm4 xmm9 xmm12 xmm15 xmm18 -..B1.25: # Preds ..B1.24 - # Execution count [2.25e+00] - movq %r15, 424(%rsp) #170.13[spill] - lea 7(%r13), %esi #145.25 - shrl $3, %esi #145.25 - vmovsd %xmm9, 152(%rsp) #170.13[spill] - vmovsd %xmm15, 144(%rsp) #170.13[spill] - vmovsd %xmm1, 136(%rsp) #170.13[spill] - movq %rsi, 472(%rsp) #170.13[spill] - movl %r13d, 464(%rsp) #170.13[spill] - movq %r12, 432(%rsp) #170.13[spill] - movq %rdx, %r12 #170.13 - movq %rbx, 400(%rsp) #170.13[spill] - movq %r8, %rbx #170.13 - movq %r14, 392(%rsp) #170.13[spill] - movq %rax, %r14 #170.13 - movl %ecx, 416(%rsp) #170.13[spill] - movq %r10, 408(%rsp) #170.13[spill] - movq %r9, 384(%rsp) #170.13[spill] - movq 128(%rsp), %r15 #170.13[spill] - # LOE rbx r12 r14 r15 -..B1.26: # Preds ..B1.33 ..B1.25 - # Execution count [1.25e+01] - movl %r12d, %r13d #170.13 - movl $8, %ecx #170.13 - shll $3, %r13d #170.13 - movl $il0_peep_printf_format_0, %edi #170.13 - negl %r13d #170.13 - movq %r15, %rsi #170.13 - addl 464(%rsp), %r13d #170.13[spill] - cmpl $8, %r13d #170.13 - cmovge %ecx, %r13d #170.13 - movslq %r13d, %r13 #170.13 - call fputs #170.13 - # LOE rbx r12 r13 r14 r15 -..B1.27: # Preds ..B1.26 - # Execution count [1.25e+01] - xorl %edx, %edx #170.13 - testq %r13, %r13 #170.13 - jle ..B1.32 # Prob 10% #170.13 - # LOE rdx rbx r12 r13 r14 r15 -..B1.28: # Preds ..B1.27 - # Execution count [1.12e+01] - movq %r12, 456(%rsp) #170.13[spill] - lea (%rbx,%r14), %rax #170.13 - movq %rbx, 448(%rsp) #170.13[spill] - movq %rax, %rbx #170.13 - movq %rdx, %r12 #170.13 - # LOE rbx r12 r13 r14 r15 -..B1.29: # Preds ..B1.30 ..B1.28 - # Execution count [6.25e+01] - movq %r15, %rdi #170.13 - movl $.L_2__STRING.6, %esi #170.13 - xorl %eax, %eax #170.13 - movl (%rbx,%r12,4), %edx #170.13 -# fprintf(FILE *__restrict__, const char *__restrict__, ...) - call fprintf #170.13 - # LOE rbx r12 r13 r14 r15 -..B1.30: # Preds ..B1.29 - # Execution count [6.25e+01] - incq %r12 #170.13 - cmpq %r13, %r12 #170.13 - jb ..B1.29 # Prob 82% #170.13 - # LOE rbx r12 r13 r14 r15 -..B1.31: # Preds ..B1.30 - # Execution count [1.12e+01] - movq 456(%rsp), %r12 #[spill] - movq 448(%rsp), %rbx #[spill] - # LOE rbx r12 r14 r15 -..B1.32: # Preds ..B1.31 ..B1.27 - # Execution count [1.25e+01] - movl $10, %edi #170.13 - movq %r15, %rsi #170.13 - call fputc #170.13 - # LOE rbx r12 r14 r15 -..B1.33: # Preds ..B1.32 - # Execution count [1.25e+01] - incq %r12 #170.13 - addq $32, %r14 #170.13 - cmpq 472(%rsp), %r12 #170.13[spill] - jb ..B1.26 # Prob 82% #170.13 - # LOE rbx r12 r14 r15 -..B1.34: # Preds ..B1.33 - # Execution count [2.25e+00] - movq 408(%rsp), %r10 #[spill] - movq %rbx, %r8 # - vmovsd 152(%rsp), %xmm9 #[spill] - vmovsd 144(%rsp), %xmm15 #[spill] - vmovsd 136(%rsp), %xmm1 #[spill] - movl 464(%rsp), %r13d #[spill] - movq 432(%rsp), %r12 #[spill] - movq 400(%rsp), %rbx #[spill] - movq 392(%rsp), %r14 #[spill] - movq 424(%rsp), %r15 #[spill] - movl 416(%rsp), %ecx #[spill] - movq 384(%rsp), %r9 #[spill] - movq 16(%r10), %rdi #175.40 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 ecx r13d xmm1 xmm9 xmm15 -..B1.35: # Preds ..B1.34 ..B1.22 + movq 72(%rsp), %r9 #145.25[spill] + vxorpd %xmm24, %xmm24, %xmm24 #149.22 + vmovapd %xmm24, %xmm18 #150.22 + movl (%r9,%rax,4), %r10d #145.25 + vmovapd %xmm18, %xmm4 #151.22 + vmovsd (%rdi,%rbx), %xmm10 #146.25 + vmovsd 8(%rdi,%rbx), %xmm6 #147.25 + vmovsd 16(%rdi,%rbx), %xmm12 #148.25 + testl %r10d, %r10d #173.32 + jle ..B1.41 # Prob 50% #173.32 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d xmm4 xmm6 xmm10 xmm12 xmm18 xmm24 ymm15 ymm16 zmm5 zmm9 zmm13 zmm14 +..B1.12: # Preds ..B1.11 # Execution count [4.50e+00] vpxord %zmm8, %zmm8, %zmm8 #149.22 vmovaps %zmm8, %zmm7 #150.22 - vmovaps %zmm7, %zmm6 #151.22 - cmpl $8, %r13d #173.13 - jl ..B1.72 # Prob 10% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.36: # Preds ..B1.35 + vmovaps %zmm7, %zmm11 #151.22 + cmpl $8, %r10d #173.13 + jl ..B1.48 # Prob 10% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.13: # Preds ..B1.12 # Execution count [4.50e+00] - cmpl $1200, %r13d #173.13 - jl ..B1.71 # Prob 10% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.37: # Preds ..B1.36 + cmpl $1200, %r10d #173.13 + jl ..B1.47 # Prob 10% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.14: # Preds ..B1.13 # Execution count [4.50e+00] - movq %r8, %rax #173.13 - andq $63, %rax #173.13 - testb $3, %al #173.13 - je ..B1.39 # Prob 50% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.38: # Preds ..B1.37 + movq %rdx, %r15 #144.43 + imulq %rsi, %r15 #144.43 + addq %rcx, %r15 #126.5 + movq %r15, %r11 #173.13 + andq $63, %r11 #173.13 + testl $3, %r11d #173.13 + je ..B1.16 # Prob 50% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r15 r10d r11d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.15: # Preds ..B1.14 # Execution count [2.25e+00] - xorl %eax, %eax #173.13 - jmp ..B1.41 # Prob 100% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.39: # Preds ..B1.37 - # Execution count [2.25e+00] - testl %eax, %eax #173.13 - je ..B1.41 # Prob 50% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.40: # Preds ..B1.39 - # Execution count [2.50e+01] - negl %eax #173.13 - addl $64, %eax #173.13 - shrl $2, %eax #173.13 - cmpl %eax, %r13d #173.13 - cmovl %r13d, %eax #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.41: # Preds ..B1.38 ..B1.40 ..B1.39 - # Execution count [5.00e+00] - movl %r13d, %edx #173.13 - subl %eax, %edx #173.13 - andl $7, %edx #173.13 - negl %edx #173.13 - addl %r13d, %edx #173.13 - cmpl $1, %eax #173.13 - jb ..B1.49 # Prob 50% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.42: # Preds ..B1.41 - # Execution count [4.50e+00] - vmovdqu .L_2il0floatpacket.1(%rip), %ymm5 #173.13 xorl %r11d, %r11d #173.13 - vmovups .L_2il0floatpacket.4(%rip), %zmm10 #173.13 - vmovups 320(%rsp), %zmm11 #173.13[spill] - vmovups 256(%rsp), %zmm12 #173.13[spill] - vmovups 192(%rsp), %zmm13 #173.13[spill] - vmovdqu .L_2il0floatpacket.0(%rip), %ymm14 #173.13 - vpbroadcastd %eax, %ymm4 #173.13 - vbroadcastsd %xmm1, %zmm3 #146.23 - vbroadcastsd %xmm15, %zmm2 #147.23 - vbroadcastsd %xmm9, %zmm0 #148.23 - movslq %eax, %rsi #173.13 - movq %r10, 408(%rsp) #173.13[spill] - movq %r9, 384(%rsp) #173.13[spill] - # LOE rbx rsi rdi r8 r11 r12 r14 r15 eax edx ecx r13d xmm1 xmm9 xmm15 ymm4 ymm5 ymm14 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm10 zmm11 zmm12 zmm13 -..B1.43: # Preds ..B1.47 ..B1.42 + jmp ..B1.18 # Prob 100% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r15 r10d r11d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.16: # Preds ..B1.14 + # Execution count [2.25e+00] + testl %r11d, %r11d #173.13 + je ..B1.18 # Prob 50% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r15 r10d r11d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.17: # Preds ..B1.16 # Execution count [2.50e+01] - vpcmpgtd %ymm5, %ymm4, %k3 #173.13 - vmovdqu32 (%r8,%r11,4), %ymm16{%k3}{z} #174.25 - kmovw %k3, %r10d #173.13 - vpaddd %ymm16, %ymm16, %ymm17 #175.40 - vpaddd %ymm17, %ymm16, %ymm16 #175.40 - # LOE rbx rsi rdi r8 r11 r12 r14 r15 eax edx ecx r10d r13d xmm1 xmm9 xmm15 ymm4 ymm5 ymm14 ymm16 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm10 zmm11 zmm12 zmm13 k3 -..B1.46: # Preds ..B1.43 + negl %r11d #173.13 + addl $64, %r11d #173.13 + shrl $2, %r11d #173.13 + cmpl %r11d, %r10d #173.13 + cmovl %r10d, %r11d #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r15 r10d r11d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.18: # Preds ..B1.15 ..B1.17 ..B1.16 + # Execution count [5.00e+00] + movl %r10d, %r13d #173.13 + subl %r11d, %r13d #173.13 + andl $7, %r13d #173.13 + negl %r13d #173.13 + addl %r10d, %r13d #173.13 + cmpl $1, %r11d #173.13 + jb ..B1.26 # Prob 50% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r15 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.19: # Preds ..B1.18 + # Execution count [4.50e+00] + vmovdqa %ymm15, %ymm4 #173.13 + xorl %r12d, %r12d #173.13 + vpbroadcastd %r11d, %ymm3 #173.13 + vbroadcastsd %xmm10, %zmm2 #146.23 + vbroadcastsd %xmm6, %zmm1 #147.23 + vbroadcastsd %xmm12, %zmm0 #148.23 + movslq %r11d, %r9 #173.13 + movq %r8, 32(%rsp) #173.13[spill] + movq %r14, (%rsp) #173.13[spill] + # LOE rax rdx rcx rbx rsi rdi r9 r12 r15 r10d r11d r13d xmm6 xmm10 xmm12 ymm3 ymm4 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.20: # Preds ..B1.24 ..B1.19 + # Execution count [2.50e+01] + vpcmpgtd %ymm4, %ymm3, %k3 #173.13 + vmovdqu32 (%r15,%r12,4), %ymm17{%k3}{z} #174.25 + kmovw %k3, %r14d #173.13 + vpaddd %ymm17, %ymm17, %ymm18 #175.40 + vpaddd %ymm18, %ymm17, %ymm17 #175.40 + # LOE rax rdx rcx rbx rsi rdi r9 r12 r15 r10d r11d r13d r14d xmm6 xmm10 xmm12 ymm3 ymm4 ymm15 ymm16 ymm17 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 k3 +..B1.23: # Preds ..B1.20 # Execution count [1.25e+01] kmovw %k3, %k1 #175.40 kmovw %k3, %k2 #175.40 - vpxord %zmm17, %zmm17, %zmm17 #175.40 vpxord %zmm18, %zmm18, %zmm18 #175.40 vpxord %zmm19, %zmm19, %zmm19 #175.40 - vgatherdpd 16(%rdi,%ymm16,8), %zmm17{%k1} #175.40 - vgatherdpd 8(%rdi,%ymm16,8), %zmm18{%k2} #175.40 - vgatherdpd (%rdi,%ymm16,8), %zmm19{%k3} #175.40 - # LOE rbx rsi rdi r8 r11 r12 r14 r15 eax edx ecx r10d r13d xmm1 xmm9 xmm15 ymm4 ymm5 ymm14 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm10 zmm11 zmm12 zmm13 zmm17 zmm18 zmm19 -..B1.47: # Preds ..B1.46 + vpxord %zmm20, %zmm20, %zmm20 #175.40 + vgatherdpd 16(%rbx,%ymm17,8), %zmm18{%k1} #175.40 + vgatherdpd 8(%rbx,%ymm17,8), %zmm19{%k2} #175.40 + vgatherdpd (%rbx,%ymm17,8), %zmm20{%k3} #175.40 + # LOE rax rdx rcx rbx rsi rdi r9 r12 r15 r10d r11d r13d r14d xmm6 xmm10 xmm12 ymm3 ymm4 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 zmm18 zmm19 zmm20 +..B1.24: # Preds ..B1.23 # Execution count [2.50e+01] - addq $8, %r11 #173.13 - #vpaddd %ymm14, %ymm5, %ymm5 #173.13 - #vsubpd %zmm17, %zmm0, %zmm28 #177.40 - #vsubpd %zmm18, %zmm2, %zmm26 #176.40 - #vsubpd %zmm19, %zmm3, %zmm25 #175.40 - #vmulpd %zmm26, %zmm26, %zmm24 #178.53 - #vfmadd231pd %zmm25, %zmm25, %zmm24 #178.53 - #vfmadd231pd %zmm28, %zmm28, %zmm24 #178.67 - #vrcp14pd %zmm24, %zmm23 #195.42 - #vcmppd $1, %zmm13, %zmm24, %k2 #194.26 - #vfpclasspd $30, %zmm23, %k0 #195.42 - #kmovw %k2, %r9d #194.26 + addq $8, %r12 #173.13 + #vpaddd %ymm16, %ymm4, %ymm4 #173.13 + #vsubpd %zmm18, %zmm0, %zmm29 #177.40 + #vsubpd %zmm19, %zmm1, %zmm27 #176.40 + #vsubpd %zmm20, %zmm2, %zmm26 #175.40 + #vmulpd %zmm27, %zmm27, %zmm25 #178.53 + #vfmadd231pd %zmm26, %zmm26, %zmm25 #178.53 + #vfmadd231pd %zmm29, %zmm29, %zmm25 #178.67 + #vrcp14pd %zmm25, %zmm24 #195.42 + #vcmppd $1, %zmm14, %zmm25, %k2 #194.26 + #vfpclasspd $30, %zmm24, %k0 #195.42 + #kmovw %k2, %r8d #194.26 #knotw %k0, %k1 #195.42 - #vmovaps %zmm24, %zmm16 #195.42 - #andl %r9d, %r10d #194.26 - #vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm23, %zmm16 #195.42 - #kmovw %r10d, %k3 #198.21 - #vmulpd %zmm16, %zmm16, %zmm17 #195.42 - #vfmadd213pd %zmm23, %zmm16, %zmm23{%k1} #195.42 - #vfmadd213pd %zmm23, %zmm17, %zmm23{%k1} #195.42 - #vmulpd %zmm12, %zmm23, %zmm18 #196.42 - #vmulpd %zmm11, %zmm23, %zmm20 #197.58 - #vmulpd %zmm18, %zmm23, %zmm21 #196.48 - #vmulpd %zmm21, %zmm23, %zmm19 #196.54 - #vfmsub213pd %zmm10, %zmm21, %zmm23 #197.58 - #vmulpd %zmm20, %zmm19, %zmm22 #197.65 - #vmulpd %zmm23, %zmm22, %zmm27 #197.71 - #vfmadd231pd %zmm25, %zmm27, %zmm8{%k3} #198.21 - #vfmadd231pd %zmm26, %zmm27, %zmm7{%k3} #199.21 - #vfmadd231pd %zmm28, %zmm27, %zmm6{%k3} #200.21 - cmpq %rsi, %r11 #173.13 - jb ..B1.43 # Prob 82% #173.13 - # LOE rbx rsi rdi r8 r11 r12 r14 r15 eax edx ecx r13d xmm1 xmm9 xmm15 ymm4 ymm5 ymm14 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm10 zmm11 zmm12 zmm13 -..B1.48: # Preds ..B1.47 + #vmovaps %zmm25, %zmm17 #195.42 + #andl %r8d, %r14d #194.26 + #vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm24, %zmm17 #195.42 + #kmovw %r14d, %k3 #198.21 + #vmulpd %zmm17, %zmm17, %zmm18 #195.42 + #vfmadd213pd %zmm24, %zmm17, %zmm24{%k1} #195.42 + #vfmadd213pd %zmm24, %zmm18, %zmm24{%k1} #195.42 + #vmulpd %zmm13, %zmm24, %zmm19 #196.42 + #vmulpd %zmm9, %zmm24, %zmm21 #197.58 + #vmulpd %zmm19, %zmm24, %zmm22 #196.48 + #vmulpd %zmm22, %zmm24, %zmm20 #196.54 + #vfmsub213pd %zmm5, %zmm22, %zmm24 #197.58 + #vmulpd %zmm21, %zmm20, %zmm23 #197.65 + #vmulpd %zmm24, %zmm23, %zmm28 #197.71 + #vfmadd231pd %zmm26, %zmm28, %zmm8{%k3} #198.21 + #vfmadd231pd %zmm27, %zmm28, %zmm7{%k3} #199.21 + #vfmadd231pd %zmm29, %zmm28, %zmm11{%k3} #200.21 + cmpq %r9, %r12 #173.13 + jb ..B1.20 # Prob 82% #173.13 + # LOE rax rdx rcx rbx rsi rdi r9 r12 r15 r10d r11d r13d xmm6 xmm10 xmm12 ymm3 ymm4 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.25: # Preds ..B1.24 # Execution count [4.50e+00] - movq 408(%rsp), %r10 #[spill] - movq 384(%rsp), %r9 #[spill] - cmpl %eax, %r13d #173.13 - je ..B1.63 # Prob 10% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.49: # Preds ..B1.48 ..B1.41 ..B1.71 + movq 32(%rsp), %r8 #[spill] + movq (%rsp), %r14 #[spill] + cmpl %r11d, %r10d #173.13 + je ..B1.40 # Prob 10% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.26: # Preds ..B1.25 ..B1.18 ..B1.47 # Execution count [2.50e+01] - lea 8(%rax), %esi #173.13 - cmpl %esi, %edx #173.13 - jl ..B1.57 # Prob 50% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.50: # Preds ..B1.49 + lea 8(%r11), %r9d #173.13 + cmpl %r9d, %r13d #173.13 + jl ..B1.34 # Prob 50% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.27: # Preds ..B1.26 # Execution count [4.50e+00] - vbroadcastsd %xmm1, %zmm3 #146.23 - vbroadcastsd %xmm15, %zmm2 #147.23 - vbroadcastsd %xmm9, %zmm0 #148.23 - vmovups .L_2il0floatpacket.4(%rip), %zmm21 #173.13 - vmovups 320(%rsp), %zmm22 #173.13[spill] - vmovups 256(%rsp), %zmm23 #173.13[spill] - vmovups 192(%rsp), %zmm24 #173.13[spill] - movslq %eax, %rsi #173.13 - movq %r12, 432(%rsp) #173.13[spill] - movq %rbx, 400(%rsp) #173.13[spill] - movq %r14, 392(%rsp) #173.13[spill] - movq %r15, 424(%rsp) #173.13[spill] - movl %ecx, 416(%rsp) #173.13[spill] - movq %r10, 408(%rsp) #173.13[spill] - movq %r9, 384(%rsp) #173.13[spill] - # LOE rsi rdi r8 eax edx r13d xmm1 xmm9 xmm15 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm21 zmm22 zmm23 zmm24 -..B1.51: # Preds ..B1.55 ..B1.50 + movq %rdx, %r12 #144.43 + imulq %rsi, %r12 #144.43 + vbroadcastsd %xmm10, %zmm1 #146.23 + vbroadcastsd %xmm6, %zmm0 #147.23 + vbroadcastsd %xmm12, %zmm2 #148.23 + movslq %r11d, %r9 #173.13 + addq %rcx, %r12 #126.5 + movq %rdi, 8(%rsp) #126.5[spill] + movq %rdx, 16(%rsp) #126.5[spill] + movq %rcx, 40(%rsp) #126.5[spill] + movq %rax, 48(%rsp) #126.5[spill] + movq %rsi, 56(%rsp) #126.5[spill] + movq %r8, 32(%rsp) #126.5[spill] + movq %r14, (%rsp) #126.5[spill] + # LOE rbx r9 r12 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.28: # Preds ..B1.32 ..B1.27 # Execution count [2.50e+01] - vmovdqu (%r8,%rsi,4), %ymm4 #174.25 - movl 20(%r8,%rsi,4), %ecx #174.25 - movl 24(%r8,%rsi,4), %r15d #174.25 - vpaddd %ymm4, %ymm4, %ymm5 #175.40 - vpaddd %ymm5, %ymm4, %ymm4 #175.40 - movl (%r8,%rsi,4), %r14d #174.25 - lea (%rcx,%rcx,2), %ebx #175.40 - movl 4(%r8,%rsi,4), %r12d #174.25 - lea (%r15,%r15,2), %ecx #175.40 - movl 8(%r8,%rsi,4), %r11d #174.25 - movl 12(%r8,%rsi,4), %r10d #174.25 + vmovdqu (%r12,%r9,4), %ymm3 #174.25 + vpaddd %ymm3, %ymm3, %ymm4 #175.40 + vpaddd %ymm4, %ymm3, %ymm3 #175.40 + movl (%r12,%r9,4), %r14d #174.25 + movl 4(%r12,%r9,4), %r8d #174.25 + movl 8(%r12,%r9,4), %edi #174.25 + movl 12(%r12,%r9,4), %esi #174.25 lea (%r14,%r14,2), %r14d #175.40 - movl 16(%r8,%rsi,4), %r9d #174.25 - lea (%r12,%r12,2), %r12d #175.40 - movl 28(%r8,%rsi,4), %r15d #174.25 - lea (%r11,%r11,2), %r11d #175.40 - lea (%r10,%r10,2), %r10d #175.40 - lea (%r9,%r9,2), %r9d #175.40 + movl 16(%r12,%r9,4), %ecx #174.25 + lea (%r8,%r8,2), %r8d #175.40 + movl 20(%r12,%r9,4), %edx #174.25 + lea (%rdi,%rdi,2), %edi #175.40 + movl 24(%r12,%r9,4), %eax #174.25 + lea (%rsi,%rsi,2), %esi #175.40 + movl 28(%r12,%r9,4), %r15d #174.25 + lea (%rcx,%rcx,2), %ecx #175.40 + lea (%rdx,%rdx,2), %edx #175.40 + lea (%rax,%rax,2), %eax #175.40 lea (%r15,%r15,2), %r15d #175.40 - # LOE rsi rdi r8 eax edx ecx ebx r9d r10d r11d r12d r13d r14d r15d xmm1 xmm9 xmm15 ymm4 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm21 zmm22 zmm23 zmm24 -..B1.54: # Preds ..B1.51 + # LOE rbx r9 r12 eax edx ecx esi edi r8d r10d r11d r13d r14d r15d xmm6 xmm10 xmm12 ymm3 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.31: # Preds ..B1.28 # Execution count [1.25e+01] vpcmpeqb %xmm0, %xmm0, %k1 #175.40 vpcmpeqb %xmm0, %xmm0, %k2 #175.40 vpcmpeqb %xmm0, %xmm0, %k3 #175.40 - vpxord %zmm5, %zmm5, %zmm5 #175.40 - vpxord %zmm10, %zmm10, %zmm10 #175.40 - vpxord %zmm11, %zmm11, %zmm11 #175.40 - vgatherdpd 16(%rdi,%ymm4,8), %zmm5{%k1} #175.40 - vgatherdpd 8(%rdi,%ymm4,8), %zmm10{%k2} #175.40 - vgatherdpd (%rdi,%ymm4,8), %zmm11{%k3} #175.40 - # LOE rsi rdi r8 eax edx r13d xmm1 xmm9 xmm15 zmm0 zmm2 zmm3 zmm5 zmm6 zmm7 zmm8 zmm10 zmm11 zmm21 zmm22 zmm23 zmm24 -..B1.55: # Preds ..B1.54 + vpxord %zmm4, %zmm4, %zmm4 #175.40 + vpxord %zmm17, %zmm17, %zmm17 #175.40 + vpxord %zmm18, %zmm18, %zmm18 #175.40 + vgatherdpd 16(%rbx,%ymm3,8), %zmm4{%k1} #175.40 + vgatherdpd 8(%rbx,%ymm3,8), %zmm17{%k2} #175.40 + vgatherdpd (%rbx,%ymm3,8), %zmm18{%k3} #175.40 + # LOE rbx r9 r12 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm4 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 zmm17 zmm18 +..B1.32: # Preds ..B1.31 # Execution count [2.50e+01] - addl $8, %eax #173.13 - addq $8, %rsi #173.13 - # vsubpd %zmm5, %zmm0, %zmm20 #177.40 - # vsubpd %zmm10, %zmm2, %zmm18 #176.40 - # vsubpd %zmm11, %zmm3, %zmm17 #175.40 - # vmulpd %zmm18, %zmm18, %zmm4 #178.53 - # vfmadd231pd %zmm17, %zmm17, %zmm4 #178.53 - # vfmadd231pd %zmm20, %zmm20, %zmm4 #178.67 - # vrcp14pd %zmm4, %zmm16 #195.42 - # vcmppd $1, %zmm24, %zmm4, %k2 #194.26 - # vfpclasspd $30, %zmm16, %k0 #195.42 - # vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm16, %zmm4 #195.42 - # knotw %k0, %k1 #195.42 - # vmulpd %zmm4, %zmm4, %zmm5 #195.42 - # vfmadd213pd %zmm16, %zmm4, %zmm16{%k1} #195.42 - # vfmadd213pd %zmm16, %zmm5, %zmm16{%k1} #195.42 - # vmulpd %zmm23, %zmm16, %zmm10 #196.42 - # vmulpd %zmm22, %zmm16, %zmm12 #197.58 - # vmulpd %zmm10, %zmm16, %zmm13 #196.48 - # vmulpd %zmm13, %zmm16, %zmm11 #196.54 - # vfmsub213pd %zmm21, %zmm13, %zmm16 #197.58 - # vmulpd %zmm12, %zmm11, %zmm14 #197.65 - # vmulpd %zmm16, %zmm14, %zmm19 #197.71 - # vfmadd231pd %zmm17, %zmm19, %zmm8{%k2} #198.21 - # vfmadd231pd %zmm18, %zmm19, %zmm7{%k2} #199.21 - # vfmadd231pd %zmm20, %zmm19, %zmm6{%k2} #200.21 - cmpl %edx, %eax #173.13 - jb ..B1.51 # Prob 82% #173.13 - # LOE rsi rdi r8 eax edx r13d xmm1 xmm9 xmm15 zmm0 zmm2 zmm3 zmm6 zmm7 zmm8 zmm21 zmm22 zmm23 zmm24 -..B1.56: # Preds ..B1.55 + addl $8, %r11d #173.13 + addq $8, %r9 #173.13 + #vsubpd %zmm4, %zmm2, %zmm26 #177.40 + #vsubpd %zmm17, %zmm0, %zmm24 #176.40 + #vsubpd %zmm18, %zmm1, %zmm23 #175.40 + #vmulpd %zmm24, %zmm24, %zmm3 #178.53 + #vfmadd231pd %zmm23, %zmm23, %zmm3 #178.53 + #vfmadd231pd %zmm26, %zmm26, %zmm3 #178.67 + #vrcp14pd %zmm3, %zmm22 #195.42 + #vcmppd $1, %zmm14, %zmm3, %k2 #194.26 + #vfpclasspd $30, %zmm22, %k0 #195.42 + #vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm22, %zmm3 #195.42 + #knotw %k0, %k1 #195.42 + #vmulpd %zmm3, %zmm3, %zmm4 #195.42 + #vfmadd213pd %zmm22, %zmm3, %zmm22{%k1} #195.42 + #vfmadd213pd %zmm22, %zmm4, %zmm22{%k1} #195.42 + #vmulpd %zmm13, %zmm22, %zmm17 #196.42 + #vmulpd %zmm9, %zmm22, %zmm19 #197.58 + #vmulpd %zmm17, %zmm22, %zmm20 #196.48 + #vmulpd %zmm20, %zmm22, %zmm18 #196.54 + #vfmsub213pd %zmm5, %zmm20, %zmm22 #197.58 + #vmulpd %zmm19, %zmm18, %zmm21 #197.65 + #vmulpd %zmm22, %zmm21, %zmm25 #197.71 + #vfmadd231pd %zmm23, %zmm25, %zmm8{%k2} #198.21 + #vfmadd231pd %zmm24, %zmm25, %zmm7{%k2} #199.21 + #vfmadd231pd %zmm26, %zmm25, %zmm11{%k2} #200.21 + cmpl %r13d, %r11d #173.13 + jb ..B1.28 # Prob 82% #173.13 + # LOE rbx r9 r12 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm0 zmm1 zmm2 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.33: # Preds ..B1.32 # Execution count [4.50e+00] - movq 432(%rsp), %r12 #[spill] - movq 400(%rsp), %rbx #[spill] - movq 392(%rsp), %r14 #[spill] - movq 424(%rsp), %r15 #[spill] - movl 416(%rsp), %ecx #[spill] - movq 408(%rsp), %r10 #[spill] - movq 384(%rsp), %r9 #[spill] - # LOE rbx rdi r8 r9 r10 r12 r14 r15 edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.57: # Preds ..B1.56 ..B1.49 ..B1.72 + movq 8(%rsp), %rdi #[spill] + movq 16(%rsp), %rdx #[spill] + movq 40(%rsp), %rcx #[spill] + movq 48(%rsp), %rax #[spill] + movq 56(%rsp), %rsi #[spill] + movq 32(%rsp), %r8 #[spill] + movq (%rsp), %r14 #[spill] + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.34: # Preds ..B1.33 ..B1.26 ..B1.48 # Execution count [5.00e+00] - lea 1(%rdx), %eax #173.13 - cmpl %r13d, %eax #173.13 - ja ..B1.63 # Prob 50% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.58: # Preds ..B1.57 + lea 1(%r13), %r9d #173.13 + cmpl %r10d, %r9d #173.13 + ja ..B1.40 # Prob 50% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.35: # Preds ..B1.34 # Execution count [2.50e+01] - subl %edx, %r13d #173.13 - vpbroadcastd %r13d, %ymm0 #173.13 - vbroadcastsd %xmm1, %zmm4 #146.23 - vpcmpgtd .L_2il0floatpacket.1(%rip), %ymm0, %k3 #173.13 - movslq %edx, %rdx #173.13 - vmovdqu32 (%r8,%rdx,4), %ymm2{%k3}{z} #174.25 - kmovw %k3, %edx #173.13 - vpaddd %ymm2, %ymm2, %ymm1 #175.40 - vpaddd %ymm1, %ymm2, %ymm0 #175.40 - # LOE rbx rdi r9 r10 r12 r14 r15 edx ecx xmm9 xmm15 ymm0 zmm4 zmm6 zmm7 zmm8 k3 -..B1.61: # Preds ..B1.58 + imulq %rdx, %rsi #144.43 + vbroadcastsd %xmm10, %zmm4 #146.23 + subl %r13d, %r10d #173.13 + addq %rcx, %rsi #126.5 + vpbroadcastd %r10d, %ymm0 #173.13 + vpcmpgtd %ymm15, %ymm0, %k3 #173.13 + movslq %r13d, %r13 #173.13 + kmovw %k3, %r9d #173.13 + vmovdqu32 (%rsi,%r13,4), %ymm1{%k3}{z} #174.25 + vpaddd %ymm1, %ymm1, %ymm2 #175.40 + vpaddd %ymm2, %ymm1, %ymm0 #175.40 + # LOE rax rdx rcx rbx rdi r8 r14 r9d xmm6 xmm12 ymm0 ymm15 ymm16 zmm4 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 k3 +..B1.38: # Preds ..B1.35 # Execution count [1.25e+01] kmovw %k3, %k1 #175.40 kmovw %k3, %k2 #175.40 vpxord %zmm1, %zmm1, %zmm1 #175.40 vpxord %zmm2, %zmm2, %zmm2 #175.40 vpxord %zmm3, %zmm3, %zmm3 #175.40 - vgatherdpd 16(%rdi,%ymm0,8), %zmm1{%k1} #175.40 - vgatherdpd 8(%rdi,%ymm0,8), %zmm2{%k2} #175.40 - vgatherdpd (%rdi,%ymm0,8), %zmm3{%k3} #175.40 - # LOE rbx r9 r10 r12 r14 r15 edx ecx xmm9 xmm15 zmm1 zmm2 zmm3 zmm4 zmm6 zmm7 zmm8 -..B1.62: # Preds ..B1.61 + vgatherdpd 16(%rbx,%ymm0,8), %zmm1{%k1} #175.40 + vgatherdpd 8(%rbx,%ymm0,8), %zmm2{%k2} #175.40 + vgatherdpd (%rbx,%ymm0,8), %zmm3{%k3} #175.40 + # LOE rax rdx rcx rbx rdi r8 r14 r9d xmm6 xmm12 ymm15 ymm16 zmm1 zmm2 zmm3 zmm4 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.39: # Preds ..B1.38 # Execution count [2.50e+01] - #vbroadcastsd %xmm15, %zmm15 #147.23 - #vbroadcastsd %xmm9, %zmm9 #148.23 - #vsubpd %zmm1, %zmm9, %zmm17 #177.40 - #vsubpd %zmm2, %zmm15, %zmm14 #176.40 - #vsubpd %zmm3, %zmm4, %zmm13 #175.40 - #vmulpd %zmm14, %zmm14, %zmm12 #178.53 - #vfmadd231pd %zmm13, %zmm13, %zmm12 #178.53 - #vfmadd231pd %zmm17, %zmm17, %zmm12 #178.67 - #vrcp14pd %zmm12, %zmm11 #195.42 - #vcmppd $1, 192(%rsp), %zmm12, %k2 #194.26[spill] - #vfpclasspd $30, %zmm11, %k0 #195.42 - #kmovw %k2, %eax #194.26 + #vbroadcastsd %xmm6, %zmm6 #147.23 + #vbroadcastsd %xmm12, %zmm12 #148.23 + #vsubpd %zmm1, %zmm12, %zmm23 #177.40 + #vsubpd %zmm2, %zmm6, %zmm21 #176.40 + #vsubpd %zmm3, %zmm4, %zmm20 #175.40 + #vmulpd %zmm21, %zmm21, %zmm19 #178.53 + #vfmadd231pd %zmm20, %zmm20, %zmm19 #178.53 + #vfmadd231pd %zmm23, %zmm23, %zmm19 #178.67 + #vrcp14pd %zmm19, %zmm18 #195.42 + #vcmppd $1, %zmm14, %zmm19, %k2 #194.26 + #vfpclasspd $30, %zmm18, %k0 #195.42 + #kmovw %k2, %esi #194.26 #knotw %k0, %k1 #195.42 - #vmovaps %zmm12, %zmm0 #195.42 - #andl %eax, %edx #194.26 - #vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm11, %zmm0 #195.42 - #kmovw %edx, %k3 #198.21 + #vmovaps %zmm19, %zmm0 #195.42 + #andl %esi, %r9d #194.26 + #vfnmadd213pd .L_2il0floatpacket.9(%rip){1to8}, %zmm18, %zmm0 #195.42 + #kmovw %r9d, %k3 #198.21 #vmulpd %zmm0, %zmm0, %zmm1 #195.42 - #vfmadd213pd %zmm11, %zmm0, %zmm11{%k1} #195.42 - #vfmadd213pd %zmm11, %zmm1, %zmm11{%k1} #195.42 - #vmulpd 256(%rsp), %zmm11, %zmm2 #196.42[spill] - #vmulpd 320(%rsp), %zmm11, %zmm4 #197.58[spill] - #vmulpd %zmm2, %zmm11, %zmm5 #196.48 - #vmulpd %zmm5, %zmm11, %zmm3 #196.54 - #vfmsub213pd .L_2il0floatpacket.4(%rip), %zmm5, %zmm11 #197.58 - #vmulpd %zmm4, %zmm3, %zmm10 #197.65 - #vmulpd %zmm11, %zmm10, %zmm16 #197.71 - #vfmadd231pd %zmm13, %zmm16, %zmm8{%k3} #198.21 - #vfmadd231pd %zmm14, %zmm16, %zmm7{%k3} #199.21 - #vfmadd231pd %zmm17, %zmm16, %zmm6{%k3} #200.21 - # LOE rbx r9 r10 r12 r14 r15 ecx zmm6 zmm7 zmm8 -..B1.63: # Preds ..B1.48 ..B1.62 ..B1.57 + #vfmadd213pd %zmm18, %zmm0, %zmm18{%k1} #195.42 + #vfmadd213pd %zmm18, %zmm1, %zmm18{%k1} #195.42 + #vmulpd %zmm13, %zmm18, %zmm2 #196.42 + #vmulpd %zmm9, %zmm18, %zmm4 #197.58 + #vmulpd %zmm2, %zmm18, %zmm10 #196.48 + #vmulpd %zmm10, %zmm18, %zmm3 #196.54 + #vfmsub213pd %zmm5, %zmm10, %zmm18 #197.58 + #vmulpd %zmm4, %zmm3, %zmm17 #197.65 + #vmulpd %zmm18, %zmm17, %zmm22 #197.71 + #vfmadd231pd %zmm20, %zmm22, %zmm8{%k3} #198.21 + #vfmadd231pd %zmm21, %zmm22, %zmm7{%k3} #199.21 + #vfmadd231pd %zmm23, %zmm22, %zmm11{%k3} #200.21 + # LOE rax rdx rcx rbx rdi r8 r14 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.40: # Preds ..B1.25 ..B1.39 ..B1.34 # Execution count [4.50e+00] - vmovups .L_2il0floatpacket.10(%rip), %zmm13 #151.22 - vpermd %zmm6, %zmm13, %zmm0 #151.22 - vpermd %zmm7, %zmm13, %zmm5 #150.22 - vpermd %zmm8, %zmm13, %zmm14 #149.22 - vaddpd %zmm6, %zmm0, %zmm6 #151.22 - vaddpd %zmm7, %zmm5, %zmm7 #150.22 - vaddpd %zmm8, %zmm14, %zmm8 #149.22 - vpermpd $78, %zmm6, %zmm1 #151.22 - vpermpd $78, %zmm7, %zmm9 #150.22 - vpermpd $78, %zmm8, %zmm15 #149.22 - vaddpd %zmm1, %zmm6, %zmm2 #151.22 - vaddpd %zmm9, %zmm7, %zmm10 #150.22 - vaddpd %zmm15, %zmm8, %zmm16 #149.22 + vmovups .L_2il0floatpacket.10(%rip), %zmm19 #151.22 + vpermd %zmm11, %zmm19, %zmm0 #151.22 + vpermd %zmm7, %zmm19, %zmm6 #150.22 + vpermd %zmm8, %zmm19, %zmm20 #149.22 + vaddpd %zmm11, %zmm0, %zmm11 #151.22 + vaddpd %zmm7, %zmm6, %zmm7 #150.22 + vaddpd %zmm8, %zmm20, %zmm8 #149.22 + vpermpd $78, %zmm11, %zmm1 #151.22 + vpermpd $78, %zmm7, %zmm10 #150.22 + vpermpd $78, %zmm8, %zmm21 #149.22 + vaddpd %zmm1, %zmm11, %zmm2 #151.22 + vaddpd %zmm10, %zmm7, %zmm12 #150.22 + vaddpd %zmm21, %zmm8, %zmm22 #149.22 vpermpd $177, %zmm2, %zmm3 #151.22 - vpermpd $177, %zmm10, %zmm11 #150.22 - vpermpd $177, %zmm16, %zmm17 #149.22 + vpermpd $177, %zmm12, %zmm17 #150.22 + vpermpd $177, %zmm22, %zmm23 #149.22 vaddpd %zmm3, %zmm2, %zmm4 #151.22 - vaddpd %zmm11, %zmm10, %zmm12 #150.22 - vaddpd %zmm17, %zmm16, %zmm18 #149.22 - # LOE rbx r9 r10 r12 r14 r15 ecx xmm4 xmm12 xmm18 -..B1.64: # Preds ..B1.24 ..B1.63 ..B1.22 + vaddpd %zmm17, %zmm12, %zmm18 #150.22 + vaddpd %zmm23, %zmm22, %zmm24 #149.22 + # LOE rax rdx rcx rbx rdi r8 r14 xmm4 xmm18 xmm24 ymm15 ymm16 zmm5 zmm9 zmm13 zmm14 +..B1.41: # Preds ..B1.40 ..B1.11 # Execution count [5.00e+00] - vaddsd (%r15,%r12,8), %xmm18, %xmm0 #208.9 - vmovsd %xmm0, (%r15,%r12,8) #208.9 - vaddsd (%r14,%r12,8), %xmm12, %xmm1 #209.9 - vmovsd %xmm1, (%r14,%r12,8) #209.9 - vaddsd (%rbx,%r12,8), %xmm4, %xmm2 #210.9 - vmovsd %xmm2, (%rbx,%r12,8) #210.9 - incq %r12 #143.5 - cmpq 440(%rsp), %r12 #143.5[spill] - jb ..B1.21 # Prob 82% #143.5 - # LOE rbx r9 r10 r12 r14 r15 ecx -..B1.65: # Preds ..B1.64 - # Execution count [9.00e-01] - movq 128(%rsp), %r13 #[spill] - movl %ecx, %ebx # - # LOE r13 ebx -..B1.66: # Preds ..B1.16 ..B1.65 ..B1.19 + movq 80(%rsp), %rsi #208.9[spill] + addq $24, %rdi #143.5 + vaddsd (%rsi,%rax,8), %xmm24, %xmm0 #208.9 + vmovsd %xmm0, (%rsi,%rax,8) #208.9 + movslq %eax, %rsi #143.32 + vaddsd (%r14,%rax,8), %xmm18, %xmm1 #209.9 + vmovsd %xmm1, (%r14,%rax,8) #209.9 + incq %rsi #143.32 + vaddsd (%r8,%rax,8), %xmm4, %xmm2 #210.9 + vmovsd %xmm2, (%r8,%rax,8) #210.9 + incq %rax #143.5 + cmpq 64(%rsp), %rax #143.5[spill] + jb ..B1.11 # Prob 82% #143.5 + jmp ..B1.44 # Prob 100% #143.5 + # LOE rax rdx rcx rbx rsi rdi r8 r14 ymm15 ymm16 zmm5 zmm9 zmm13 zmm14 +..B1.43: # Preds ..B1.2 + # Execution count [5.00e-01] + movl $.L_2__STRING.0, %edi #141.5 +..___tag_value_computeForce.48: +# likwid_markerStartRegion(const char *) + call likwid_markerStartRegion #141.5 +..___tag_value_computeForce.49: + # LOE +..B1.44: # Preds ..B1.41 ..B1.43 # Execution count [1.00e+00] - movl $.L_2__STRING.3, %edi #219.5 + movl $.L_2__STRING.0, %edi #219.5 vzeroupper #219.5 -..___tag_value_computeForce.138: +..___tag_value_computeForce.50: # likwid_markerStopRegion(const char *) call likwid_markerStopRegion #219.5 -..___tag_value_computeForce.139: - # LOE r13 ebx -..B1.67: # Preds ..B1.66 +..___tag_value_computeForce.51: + # LOE +..B1.45: # Preds ..B1.44 # Execution count [1.00e+00] xorl %eax, %eax #221.16 -..___tag_value_computeForce.140: +..___tag_value_computeForce.52: # getTimeStamp() call getTimeStamp #221.16 -..___tag_value_computeForce.141: - # LOE r13 ebx xmm0 -..B1.68: # Preds ..B1.67 - # Execution count [1.00e+00] - testl %ebx, %ebx #222.5 - jne ..B1.70 # Prob 78% #222.5 - # LOE r13 xmm0 -..B1.69: # Preds ..B1.68 - # Execution count [2.20e-01] - movq %r13, %rdi #222.5 - vmovsd %xmm0, 128(%rsp) #222.5[spill] -# fclose(FILE *) - call fclose #222.5 - # LOE -..B1.80: # Preds ..B1.69 - # Execution count [2.20e-01] - vmovsd 128(%rsp), %xmm0 #[spill] +..___tag_value_computeForce.53: # LOE xmm0 -..B1.70: # Preds ..B1.80 ..B1.68 +..B1.46: # Preds ..B1.45 # Execution count [1.00e+00] - vsubsd 160(%rsp), %xmm0, %xmm0 #224.14[spill] - addq $536, %rsp #224.14 + vsubsd 24(%rsp), %xmm0, %xmm0 #224.14[spill] + addq $88, %rsp #224.14 .cfi_restore 3 popq %rbx #224.14 .cfi_restore 15 @@ -797,29 +536,24 @@ computeForce: .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x22 # LOE -..B1.71: # Preds ..B1.36 +..B1.47: # Preds ..B1.13 # Execution count [4.50e-01]: Infreq - movl %r13d, %edx #173.13 - xorl %eax, %eax #173.13 - andl $-8, %edx #173.13 - jmp ..B1.49 # Prob 100% #173.13 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 eax edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 -..B1.72: # Preds ..B1.35 + movl %r10d, %r13d #173.13 + xorl %r11d, %r11d #173.13 + andl $-8, %r13d #173.13 + jmp ..B1.26 # Prob 100% #173.13 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d r11d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 +..B1.48: # Preds ..B1.12 # Execution count [4.50e-01]: Infreq - xorl %edx, %edx #173.13 - jmp ..B1.57 # Prob 100% #173.13 + xorl %r13d, %r13d #173.13 + jmp ..B1.34 # Prob 100% #173.13 .align 16,0x90 - # LOE rbx rdi r8 r9 r10 r12 r14 r15 edx ecx r13d xmm1 xmm9 xmm15 zmm6 zmm7 zmm8 + # LOE rax rdx rcx rbx rsi rdi r8 r14 r10d r13d xmm6 xmm10 xmm12 ymm15 ymm16 zmm5 zmm7 zmm8 zmm9 zmm11 zmm13 zmm14 .cfi_endproc # mark_end; .type computeForce,@function .size computeForce,.-computeForce ..LNcomputeForce.0: - .section .rodata.str1.4, "aMS",@progbits,1 - .align 4 - .align 4 -il0_peep_printf_format_0: - .long 2112073 .data # -- End computeForce .section .rodata, "a" @@ -881,49 +615,12 @@ il0_peep_printf_format_0: .size .L_2il0floatpacket.9,8 .section .rodata.str1.4, "aMS",@progbits,1 .align 4 + .align 4 .L_2__STRING.0: - .long 1701080681 - .long 1920229240 - .long 1919247201 - .long 778315103 - .long 7632239 - .type .L_2__STRING.0,@object - .size .L_2__STRING.0,20 - .align 4 -.L_2__STRING.1: - .word 119 - .type .L_2__STRING.1,@object - .size .L_2__STRING.1,2 - .space 2, 0x00 # pad - .align 4 -.L_2__STRING.3: .long 1668444006 .word 101 - .type .L_2__STRING.3,@object - .size .L_2__STRING.3,6 - .space 2, 0x00 # pad - .align 4 -.L_2__STRING.2: - .long 622869070 - .long 1680154724 - .long 174335264 - .byte 0 - .type .L_2__STRING.2,@object - .size .L_2__STRING.2,13 - .space 3, 0x00 # pad - .align 4 -.L_2__STRING.4: - .long 622869057 - .word 2660 - .byte 0 - .type .L_2__STRING.4,@object - .size .L_2__STRING.4,7 - .space 1, 0x00 # pad - .align 4 -.L_2__STRING.6: - .long 2122789 - .type .L_2__STRING.6,@object - .size .L_2__STRING.6,4 + .type .L_2__STRING.0,@object + .size .L_2__STRING.0,6 .data .section .note.GNU-stack, "" # End