# mark_description "Intel(R) C Intel(R) 64 Compiler Classic for applications running on Intel(R) 64, Version 2021.6.0 Build 2022"; # mark_description "0226_000000"; # mark_description "-I/apps/likwid/5.2.2/include -I././lammps/includes -I././common/includes -S -std=c11 -pedantic-errors -D_GNU"; # mark_description "_SOURCE -DLIKWID_PERFMON -DAOS -DPRECISION=2 -DCOMPUTE_STATS -DVECTOR_WIDTH=1 -DENABLE_OMP_SIMD -DALIGNMENT="; # mark_description "64 -restrict -Ofast -no-vec -o build-lammps-ICC-NOVEC-DP/force_lj.s"; .file "force_lj.c" .text ..TXTST0: .L_2__routine_start_computeForceLJFullNeigh_plain_c_0: # -- Begin computeForceLJFullNeigh_plain_c .text # mark_begin; .align 16,0x90 .globl computeForceLJFullNeigh_plain_c # --- computeForceLJFullNeigh_plain_c(Parameter *, Atom *, Neighbor *, Stats *) computeForceLJFullNeigh_plain_c: # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx # parameter 4: %rcx ..B1.1: # Preds ..B1.0 # Execution count [1.00e+00] .cfi_startproc ..___tag_value_computeForceLJFullNeigh_plain_c.1: ..L2: #23.104 pushq %r12 #23.104 .cfi_def_cfa_offset 16 .cfi_offset 12, -16 pushq %r13 #23.104 .cfi_def_cfa_offset 24 .cfi_offset 13, -24 pushq %r14 #23.104 .cfi_def_cfa_offset 32 .cfi_offset 14, -32 pushq %r15 #23.104 .cfi_def_cfa_offset 40 .cfi_offset 15, -40 pushq %rbx #23.104 .cfi_def_cfa_offset 48 .cfi_offset 3, -48 pushq %rbp #23.104 .cfi_def_cfa_offset 56 .cfi_offset 6, -56 subq $56, %rsp #23.104 .cfi_def_cfa_offset 112 movq %rdi, %rbp #23.104 movq %rsi, %r15 #23.104 movq %rcx, %r13 #23.104 movq %rdx, %r12 #23.104 movsd 144(%rbp), %xmm0 #27.27 mulsd %xmm0, %xmm0 #27.45 movsd 56(%rbp), %xmm1 #28.23 movsd 40(%rbp), %xmm2 #29.24 movl 4(%r15), %r14d #24.18 movsd %xmm0, 32(%rsp) #27.45[spill] movsd %xmm1, 24(%rsp) #28.23[spill] movsd %xmm2, 40(%rsp) #29.24[spill] testl %r14d, %r14d #32.24 jle ..B1.16 # Prob 50% #32.24 # LOE rbp r12 r13 r15 r14d ..B1.2: # Preds ..B1.1 # Execution count [5.00e-03] movq 64(%r15), %rdi #33.9 lea (%r14,%r14,2), %eax #24.18 cmpl $12, %eax #32.5 jle ..B1.23 # Prob 0% #32.5 # LOE rbp rdi r12 r13 r15 eax r14d ..B1.3: # Preds ..B1.2 # Execution count [1.00e+00] movslq %r14d, %r14 #32.5 xorl %esi, %esi #32.5 lea (%r14,%r14,2), %rdx #32.5 shlq $3, %rdx #32.5 call _intel_fast_memset #32.5 # LOE rbp r12 r13 r14 r15 ..B1.5: # Preds ..B1.3 ..B1.28 ..B1.34 # Execution count [1.00e+00] xorl %ebx, %ebx #37.22 xorl %eax, %eax #38.16 ..___tag_value_computeForceLJFullNeigh_plain_c.19: # getTimeStamp() call getTimeStamp #38.16 ..___tag_value_computeForceLJFullNeigh_plain_c.20: # LOE rbx rbp r12 r13 r14 r15 xmm0 ..B1.31: # Preds ..B1.5 # Execution count [1.00e+00] movsd %xmm0, 16(%rsp) #38.16[spill] # LOE rbx rbp r12 r13 r14 r15 ..B1.6: # Preds ..B1.31 # Execution count [5.00e-01] movl $.L_2__STRING.0, %edi #39.5 ..___tag_value_computeForceLJFullNeigh_plain_c.22: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #39.5 ..___tag_value_computeForceLJFullNeigh_plain_c.23: # LOE rbx rbp r12 r13 r14 r15 ..B1.7: # Preds ..B1.6 # Execution count [9.00e-01] xorl %ecx, %ecx #41.15 movsd .L_2il0floatpacket.0(%rip), %xmm11 #77.41 xorl %edx, %edx #41.5 movq 16(%r12), %rax #42.19 xorl %r11d, %r11d #41.5 movslq 8(%r12), %rdi #42.43 movq 24(%r12), %rsi #43.25 mulsd 40(%rsp), %xmm11 #77.41[spill] movsd 24(%rsp), %xmm12 #41.5[spill] movsd 32(%rsp), %xmm13 #41.5[spill] movsd .L_2il0floatpacket.1(%rip), %xmm1 #77.54 shlq $2, %rdi #25.5 movq 16(%r15), %r12 #44.25 movq 64(%r15), %r8 #89.9 xorl %r15d, %r15d #41.5 movq (%r13), %r9 #93.9 movq 8(%r13), %r10 #94.9 movq %rbp, 8(%rsp) #41.5[spill] movq %r13, (%rsp) #41.5[spill] # LOE rax rdx rcx rbx rsi rdi r8 r9 r10 r11 r12 r14 r15 xmm1 xmm11 xmm12 xmm13 ..B1.8: # Preds ..B1.14 ..B1.7 # Execution count [5.00e+00] movslq (%rsi,%rdx,4), %r13 #43.25 movq %r15, %rbp #56.9 pxor %xmm2, %xmm2 #47.22 movaps %xmm2, %xmm4 #48.22 movsd (%r11,%r12), %xmm9 #44.25 movaps %xmm4, %xmm10 #49.22 movsd 8(%r11,%r12), %xmm8 #45.25 movsd 16(%r11,%r12), %xmm5 #46.25 testq %r13, %r13 #56.28 jle ..B1.14 # Prob 10% #56.28 # LOE rax rdx rcx rbx rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 xmm1 xmm2 xmm4 xmm5 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B1.9: # Preds ..B1.8 # Execution count [4.50e+00] imulq %rdi, %rcx #42.43 addq %rax, %rcx #25.5 # LOE rax rdx rcx rbx rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 xmm1 xmm2 xmm4 xmm5 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 # LLVM-MCA-BEGIN # OSACA-BEGIN ..B1.10: # Preds ..B1.12 ..B1.9 # Execution count [2.50e+01] movl (%rcx,%rbp,4), %r15d #57.21 movaps %xmm9, %xmm3 #58.36 movaps %xmm8, %xmm7 #59.36 movaps %xmm5, %xmm6 #60.36 lea (%r15,%r15,2), %r15d #58.36 movslq %r15d, %r15 #58.36 subsd (%r12,%r15,8), %xmm3 #58.36 subsd 8(%r12,%r15,8), %xmm7 #59.36 subsd 16(%r12,%r15,8), %xmm6 #60.36 movaps %xmm3, %xmm0 #61.35 movaps %xmm7, %xmm14 #61.49 mulsd %xmm3, %xmm0 #61.35 movaps %xmm6, %xmm15 #61.63 mulsd %xmm7, %xmm14 #61.49 mulsd %xmm6, %xmm15 #61.63 addsd %xmm14, %xmm0 #61.49 addsd %xmm15, %xmm0 #61.63 comisd %xmm0, %xmm13 #71.22 jbe ..B1.12 # Prob 50% #71.22 # LOE rax rdx rcx rbx rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B1.11: # Preds ..B1.10 # Execution count [1.25e+01] movsd .L_2il0floatpacket.3(%rip), %xmm14 #75.38 incl %ebx #73.17 divsd %xmm0, %xmm14 #75.38 movaps %xmm12, %xmm0 #76.38 mulsd %xmm14, %xmm0 #76.38 mulsd %xmm14, %xmm0 #76.44 mulsd %xmm14, %xmm0 #76.50 mulsd %xmm11, %xmm14 #77.54 mulsd %xmm0, %xmm14 #77.61 subsd %xmm1, %xmm0 #77.54 mulsd %xmm0, %xmm14 #77.67 mulsd %xmm14, %xmm3 #78.31 mulsd %xmm14, %xmm7 #79.31 mulsd %xmm14, %xmm6 #80.31 addsd %xmm3, %xmm2 #78.17 addsd %xmm7, %xmm4 #79.17 addsd %xmm6, %xmm10 #80.17 # LOE rax rdx rcx rbx rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 xmm1 xmm2 xmm4 xmm5 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B1.12: # Preds ..B1.11 ..B1.10 # Execution count [2.50e+01] incq %rbp #56.9 cmpq %r13, %rbp #56.9 jb ..B1.10 # Prob 82% #56.9 # OSACA-END # LLVM-MCA-END # LOE rax rdx rcx rbx rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 xmm1 xmm2 xmm4 xmm5 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B1.13: # Preds ..B1.12 # Execution count [4.50e+00] xorl %r15d, %r15d # # LOE rax rdx rbx rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 xmm1 xmm2 xmm4 xmm10 xmm11 xmm12 xmm13 ..B1.14: # Preds ..B1.13 ..B1.8 # Execution count [5.00e+00] movslq %edx, %rcx #41.32 incq %rdx #41.5 addq %r13, %r9 #93.9 addq %r13, %r10 #94.9 incq %rcx #41.32 addsd (%r11,%r8), %xmm2 #89.9 addsd 8(%r11,%r8), %xmm4 #90.9 addsd 16(%r11,%r8), %xmm10 #91.9 movsd %xmm2, (%r11,%r8) #89.9 movsd %xmm4, 8(%r11,%r8) #90.9 movsd %xmm10, 16(%r11,%r8) #91.9 addq $24, %r11 #41.5 cmpq %r14, %rdx #41.5 jb ..B1.8 # Prob 82% #41.5 # LOE rax rdx rcx rbx rsi rdi r8 r9 r10 r11 r12 r14 r15 xmm1 xmm11 xmm12 xmm13 ..B1.15: # Preds ..B1.14 # Execution count [9.00e-01] movq (%rsp), %r13 #[spill] movq 8(%rsp), %rbp #[spill] movq %r9, (%r13) #93.9 movq %r10, 8(%r13) #94.9 jmp ..B1.19 # Prob 100% #94.9 # LOE rbx rbp ..B1.16: # Preds ..B1.1 # Execution count [5.00e-01] xorl %ebx, %ebx #37.22 xorl %eax, %eax #38.16 ..___tag_value_computeForceLJFullNeigh_plain_c.31: # getTimeStamp() call getTimeStamp #38.16 ..___tag_value_computeForceLJFullNeigh_plain_c.32: # LOE rbx rbp xmm0 ..B1.32: # Preds ..B1.16 # Execution count [5.00e-01] movsd %xmm0, 16(%rsp) #38.16[spill] # LOE rbx rbp ..B1.17: # Preds ..B1.32 # Execution count [5.00e-01] movl $.L_2__STRING.0, %edi #39.5 ..___tag_value_computeForceLJFullNeigh_plain_c.34: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #39.5 ..___tag_value_computeForceLJFullNeigh_plain_c.35: # LOE rbx rbp ..B1.19: # Preds ..B1.17 ..B1.15 # Execution count [1.00e+00] movl $.L_2__STRING.0, %edi #97.5 ..___tag_value_computeForceLJFullNeigh_plain_c.36: # likwid_markerStopRegion(const char *) call likwid_markerStopRegion #97.5 ..___tag_value_computeForceLJFullNeigh_plain_c.37: # LOE rbx rbp ..B1.20: # Preds ..B1.19 # Execution count [1.00e+00] xorl %eax, %eax #98.16 ..___tag_value_computeForceLJFullNeigh_plain_c.38: # getTimeStamp() call getTimeStamp #98.16 ..___tag_value_computeForceLJFullNeigh_plain_c.39: # LOE rbx rbp xmm0 ..B1.33: # Preds ..B1.20 # Execution count [1.00e+00] movaps %xmm0, %xmm1 #98.16 # LOE rbx rbp xmm1 ..B1.21: # Preds ..B1.33 # Execution count [1.00e+00] pxor %xmm3, %xmm3 #100.5 cvtsi2sdq %rbx, %xmm3 #100.5 subsd 16(%rsp), %xmm1 #100.91[spill] movsd .L_2il0floatpacket.2(%rip), %xmm2 #100.5 movl $.L_2__STRING.1, %edi #100.5 divsd %xmm3, %xmm2 #100.5 mulsd %xmm1, %xmm2 #100.5 movl %ebx, %esi #100.5 movsd 264(%rbp), %xmm0 #100.71 movl $3, %eax #100.5 mulsd %xmm0, %xmm2 #100.5 movsd %xmm1, (%rsp) #100.5[spill] ..___tag_value_computeForceLJFullNeigh_plain_c.41: # printf(const char *__restrict__, ...) call printf #100.5 ..___tag_value_computeForceLJFullNeigh_plain_c.42: # LOE ..B1.22: # Preds ..B1.21 # Execution count [1.00e+00] movsd (%rsp), %xmm1 #[spill] movaps %xmm1, %xmm0 #102.14 addq $56, %rsp #102.14 .cfi_def_cfa_offset 56 .cfi_restore 6 popq %rbp #102.14 .cfi_def_cfa_offset 48 .cfi_restore 3 popq %rbx #102.14 .cfi_def_cfa_offset 40 .cfi_restore 15 popq %r15 #102.14 .cfi_def_cfa_offset 32 .cfi_restore 14 popq %r14 #102.14 .cfi_def_cfa_offset 24 .cfi_restore 13 popq %r13 #102.14 .cfi_def_cfa_offset 16 .cfi_restore 12 popq %r12 #102.14 .cfi_def_cfa_offset 8 ret #102.14 .cfi_def_cfa_offset 112 .cfi_offset 3, -48 .cfi_offset 6, -56 .cfi_offset 12, -16 .cfi_offset 13, -24 .cfi_offset 14, -32 .cfi_offset 15, -40 # LOE ..B1.23: # Preds ..B1.2 # Execution count [1.11e+00]: Infreq movl %eax, %edx #32.5 xorl %ebx, %ebx #32.5 movl $1, %esi #32.5 xorl %ecx, %ecx #32.5 shrl $1, %edx #32.5 je ..B1.27 # Prob 10% #32.5 # LOE rdx rcx rbx rbp rdi r12 r13 r15 eax esi r14d ..B1.24: # Preds ..B1.23 # Execution count [1.00e+00]: Infreq xorl %esi, %esi #32.5 # LOE rdx rcx rbx rbp rsi rdi r12 r13 r15 eax r14d ..B1.25: # Preds ..B1.25 ..B1.24 # Execution count [2.78e+00]: Infreq incq %rbx #32.5 movq %rsi, (%rcx,%rdi) #33.9 movq %rsi, 8(%rcx,%rdi) #33.9 addq $16, %rcx #32.5 cmpq %rdx, %rbx #32.5 jb ..B1.25 # Prob 64% #32.5 # LOE rdx rcx rbx rbp rsi rdi r12 r13 r15 eax r14d ..B1.26: # Preds ..B1.25 # Execution count [1.00e+00]: Infreq lea 1(%rbx,%rbx), %esi #33.9 # LOE rbp rdi r12 r13 r15 eax esi r14d ..B1.27: # Preds ..B1.23 ..B1.26 # Execution count [1.11e+00]: Infreq lea -1(%rsi), %edx #32.5 cmpl %eax, %edx #32.5 jae ..B1.34 # Prob 10% #32.5 # LOE rbp rdi r12 r13 r15 esi r14d ..B1.28: # Preds ..B1.27 # Execution count [1.00e+00]: Infreq movslq %esi, %rsi #32.5 movslq %r14d, %r14 #32.5 movq $0, -8(%rdi,%rsi,8) #33.9 jmp ..B1.5 # Prob 100% #33.9 # LOE rbp r12 r13 r14 r15 ..B1.34: # Preds ..B1.27 # Execution count [1.11e-01]: Infreq movslq %r14d, %r14 #32.5 jmp ..B1.5 # Prob 100% #32.5 .align 16,0x90 # LOE rbp r12 r13 r14 r15 .cfi_endproc # mark_end; .type computeForceLJFullNeigh_plain_c,@function .size computeForceLJFullNeigh_plain_c,.-computeForceLJFullNeigh_plain_c ..LNcomputeForceLJFullNeigh_plain_c.0: .data # -- End computeForceLJFullNeigh_plain_c .text .L_2__routine_start_computeForceLJHalfNeigh_1: # -- Begin computeForceLJHalfNeigh .text # mark_begin; .align 16,0x90 .globl computeForceLJHalfNeigh # --- computeForceLJHalfNeigh(Parameter *, Atom *, Neighbor *, Stats *) computeForceLJHalfNeigh: # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx # parameter 4: %rcx ..B2.1: # Preds ..B2.0 # Execution count [1.00e+00] .cfi_startproc ..___tag_value_computeForceLJHalfNeigh.66: ..L67: #105.96 pushq %r12 #105.96 .cfi_def_cfa_offset 16 .cfi_offset 12, -16 pushq %r13 #105.96 .cfi_def_cfa_offset 24 .cfi_offset 13, -24 pushq %r14 #105.96 .cfi_def_cfa_offset 32 .cfi_offset 14, -32 pushq %r15 #105.96 .cfi_def_cfa_offset 40 .cfi_offset 15, -40 pushq %rbx #105.96 .cfi_def_cfa_offset 48 .cfi_offset 3, -48 pushq %rbp #105.96 .cfi_def_cfa_offset 56 .cfi_offset 6, -56 subq $216, %rsp #105.96 .cfi_def_cfa_offset 272 movq %rdi, %r14 #105.96 movq %rsi, %r15 #105.96 movq %rcx, %r12 #105.96 movq %rdx, 32(%rsp) #105.96[spill] movsd 144(%r14), %xmm0 #109.27 mulsd %xmm0, %xmm0 #109.45 movsd 56(%r14), %xmm1 #110.23 movsd 40(%r14), %xmm2 #111.24 movl 4(%r15), %ebp #106.18 movsd %xmm0, 48(%rsp) #109.45[spill] movsd %xmm1, 40(%rsp) #110.23[spill] movsd %xmm2, 24(%rsp) #111.24[spill] testl %ebp, %ebp #114.24 jle ..B2.51 # Prob 50% #114.24 # LOE r12 r14 r15 ebp ..B2.2: # Preds ..B2.1 # Execution count [5.00e-03] movq 64(%r15), %rdi #115.9 lea (%rbp,%rbp,2), %ebx #106.18 cmpl $12, %ebx #114.5 jle ..B2.59 # Prob 0% #114.5 # LOE rdi r12 r14 r15 ebx ebp ..B2.3: # Preds ..B2.2 # Execution count [1.00e+00] movslq %ebp, %r13 #114.5 xorl %esi, %esi #114.5 lea (%r13,%r13,2), %rdx #114.5 shlq $3, %rdx #114.5 call _intel_fast_memset #114.5 # LOE r12 r13 r14 r15 ebp ..B2.5: # Preds ..B2.3 ..B2.64 ..B2.70 # Execution count [1.00e+00] xorl %ebx, %ebx #120.22 xorl %eax, %eax #121.16 ..___tag_value_computeForceLJHalfNeigh.85: # getTimeStamp() call getTimeStamp #121.16 ..___tag_value_computeForceLJHalfNeigh.86: # LOE r12 r13 r14 r15 ebx ebp xmm0 ..B2.67: # Preds ..B2.5 # Execution count [1.00e+00] movsd %xmm0, 16(%rsp) #121.16[spill] # LOE r12 r13 r14 r15 ebx ebp ..B2.6: # Preds ..B2.67 # Execution count [5.00e-01] movl $.L_2__STRING.2, %edi #122.5 ..___tag_value_computeForceLJHalfNeigh.88: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #122.5 ..___tag_value_computeForceLJHalfNeigh.89: # LOE r12 r13 r14 r15 ebx ebp ..B2.7: # Preds ..B2.6 # Execution count [9.00e-01] movsd .L_2il0floatpacket.0(%rip), %xmm13 #161.41 movd %ebp, %xmm0 #106.18 mulsd 24(%rsp), %xmm13 #161.41[spill] xorl %r9d, %r9d #124.15 movq 32(%rsp), %rdx #125.19[spill] movaps %xmm13, %xmm2 #161.41 movsd 40(%rsp), %xmm3 #110.21[spill] xorl %r8d, %r8d #124.5 movsd 48(%rsp), %xmm6 #109.25[spill] xorl %esi, %esi #124.5 unpcklpd %xmm3, %xmm3 #110.21 unpcklpd %xmm2, %xmm2 #161.41 pshufd $0, %xmm0, %xmm0 #106.18 movq 16(%rdx), %rdi #125.19 movslq 8(%rdx), %rax #125.43 movq 24(%rdx), %rcx #126.25 movq 16(%r15), %rdx #127.25 movq 64(%r15), %r15 #168.21 unpcklpd %xmm6, %xmm6 #109.25 movups .L_2il0floatpacket.7(%rip), %xmm1 #161.54 movsd .L_2il0floatpacket.1(%rip), %xmm7 #161.54 shlq $2, %rax #107.5 movq (%r12), %r10 #179.9 movq 8(%r12), %r11 #180.9 movdqu %xmm0, 160(%rsp) #124.5[spill] movups %xmm2, 192(%rsp) #124.5[spill] movups %xmm3, 176(%rsp) #124.5[spill] movq %rdi, 56(%rsp) #124.5[spill] movl %ebp, 64(%rsp) #124.5[spill] movq %r14, (%rsp) #124.5[spill] movq %r12, 8(%rsp) #124.5[spill] movsd 40(%rsp), %xmm10 #124.5[spill] movsd 48(%rsp), %xmm12 #124.5[spill] # LOE rax rdx rcx rsi r8 r9 r10 r11 r13 r15 ebx xmm6 xmm7 xmm10 xmm12 xmm13 ..B2.8: # Preds ..B2.49 ..B2.7 # Execution count [5.00e+00] movl (%rcx,%r8,4), %r14d #126.25 addl %r14d, %ebx #138.9 pxor %xmm5, %xmm5 #130.22 movaps %xmm5, %xmm4 #131.22 movsd (%rsi,%rdx), %xmm9 #127.25 movaps %xmm4, %xmm0 #132.22 movsd 8(%rsi,%rdx), %xmm8 #128.25 movsd 16(%rsi,%rdx), %xmm11 #129.25 testl %r14d, %r14d #143.9 jle ..B2.48 # Prob 50% #143.9 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r13 r15 r14d xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.9: # Preds ..B2.8 # Execution count [2.50e+00] jbe ..B2.48 # Prob 50% #143.9 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r13 r15 r14d xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.10: # Preds ..B2.9 # Execution count [2.25e+00] cmpl $2, %r14d #143.9 jb ..B2.58 # Prob 10% #143.9 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r13 r15 r14d xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.11: # Preds ..B2.10 # Execution count [2.25e+00] movq %rax, %rdi #125.43 movl %r14d, %r12d #143.9 imulq %r9, %rdi #125.43 pxor %xmm5, %xmm5 #130.22 movaps %xmm9, %xmm1 #127.23 movaps %xmm5, %xmm4 #131.22 movaps %xmm8, %xmm2 #128.23 movaps %xmm11, %xmm3 #129.23 andl $-2, %r12d #143.9 movsd %xmm11, 120(%rsp) #143.9[spill] addq 56(%rsp), %rdi #107.5[spill] xorl %ebp, %ebp #143.9 unpcklpd %xmm1, %xmm1 #127.23 movaps %xmm4, %xmm0 #132.22 unpcklpd %xmm2, %xmm2 #128.23 unpcklpd %xmm3, %xmm3 #129.23 movslq %r12d, %r12 #143.9 movsd %xmm8, 128(%rsp) #143.9[spill] movsd %xmm9, 136(%rsp) #143.9[spill] movsd %xmm13, 144(%rsp) #143.9[spill] movl %r14d, 24(%rsp) #143.9[spill] movq %rsi, 32(%rsp) #143.9[spill] movq %rax, 72(%rsp) #143.9[spill] movq %r11, 80(%rsp) #143.9[spill] movq %r10, 88(%rsp) #143.9[spill] movq %rcx, 96(%rsp) #143.9[spill] movq %r8, 104(%rsp) #143.9[spill] movq %r13, 112(%rsp) #143.9[spill] movdqu .L_2il0floatpacket.5(%rip), %xmm11 #143.9 movdqu .L_2il0floatpacket.4(%rip), %xmm12 #143.9 # LOE rdx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm11 xmm12 ..B2.12: # Preds ..B2.38 ..B2.11 # Execution count [1.25e+01] movq (%rdi,%rbp,4), %xmm10 #144.21 movdqa %xmm12, %xmm15 #146.36 movdqa %xmm10, %xmm7 #145.36 paddd %xmm10, %xmm7 #145.36 paddd %xmm10, %xmm7 #145.36 movdqa %xmm7, %xmm9 #145.36 paddd %xmm7, %xmm15 #146.36 movd %xmm7, %r13d #145.36 paddd %xmm11, %xmm7 #147.36 psrldq $4, %xmm9 #145.36 movd %xmm9, %r11d #145.36 movaps %xmm1, %xmm9 #145.36 movd %xmm15, %r10d #146.36 psrldq $4, %xmm15 #146.36 movd %xmm15, %r8d #146.36 movd %xmm7, %ecx #147.36 psrldq $4, %xmm7 #147.36 movd %xmm7, %eax #147.36 movaps %xmm3, %xmm7 #147.36 movslq %r13d, %r13 #145.36 movslq %r11d, %r11 #145.36 movslq %r10d, %r10 #146.36 movslq %r8d, %r8 #146.36 movsd (%rdx,%r13,8), %xmm8 #145.36 movhpd (%rdx,%r11,8), %xmm8 #145.36 movsd (%rdx,%r10,8), %xmm13 #146.36 subpd %xmm8, %xmm9 #145.36 movhpd (%rdx,%r8,8), %xmm13 #146.36 movaps %xmm2, %xmm8 #146.36 movslq %ecx, %rcx #147.36 movaps %xmm9, %xmm15 #148.35 subpd %xmm13, %xmm8 #146.36 mulpd %xmm9, %xmm15 #148.35 movslq %eax, %rax #147.36 movaps %xmm8, %xmm13 #148.49 movsd (%rdx,%rcx,8), %xmm14 #147.36 mulpd %xmm8, %xmm13 #148.49 movhpd (%rdx,%rax,8), %xmm14 #147.36 subpd %xmm14, %xmm7 #147.36 addpd %xmm13, %xmm15 #148.49 movaps %xmm7, %xmm14 #148.63 mulpd %xmm7, %xmm14 #148.63 addpd %xmm14, %xmm15 #148.63 movaps %xmm15, %xmm13 #158.22 cmpltpd %xmm6, %xmm13 #158.22 movmskpd %xmm13, %r14d #158.22 testl %r14d, %r14d #158.22 je ..B2.38 # Prob 50% #158.22 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r11 r12 r13 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm15 ..B2.13: # Preds ..B2.12 # Execution count [6.25e+00] movups .L_2il0floatpacket.6(%rip), %xmm14 #159.38 divpd %xmm15, %xmm14 #159.38 movdqu 160(%rsp), %xmm15 #167.24[spill] pcmpgtd %xmm10, %xmm15 #167.24 movups 176(%rsp), %xmm10 #160.38[spill] mulpd %xmm14, %xmm10 #160.38 mulpd %xmm14, %xmm10 #160.44 mulpd %xmm14, %xmm10 #160.50 mulpd 192(%rsp), %xmm14 #161.54[spill] mulpd %xmm10, %xmm14 #161.61 subpd .L_2il0floatpacket.7(%rip), %xmm10 #161.54 mulpd %xmm10, %xmm14 #161.67 mulpd %xmm14, %xmm9 #162.31 mulpd %xmm14, %xmm8 #163.31 mulpd %xmm14, %xmm7 #164.31 punpckldq %xmm15, %xmm15 #167.24 movaps %xmm13, %xmm14 #162.31 andps %xmm13, %xmm15 #167.24 movaps %xmm13, %xmm10 #163.31 movmskpd %xmm15, %esi #167.24 andps %xmm9, %xmm14 #162.31 andps %xmm8, %xmm10 #163.31 andps %xmm7, %xmm13 #164.31 addpd %xmm14, %xmm5 #162.17 addpd %xmm10, %xmm4 #163.17 addpd %xmm13, %xmm0 #164.17 testl %esi, %esi #167.24 je ..B2.38 # Prob 50% #167.24 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r11 r12 r13 r15 esi xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm11 xmm12 ..B2.14: # Preds ..B2.13 # Execution count [3.12e+00] movl %esi, %r14d #168.21 andl $2, %r14d #168.21 andl $1, %esi #168.21 je ..B2.17 # Prob 40% #168.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r11 r12 r13 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm11 xmm12 ..B2.15: # Preds ..B2.14 # Execution count [3.12e+00] movsd (%r15,%r13,8), %xmm10 #168.21 testl %r14d, %r14d #168.21 jne ..B2.18 # Prob 60% #168.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r11 r12 r13 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 ..B2.16: # Preds ..B2.15 # Execution count [1.25e+00] pxor %xmm13, %xmm13 #168.21 unpcklpd %xmm13, %xmm10 #168.21 subpd %xmm9, %xmm10 #168.21 jmp ..B2.31 # Prob 100% #168.21 # LOE rdx rcx rbx rbp rdi r9 r10 r12 r13 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.17: # Preds ..B2.14 # Execution count [3.12e+00] pxor %xmm10, %xmm10 #168.21 testl %r14d, %r14d #168.21 je ..B2.30 # Prob 40% #168.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r11 r12 r13 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 ..B2.18: # Preds ..B2.15 ..B2.17 # Execution count [3.12e+00] movhpd (%r15,%r11,8), %xmm10 #168.21 subpd %xmm9, %xmm10 #168.21 testl %esi, %esi #168.21 je ..B2.20 # Prob 40% #168.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r11 r12 r13 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.19: # Preds ..B2.18 # Execution count [1.88e+00] movsd %xmm10, (%r15,%r13,8) #168.21 psrldq $8, %xmm10 #168.21 movsd %xmm10, (%r15,%r11,8) #168.21 movsd (%r15,%r10,8), %xmm10 #169.21 jmp ..B2.21 # Prob 100% #169.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r12 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.20: # Preds ..B2.18 # Execution count [1.25e+00] psrldq $8, %xmm10 #168.21 movsd %xmm10, (%r15,%r11,8) #168.21 pxor %xmm10, %xmm10 #169.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r12 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.21: # Preds ..B2.19 ..B2.20 # Execution count [1.88e+00] testl %r14d, %r14d #169.21 je ..B2.72 # Prob 40% #169.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r12 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.22: # Preds ..B2.21 # Execution count [3.12e+00] movhpd (%r15,%r8,8), %xmm10 #169.21 subpd %xmm8, %xmm10 #169.21 testl %esi, %esi #169.21 je ..B2.24 # Prob 40% #169.21 # LOE rax rdx rcx rbx rbp rdi r8 r9 r10 r12 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.23: # Preds ..B2.22 # Execution count [1.88e+00] movsd %xmm10, (%r15,%r10,8) #169.21 psrldq $8, %xmm10 #169.21 movsd %xmm10, (%r15,%r8,8) #169.21 movsd (%r15,%rcx,8), %xmm9 #170.21 jmp ..B2.25 # Prob 100% #170.21 # LOE rax rdx rcx rbx rbp rdi r9 r12 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm9 xmm11 xmm12 ..B2.24: # Preds ..B2.22 # Execution count [1.25e+00] psrldq $8, %xmm10 #169.21 movsd %xmm10, (%r15,%r8,8) #169.21 pxor %xmm9, %xmm9 #170.21 # LOE rax rdx rcx rbx rbp rdi r9 r12 r15 esi r14d xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm9 xmm11 xmm12 ..B2.25: # Preds ..B2.23 ..B2.24 # Execution count [1.88e+00] testl %r14d, %r14d #170.21 je ..B2.71 # Prob 40% #170.21 # LOE rax rdx rcx rbx rbp rdi r9 r12 r15 esi xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm9 xmm11 xmm12 ..B2.26: # Preds ..B2.25 # Execution count [3.12e+00] movhpd (%r15,%rax,8), %xmm9 #170.21 subpd %xmm7, %xmm9 #170.21 testl %esi, %esi #170.21 je ..B2.28 # Prob 40% #170.21 # LOE rax rdx rcx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm9 xmm11 xmm12 ..B2.27: # Preds ..B2.26 # Execution count [1.88e+00] movsd %xmm9, (%r15,%rcx,8) #170.21 psrldq $8, %xmm9 #170.21 jmp ..B2.29 # Prob 100% #170.21 # LOE rax rdx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm9 xmm11 xmm12 ..B2.28: # Preds ..B2.26 # Execution count [1.25e+00] psrldq $8, %xmm9 #170.21 # LOE rax rdx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm9 xmm11 xmm12 ..B2.29: # Preds ..B2.27 ..B2.28 # Execution count [3.12e+00] movsd %xmm9, (%r15,%rax,8) #170.21 jmp ..B2.38 # Prob 100% #170.21 # LOE rdx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm11 xmm12 ..B2.30: # Preds ..B2.17 # Execution count [1.88e+00] pxor %xmm10, %xmm10 #168.21 subpd %xmm9, %xmm10 #168.21 testl %esi, %esi #168.21 je ..B2.32 # Prob 40% #168.21 # LOE rdx rcx rbx rbp rdi r9 r10 r12 r13 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.31: # Preds ..B2.16 ..B2.30 # Execution count [1.25e+00] movsd %xmm10, (%r15,%r13,8) #168.21 movsd (%r15,%r10,8), %xmm10 #169.21 pxor %xmm9, %xmm9 #169.21 unpcklpd %xmm9, %xmm10 #169.21 subpd %xmm8, %xmm10 #169.21 jmp ..B2.34 # Prob 100% #169.21 # LOE rdx rcx rbx rbp rdi r9 r10 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.32: # Preds ..B2.30 # Execution count [0.00e+00] pxor %xmm10, %xmm10 #169.21 jmp ..B2.33 # Prob 100% #169.21 # LOE rdx rcx rbx rbp rdi r9 r10 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.72: # Preds ..B2.21 # Execution count [7.50e-01] testl %esi, %esi #168.21 # LOE rdx rcx rbx rbp rdi r9 r10 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm10 xmm11 xmm12 ..B2.33: # Preds ..B2.32 ..B2.72 # Execution count [2.67e+00] pxor %xmm9, %xmm9 #169.21 unpcklpd %xmm9, %xmm10 #169.21 subpd %xmm8, %xmm10 #169.21 je ..B2.35 # Prob 40% #169.21 # LOE rdx rcx rbx rbp rdi r9 r10 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.34: # Preds ..B2.31 ..B2.33 # Execution count [1.25e+00] movsd %xmm10, (%r15,%r10,8) #169.21 movsd (%r15,%rcx,8), %xmm9 #170.21 pxor %xmm8, %xmm8 #170.21 unpcklpd %xmm8, %xmm9 #170.21 subpd %xmm7, %xmm9 #170.21 jmp ..B2.37 # Prob 100% #170.21 # LOE rdx rcx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm9 xmm11 xmm12 ..B2.35: # Preds ..B2.33 # Execution count [0.00e+00] pxor %xmm9, %xmm9 #170.21 jmp ..B2.36 # Prob 100% #170.21 # LOE rdx rcx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm9 xmm11 xmm12 ..B2.71: # Preds ..B2.25 # Execution count [7.50e-01] testl %esi, %esi #168.21 # LOE rdx rcx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm9 xmm11 xmm12 ..B2.36: # Preds ..B2.35 ..B2.71 # Execution count [2.67e+00] pxor %xmm8, %xmm8 #170.21 unpcklpd %xmm8, %xmm9 #170.21 subpd %xmm7, %xmm9 #170.21 je ..B2.38 # Prob 40% #170.21 # LOE rdx rcx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm9 xmm11 xmm12 ..B2.37: # Preds ..B2.34 ..B2.36 # Execution count [1.25e+00] movsd %xmm9, (%r15,%rcx,8) #170.21 # LOE rdx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm11 xmm12 ..B2.38: # Preds ..B2.36 ..B2.29 ..B2.37 ..B2.13 ..B2.12 # # Execution count [1.25e+01] addq $2, %rbp #143.9 cmpq %r12, %rbp #143.9 jb ..B2.12 # Prob 82% #143.9 # LOE rdx rbx rbp rdi r9 r12 r15 xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm11 xmm12 ..B2.39: # Preds ..B2.38 # Execution count [2.25e+00] movaps %xmm0, %xmm1 #132.22 movaps %xmm4, %xmm2 #131.22 movaps %xmm5, %xmm3 #130.22 unpckhpd %xmm0, %xmm1 #132.22 unpckhpd %xmm4, %xmm2 #131.22 addsd %xmm1, %xmm0 #132.22 addsd %xmm2, %xmm4 #131.22 unpckhpd %xmm5, %xmm3 #130.22 movsd 120(%rsp), %xmm11 #[spill] addsd %xmm3, %xmm5 #130.22 movsd 128(%rsp), %xmm8 #[spill] movsd 136(%rsp), %xmm9 #[spill] movsd 144(%rsp), %xmm13 #[spill] movsd 40(%rsp), %xmm10 #[spill] movsd 48(%rsp), %xmm12 #[spill] movl 24(%rsp), %r14d #[spill] movq 32(%rsp), %rsi #[spill] movq 72(%rsp), %rax #[spill] movq 80(%rsp), %r11 #[spill] movq 88(%rsp), %r10 #[spill] movq 96(%rsp), %rcx #[spill] movq 104(%rsp), %r8 #[spill] movq 112(%rsp), %r13 #[spill] movsd .L_2il0floatpacket.1(%rip), %xmm7 # # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r13 r15 r14d xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.40: # Preds ..B2.39 ..B2.58 # Execution count [2.50e+00] movslq %r14d, %r14 #143.9 cmpq %r14, %r12 #143.9 jae ..B2.49 # Prob 10% #143.9 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r13 r14 r15 xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.41: # Preds ..B2.40 # Execution count [2.25e+00] imulq %rax, %r9 #125.43 addq 56(%rsp), %r9 #107.5[spill] movl 64(%rsp), %ebp #107.5[spill] movq %r13, 112(%rsp) #107.5[spill] # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r14 r15 ebp xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.42: # Preds ..B2.45 ..B2.41 # Execution count [1.25e+01] movl (%r9,%r12,4), %edi #144.21 movaps %xmm9, %xmm14 #145.36 movaps %xmm8, %xmm3 #146.36 movaps %xmm11, %xmm2 #147.36 lea (%rdi,%rdi,2), %r13d #145.36 movslq %r13d, %r13 #145.36 subsd (%rdx,%r13,8), %xmm14 #145.36 subsd 8(%rdx,%r13,8), %xmm3 #146.36 subsd 16(%rdx,%r13,8), %xmm2 #147.36 movaps %xmm14, %xmm15 #148.35 movaps %xmm3, %xmm1 #148.49 mulsd %xmm14, %xmm15 #148.35 mulsd %xmm3, %xmm1 #148.49 addsd %xmm1, %xmm15 #148.49 movaps %xmm2, %xmm1 #148.63 mulsd %xmm2, %xmm1 #148.63 addsd %xmm1, %xmm15 #148.63 comisd %xmm15, %xmm12 #158.22 jbe ..B2.45 # Prob 50% #158.22 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r13 r14 r15 ebp edi xmm0 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15 ..B2.43: # Preds ..B2.42 # Execution count [6.25e+00] movsd .L_2il0floatpacket.3(%rip), %xmm1 #159.38 divsd %xmm15, %xmm1 #159.38 movaps %xmm10, %xmm15 #160.38 mulsd %xmm1, %xmm15 #160.38 mulsd %xmm1, %xmm15 #160.44 mulsd %xmm1, %xmm15 #160.50 mulsd %xmm13, %xmm1 #161.54 mulsd %xmm15, %xmm1 #161.61 subsd %xmm7, %xmm15 #161.54 mulsd %xmm15, %xmm1 #161.67 mulsd %xmm1, %xmm14 #162.31 mulsd %xmm1, %xmm3 #163.31 mulsd %xmm1, %xmm2 #164.31 addsd %xmm14, %xmm5 #162.17 addsd %xmm3, %xmm4 #163.17 addsd %xmm2, %xmm0 #164.17 cmpl %ebp, %edi #167.24 jge ..B2.45 # Prob 50% #167.24 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r13 r14 r15 ebp xmm0 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 ..B2.44: # Preds ..B2.43 # Execution count [3.12e+00] movsd (%r15,%r13,8), %xmm1 #168.21 subsd %xmm14, %xmm1 #168.21 movsd 8(%r15,%r13,8), %xmm14 #169.21 subsd %xmm3, %xmm14 #169.21 movsd 16(%r15,%r13,8), %xmm3 #170.21 movsd %xmm1, (%r15,%r13,8) #168.21 subsd %xmm2, %xmm3 #170.21 movsd %xmm14, 8(%r15,%r13,8) #169.21 movsd %xmm3, 16(%r15,%r13,8) #170.21 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r14 r15 ebp xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.45: # Preds ..B2.44 ..B2.43 ..B2.42 # Execution count [1.25e+01] incq %r12 #143.9 cmpq %r14, %r12 #143.9 jb ..B2.42 # Prob 82% #143.9 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r14 r15 ebp xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.46: # Preds ..B2.45 # Execution count [2.25e+00] movq 112(%rsp), %r13 #[spill] jmp ..B2.49 # Prob 100% # # LOE rax rdx rcx rbx rsi r8 r10 r11 r13 r14 r15 xmm0 xmm4 xmm5 xmm6 xmm7 xmm10 xmm12 xmm13 ..B2.48: # Preds ..B2.9 ..B2.8 # Execution count [2.50e+00] movslq %r14d, %r14 #179.9 # LOE rax rdx rcx rbx rsi r8 r10 r11 r13 r14 r15 xmm0 xmm4 xmm5 xmm6 xmm7 xmm10 xmm12 xmm13 ..B2.49: # Preds ..B2.46 ..B2.40 ..B2.48 # Execution count [5.00e+00] movslq %r8d, %r9 #124.32 incq %r8 #124.5 addq %r14, %r10 #179.9 addq %r14, %r11 #180.9 incq %r9 #124.32 addsd (%rsi,%r15), %xmm5 #175.9 addsd 8(%rsi,%r15), %xmm4 #176.9 addsd 16(%rsi,%r15), %xmm0 #177.9 movsd %xmm5, (%rsi,%r15) #175.9 movsd %xmm4, 8(%rsi,%r15) #176.9 movsd %xmm0, 16(%rsi,%r15) #177.9 addq $24, %rsi #124.5 cmpq %r13, %r8 #124.5 jb ..B2.8 # Prob 82% #124.5 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r13 r15 xmm6 xmm7 xmm10 xmm12 xmm13 ..B2.50: # Preds ..B2.49 # Execution count [9.00e-01] movq 8(%rsp), %r12 #[spill] movq (%rsp), %r14 #[spill] movq %r10, (%r12) #179.9 movq %r11, 8(%r12) #180.9 jmp ..B2.54 # Prob 100% #180.9 # LOE rbx r14 ..B2.51: # Preds ..B2.1 # Execution count [5.00e-01] xorl %ebx, %ebx #120.22 xorl %eax, %eax #121.16 ..___tag_value_computeForceLJHalfNeigh.139: # getTimeStamp() call getTimeStamp #121.16 ..___tag_value_computeForceLJHalfNeigh.140: # LOE rbx r14 xmm0 ..B2.68: # Preds ..B2.51 # Execution count [5.00e-01] movsd %xmm0, 16(%rsp) #121.16[spill] # LOE rbx r14 ..B2.52: # Preds ..B2.68 # Execution count [5.00e-01] movl $.L_2__STRING.2, %edi #122.5 ..___tag_value_computeForceLJHalfNeigh.142: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #122.5 ..___tag_value_computeForceLJHalfNeigh.143: # LOE rbx r14 ..B2.54: # Preds ..B2.52 ..B2.50 # Execution count [1.00e+00] movl $.L_2__STRING.2, %edi #183.5 ..___tag_value_computeForceLJHalfNeigh.144: # likwid_markerStopRegion(const char *) call likwid_markerStopRegion #183.5 ..___tag_value_computeForceLJHalfNeigh.145: # LOE rbx r14 ..B2.55: # Preds ..B2.54 # Execution count [1.00e+00] xorl %eax, %eax #184.16 ..___tag_value_computeForceLJHalfNeigh.146: # getTimeStamp() call getTimeStamp #184.16 ..___tag_value_computeForceLJHalfNeigh.147: # LOE rbx r14 xmm0 ..B2.69: # Preds ..B2.55 # Execution count [1.00e+00] movaps %xmm0, %xmm1 #184.16 # LOE rbx r14 xmm1 ..B2.56: # Preds ..B2.69 # Execution count [1.00e+00] pxor %xmm3, %xmm3 #185.5 cvtsi2sdq %rbx, %xmm3 #185.5 subsd 16(%rsp), %xmm1 #185.94[spill] movsd .L_2il0floatpacket.2(%rip), %xmm2 #185.5 movl $.L_2__STRING.3, %edi #185.5 divsd %xmm3, %xmm2 #185.5 mulsd %xmm1, %xmm2 #185.5 movl %ebx, %esi #185.5 movsd 264(%r14), %xmm0 #185.74 movl $3, %eax #185.5 mulsd %xmm0, %xmm2 #185.5 movsd %xmm1, (%rsp) #185.5[spill] ..___tag_value_computeForceLJHalfNeigh.149: # printf(const char *__restrict__, ...) call printf #185.5 ..___tag_value_computeForceLJHalfNeigh.150: # LOE ..B2.57: # Preds ..B2.56 # Execution count [1.00e+00] movsd (%rsp), %xmm1 #[spill] movaps %xmm1, %xmm0 #186.14 addq $216, %rsp #186.14 .cfi_def_cfa_offset 56 .cfi_restore 6 popq %rbp #186.14 .cfi_def_cfa_offset 48 .cfi_restore 3 popq %rbx #186.14 .cfi_def_cfa_offset 40 .cfi_restore 15 popq %r15 #186.14 .cfi_def_cfa_offset 32 .cfi_restore 14 popq %r14 #186.14 .cfi_def_cfa_offset 24 .cfi_restore 13 popq %r13 #186.14 .cfi_def_cfa_offset 16 .cfi_restore 12 popq %r12 #186.14 .cfi_def_cfa_offset 8 ret #186.14 .cfi_def_cfa_offset 272 .cfi_offset 3, -48 .cfi_offset 6, -56 .cfi_offset 12, -16 .cfi_offset 13, -24 .cfi_offset 14, -32 .cfi_offset 15, -40 # LOE ..B2.58: # Preds ..B2.10 # Execution count [2.25e-01]: Infreq xorl %r12d, %r12d #143.9 jmp ..B2.40 # Prob 100% #143.9 # LOE rax rdx rcx rbx rsi r8 r9 r10 r11 r12 r13 r15 r14d xmm0 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.59: # Preds ..B2.2 # Execution count [1.11e+00]: Infreq movl %ebx, %eax #114.5 xorl %ecx, %ecx #114.5 movl $1, %esi #114.5 xorl %edx, %edx #114.5 shrl $1, %eax #114.5 je ..B2.63 # Prob 10% #114.5 # LOE rax rdx rcx rdi r12 r14 r15 ebx ebp esi ..B2.60: # Preds ..B2.59 # Execution count [1.00e+00]: Infreq xorl %esi, %esi #114.5 # LOE rax rdx rcx rsi rdi r12 r14 r15 ebx ebp ..B2.61: # Preds ..B2.61 ..B2.60 # Execution count [2.78e+00]: Infreq incq %rcx #114.5 movq %rsi, (%rdx,%rdi) #115.9 movq %rsi, 8(%rdx,%rdi) #115.9 addq $16, %rdx #114.5 cmpq %rax, %rcx #114.5 jb ..B2.61 # Prob 64% #114.5 # LOE rax rdx rcx rsi rdi r12 r14 r15 ebx ebp ..B2.62: # Preds ..B2.61 # Execution count [1.00e+00]: Infreq lea 1(%rcx,%rcx), %esi #115.9 # LOE rdi r12 r14 r15 ebx ebp esi ..B2.63: # Preds ..B2.59 ..B2.62 # Execution count [1.11e+00]: Infreq lea -1(%rsi), %eax #114.5 cmpl %ebx, %eax #114.5 jae ..B2.70 # Prob 10% #114.5 # LOE rdi r12 r14 r15 ebp esi ..B2.64: # Preds ..B2.63 # Execution count [1.00e+00]: Infreq movslq %esi, %rsi #114.5 movslq %ebp, %r13 #114.5 movq $0, -8(%rdi,%rsi,8) #115.9 jmp ..B2.5 # Prob 100% #115.9 # LOE r12 r13 r14 r15 ebp ..B2.70: # Preds ..B2.63 # Execution count [1.11e-01]: Infreq movslq %ebp, %r13 #114.5 jmp ..B2.5 # Prob 100% #114.5 .align 16,0x90 # LOE r12 r13 r14 r15 ebp .cfi_endproc # mark_end; .type computeForceLJHalfNeigh,@function .size computeForceLJHalfNeigh,.-computeForceLJHalfNeigh ..LNcomputeForceLJHalfNeigh.1: .data # -- End computeForceLJHalfNeigh .text .L_2__routine_start_computeForceLJFullNeigh_simd_2: # -- Begin computeForceLJFullNeigh_simd .text # mark_begin; .align 16,0x90 .globl computeForceLJFullNeigh_simd # --- computeForceLJFullNeigh_simd(Parameter *, Atom *, Neighbor *, Stats *) computeForceLJFullNeigh_simd: # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx # parameter 4: %rcx ..B3.1: # Preds ..B3.0 # Execution count [1.00e+00] .cfi_startproc ..___tag_value_computeForceLJFullNeigh_simd.174: ..L175: #189.101 pushq %rsi #189.101 .cfi_def_cfa_offset 16 movl 4(%rsi), %edx #190.18 testl %edx, %edx #196.24 jle ..B3.4 # Prob 50% #196.24 # LOE rbx rbp rsi r12 r13 r14 r15 edx ..B3.2: # Preds ..B3.1 # Execution count [5.00e-03] movq 64(%rsi), %rdi #197.9 lea (%rdx,%rdx,2), %eax #190.18 cmpl $12, %eax #196.5 jle ..B3.8 # Prob 0% #196.5 # LOE rbx rbp rdi r12 r13 r14 r15 eax edx ..B3.3: # Preds ..B3.2 # Execution count [1.00e+00] movslq %edx, %rdx #196.5 xorl %esi, %esi #196.5 lea (%rdx,%rdx,2), %rdx #196.5 shlq $3, %rdx #196.5 call _intel_fast_memset #196.5 # LOE rbx rbp r12 r13 r14 r15 ..B3.4: # Preds ..B3.1 ..B3.12 ..B3.3 ..B3.13 # Execution count [1.00e+00] xorl %eax, %eax #203.16 ..___tag_value_computeForceLJFullNeigh_simd.177: # getTimeStamp() call getTimeStamp #203.16 ..___tag_value_computeForceLJFullNeigh_simd.178: # LOE rbx rbp r12 r13 r14 r15 ..B3.5: # Preds ..B3.4 # Execution count [1.00e+00] movl $.L_2__STRING.0, %edi #204.5 ..___tag_value_computeForceLJFullNeigh_simd.179: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #204.5 ..___tag_value_computeForceLJFullNeigh_simd.180: # LOE ..B3.6: # Preds ..B3.5 # Execution count [1.00e+00] movl $il0_peep_printf_format_0, %edi #207.5 movq stderr(%rip), %rsi #207.5 call fputs #207.5 # LOE ..B3.7: # Preds ..B3.6 # Execution count [1.00e+00] movl $-1, %edi #208.5 # exit(int) call exit #208.5 # LOE ..B3.8: # Preds ..B3.2 # Execution count [1.11e+00]: Infreq movl %eax, %edx #196.5 xorl %r8d, %r8d #196.5 movl $1, %r9d #196.5 xorl %esi, %esi #196.5 xorl %ecx, %ecx #196.5 shrl $1, %edx #196.5 je ..B3.12 # Prob 10% #196.5 # LOE rdx rcx rbx rbp rsi rdi r8 r12 r13 r14 r15 eax r9d ..B3.10: # Preds ..B3.8 ..B3.10 # Execution count [2.78e+00]: Infreq incq %r8 #196.5 movq %rsi, (%rcx,%rdi) #197.9 movq %rsi, 8(%rcx,%rdi) #197.9 addq $16, %rcx #196.5 cmpq %rdx, %r8 #196.5 jb ..B3.10 # Prob 64% #196.5 # LOE rdx rcx rbx rbp rsi rdi r8 r12 r13 r14 r15 eax ..B3.11: # Preds ..B3.10 # Execution count [1.00e+00]: Infreq lea 1(%r8,%r8), %r9d #197.9 # LOE rbx rbp rdi r12 r13 r14 r15 eax r9d ..B3.12: # Preds ..B3.11 ..B3.8 # Execution count [1.11e+00]: Infreq lea -1(%r9), %edx #196.5 cmpl %eax, %edx #196.5 jae ..B3.4 # Prob 10% #196.5 # LOE rbx rbp rdi r12 r13 r14 r15 r9d ..B3.13: # Preds ..B3.12 # Execution count [1.00e+00]: Infreq movslq %r9d, %r9 #196.5 movq $0, -8(%rdi,%r9,8) #197.9 jmp ..B3.4 # Prob 100% #197.9 .align 16,0x90 # LOE rbx rbp r12 r13 r14 r15 .cfi_endproc # mark_end; .type computeForceLJFullNeigh_simd,@function .size computeForceLJFullNeigh_simd,.-computeForceLJFullNeigh_simd ..LNcomputeForceLJFullNeigh_simd.2: .section .rodata.str1.32, "aMS",@progbits,1 .align 32 .align 32 il0_peep_printf_format_0: .long 1869771333 .long 1394621042 .long 541347145 .long 1852990827 .long 1847618661 .long 1763734639 .long 1701605485 .long 1953391981 .long 1713398885 .long 1931506287 .long 1768121712 .long 1684367718 .long 1936615712 .long 1668641396 .long 1852795252 .long 1952805664 .word 33 .data # -- End computeForceLJFullNeigh_simd .section .rodata, "a" .align 16 .align 16 .L_2il0floatpacket.4: .long 0x00000001,0x00000001,0x00000001,0x00000001 .type .L_2il0floatpacket.4,@object .size .L_2il0floatpacket.4,16 .align 16 .L_2il0floatpacket.5: .long 0x00000002,0x00000002,0x00000002,0x00000002 .type .L_2il0floatpacket.5,@object .size .L_2il0floatpacket.5,16 .align 16 .L_2il0floatpacket.6: .long 0x00000000,0x3ff00000,0x00000000,0x3ff00000 .type .L_2il0floatpacket.6,@object .size .L_2il0floatpacket.6,16 .align 16 .L_2il0floatpacket.7: .long 0x00000000,0x3fe00000,0x00000000,0x3fe00000 .type .L_2il0floatpacket.7,@object .size .L_2il0floatpacket.7,16 .align 8 .L_2il0floatpacket.0: .long 0x00000000,0x40480000 .type .L_2il0floatpacket.0,@object .size .L_2il0floatpacket.0,8 .align 8 .L_2il0floatpacket.1: .long 0x00000000,0x3fe00000 .type .L_2il0floatpacket.1,@object .size .L_2il0floatpacket.1,8 .align 8 .L_2il0floatpacket.2: .long 0x00000000,0x41cdcd65 .type .L_2il0floatpacket.2,@object .size .L_2il0floatpacket.2,8 .align 8 .L_2il0floatpacket.3: .long 0x00000000,0x3ff00000 .type .L_2il0floatpacket.3,@object .size .L_2il0floatpacket.3,8 .section .rodata.str1.4, "aMS",@progbits,1 .align 4 .align 4 .L_2__STRING.0: .long 1668444006 .word 101 .type .L_2__STRING.0,@object .size .L_2__STRING.0,6 .space 2, 0x00 # pad .align 4 .L_2__STRING.1: .long 980644937 .long 544548128 .long 1701987872 .long 622869105 .long 1411391590 .long 979725673 .long 174466336 .long 1764718915 .long 622869108 .long 1881677926 .long 1852399980 .long 170484575 .byte 0 .type .L_2__STRING.1,@object .size .L_2__STRING.1,49 .space 3, 0x00 # pad .align 4 .L_2__STRING.2: .long 1668444006 .long 759843941 .long 1718378856 .long 1734960494 .word 104 .type .L_2__STRING.2,@object .size .L_2__STRING.2,18 .space 2, 0x00 # pad .align 4 .L_2__STRING.3: .long 980644937 .long 544548128 .long 1701987872 .long 622869105 .long 1411391590 .long 979725673 .long 174466336 .long 1764718915 .long 622869108 .long 1747460198 .long 761687137 .long 1734960494 .long 665960 .type .L_2__STRING.3,@object .size .L_2__STRING.3,52 .data .section .note.GNU-stack, "" # End