# mark_description "Intel(R) C Intel(R) 64 Compiler Classic for applications running on Intel(R) 64, Version 2021.6.0 Build 2022";
# mark_description "0226_000000";
# mark_description "-I/apps/likwid/5.2.2/include -I././lammps/includes -I././common/includes -S -std=c11 -pedantic-errors -D_GNU";
# mark_description "_SOURCE -DLIKWID_PERFMON -DAOS -DPRECISION=2 -DCOMPUTE_STATS -DVECTOR_WIDTH=4 -D__ISA_AVX2__ -DENABLE_OMP_SI";
# mark_description "MD -DALIGNMENT=64 -restrict -Ofast -xCORE-AVX2 -o build-lammps-ICC-AVX2-DP/force_lj.s";
        .file "force_lj.c"
        .text
..TXTST0:
.L_2__routine_start_computeForceLJFullNeigh_plain_c_0:
# -- Begin  computeForceLJFullNeigh_plain_c
        .text
# mark_begin;
        .align    16,0x90
        .globl computeForceLJFullNeigh_plain_c
#-----------------------------------------------------------------------
# computeForceLJFullNeigh_plain_c(Parameter *, Atom *, Neighbor *, Stats *)
#
# Syntax: GAS / AT&T.   ABI: System V AMD64.
# In:    %rdi = Parameter*, %rsi = Atom*, %rdx = Neighbor*, %rcx = Stats*
# Out:   %xmm0 = second getTimeStamp() result minus the first one
# Calls: __intel_avx_rep_memset, getTimeStamp, likwid_markerStartRegion,
#        likwid_markerStopRegion
# Stack: frame realigned to 32 bytes; 224 bytes of locals/spills below the
#        four pushed callee-saved registers.  %r12 is not pushed; it is
#        spilled to 8(%rsp) before the atom loop and reloaded afterwards.
#
# Field names used in comments below are taken from the compiler's
# source-line annotations and struct offsets; they are assumptions --
# TODO confirm against the Parameter/Atom/Neighbor/Stats declarations.
#
# Outer-loop register roles (..B1.8 .. ..B1.32):
#   %rax  = atom index used to address the neighbor row
#   %rcx  = atom loop counter, compared against 208(%rsp)
#   %rdi  = byte offset of the atom (advances by 24 = 3 doubles)
#   %r11  = 16(Atom)  : base of the packed position triples that are read
#   %r8   = 64(Atom)  : base of the packed triples that are accumulated into
#   %r14  = 24(Neighbor) : per-atom neighbor counts (32-bit)
#   %r13d / %r12 = neighbor count of the current atom (int / sign-extended)
#   %r9, %r10    = running totals written to 0(Stats) and 8(Stats)
#   %xmm8/9/10   = scalar x/y/z accumulators for the current atom
#-----------------------------------------------------------------------
computeForceLJFullNeigh_plain_c:
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
# parameter 4: %rcx
..B1.1:
        .cfi_startproc
..___tag_value_computeForceLJFullNeigh_plain_c.1:
..L2:
        pushq     %rbp
        .cfi_def_cfa_offset 16
        movq      %rsp, %rbp
        .cfi_def_cfa 6, 16
        .cfi_offset 6, -16
        andq      $-32, %rsp                    # realign so 32-byte spills are aligned
        pushq     %r13
        pushq     %r14
        pushq     %r15
        pushq     %rbx
        subq      $224, %rsp
        .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe8, 0xff, 0xff, 0xff, 0x22
        movq      %rsi, %r15                    # r15 = Atom*
        vmovsd    144(%rdi), %xmm0              # presumably param->cutforce
        movq      %rcx, %r13                    # r13 = Stats*
        vmulsd    %xmm0, %xmm0, %xmm1           # squared cutoff
        movq      %rdx, %r14                    # r14 = Neighbor*
        vmovsd    56(%rdi), %xmm2               # presumably param->sigma6
        vmovsd    40(%rdi), %xmm3               # presumably param->epsilon
        movl      4(%r15), %eax                 # 32-bit atom count read from 4(Atom)
        vmovsd    %xmm1, 128(%rsp)              # spill squared cutoff
        vmovsd    %xmm2, 136(%rsp)              # spill 56(Parameter) value
        vmovsd    %xmm3, 24(%rsp)               # spill 40(Parameter) value
        testl     %eax, %eax
        jle       ..B1.34                       # no atoms: timing/marker calls only

# --- Zero the 3*count doubles at 64(Atom) --------------------------------
..B1.2:
        movslq    %eax, %rbx                    # rbx = atom count (64-bit)
        lea       (%rax,%rax,2), %eax           # 3 * count
        movq      64(%r15), %rdi
        cmpl      $12, %eax
        jle       ..B1.43                       # small case: inline store loops

..B1.3:
        xorl      %esi, %esi
        lea       (%rbx,%rbx,2), %rdx
        shlq      $3, %rdx                      # byte length = 3 * count * 8
        call      __intel_avx_rep_memset

..B1.5:
        xorl      %eax, %eax
        vzeroupper
..___tag_value_computeForceLJFullNeigh_plain_c.13:
#       getTimeStamp()
        call      getTimeStamp
..___tag_value_computeForceLJFullNeigh_plain_c.14:

..B1.54:
        vmovsd    %xmm0, 16(%rsp)               # start timestamp

..B1.6:
        movl      $.L_2__STRING.0, %edi
..___tag_value_computeForceLJFullNeigh_plain_c.16:
#       likwid_markerStartRegion(const char *)
        call      likwid_markerStartRegion
..___tag_value_computeForceLJFullNeigh_plain_c.17:

# --- Loop-invariant setup -------------------------------------------------
..B1.7:
        vmovsd    24(%rsp), %xmm0
        xorl      %eax, %eax                    # neighbor-row index = 0
        vmulsd    .L_2il0floatpacket.0(%rip), %xmm0, %xmm4 # scaled 40(Parameter) value (constant not visible here)
        xorl      %ecx, %ecx                    # atom counter = 0
        vbroadcastsd 128(%rsp), %ymm6
        vbroadcastsd %xmm4, %ymm7
        vbroadcastsd 136(%rsp), %ymm2
        vmovsd    .L_2il0floatpacket.4(%rip), %xmm5 # scalar numerator of the reciprocal
        vmovsd    .L_2il0floatpacket.1(%rip), %xmm0 # scalar constant subtracted in the force term
        vmovupd   %ymm6, 32(%rsp)               # 4 x squared cutoff
        vmovupd   %ymm7, 64(%rsp)               # 4 x scaled 40(Parameter) value
        vmovsd    136(%rsp), %xmm6
        vmovsd    128(%rsp), %xmm7
        vmovupd   %ymm2, 96(%rsp)               # 4 x 56(Parameter) value
        movslq    8(%r14), %rsi                 # 32-bit row length of the neighbor table
        xorl      %edi, %edi                    # atom byte offset = 0
        movq      16(%r14), %rdx                # neighbor table base
        shlq      $2, %rsi                      # row stride in bytes (4-byte entries)
        movq      24(%r14), %r14                # per-atom neighbor counts
        movq      16(%r15), %r11
        movq      64(%r15), %r8
        movq      (%r13), %r9                   # running total from 0(Stats)
        movq      8(%r13), %r10                 # running total from 8(Stats)
        movq      %rsi, 144(%rsp)
        movq      %rdx, 152(%rsp)
        movq      %rbx, 208(%rsp)               # atom count = outer loop bound
        movq      %r13, (%rsp)
        movq      %r12, 8(%rsp)                 # caller's r12 lives here until ..B1.33
        .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22

# --- Outer loop: one iteration per atom -----------------------------------
..B1.8:
        movl      (%r14,%rcx,4), %r13d          # neighbor count of this atom
        testl     %r13d, %r13d
        vxorpd    %xmm8, %xmm8, %xmm8           # x accumulator = 0
        vmovapd   %xmm8, %xmm9                  # y accumulator = 0
        vmovsd    (%rdi,%r11), %xmm3            # x of this atom
        vmovapd   %xmm9, %xmm10                 # z accumulator = 0
        vmovsd    8(%rdi,%r11), %xmm2           # y of this atom
        vmovsd    16(%rdi,%r11), %xmm1          # z of this atom
        movslq    %r13d, %r12                   # invariant: r12 == sign-extended r13d
        jle       ..B1.32                       # no neighbors

..B1.9:
        cmpq      $4, %r12
        jl        ..B1.39                       # fewer than 4: scalar loop only

..B1.10:
        movq      144(%rsp), %rbx
        imulq     %rax, %rbx
        addq      152(%rsp), %rbx               # rbx = start of this atom's neighbor row
        cmpq      $600, %r12
        jl        ..B1.41                       # below 600 neighbors: skip the alignment peel

..B1.11:
        movq      %rbx, %r15
        andq      $31, %r15                     # row misalignment relative to 32 bytes
        testl     %r15d, %r15d
        je        ..B1.14

..B1.12:
        testl     $3, %r15d
        jne       ..B1.39                       # not 4-byte aligned: scalar loop only

..B1.13:
        negl      %r15d
        addl      $32, %r15d
        shrl      $2, %r15d                     # entries to peel until 32-byte aligned

..B1.14:
        movl      %r15d, %edx
        lea       4(%rdx), %rsi
        cmpq      %rsi, %r12
        jl        ..B1.39                       # not enough entries for peel + one vector step

..B1.15:
        movl      %r13d, %esi
        subl      %r15d, %esi
        andl      $3, %esi
        negl      %esi
        addl      %r13d, %esi
        movslq    %esi, %rsi                    # rsi = end index of the 4-wide loop
        testl     %r15d, %r15d
        movl      $0, %r15d                     # mov keeps the flags from testl
        jbe       ..B1.21                       # nothing to peel

..B1.16:
        movq      %rcx, 24(%rsp)                # rcx is scratch inside the peel loop

# --- Scalar peel loop: entries [0, rdx) -----------------------------------
..B1.17:
        movl      (%rbx,%r15,4), %ecx           # neighbor index j
        lea       (%rcx,%rcx,2), %ecx           # 3*j
        movslq    %ecx, %rcx
        vsubsd    8(%r11,%rcx,8), %xmm2, %xmm13 # dy
        vsubsd    (%r11,%rcx,8), %xmm3, %xmm12  # dx
        vsubsd    16(%r11,%rcx,8), %xmm1, %xmm11 # dz
        vmulsd    %xmm13, %xmm13, %xmm14
        vfmadd231sd %xmm12, %xmm12, %xmm14
        vfmadd231sd %xmm11, %xmm11, %xmm14      # squared distance
        vcomisd   %xmm14, %xmm7
        jbe       ..B1.19                       # skip unless squared distance < squared cutoff

..B1.18:
        vdivsd    %xmm14, %xmm5, %xmm15         # xmm5 / squared distance
        vmulsd    %xmm15, %xmm6, %xmm14
        vmulsd    %xmm15, %xmm14, %xmm14
        vmulsd    %xmm15, %xmm14, %xmm14        # xmm6 * (xmm15)^3
        vmulsd    %xmm4, %xmm15, %xmm15
        vmulsd    %xmm14, %xmm15, %xmm15
        vsubsd    %xmm0, %xmm14, %xmm14
        vmulsd    %xmm14, %xmm15, %xmm15        # force factor
        vfmadd231sd %xmm12, %xmm15, %xmm8       # x accumulator += dx * factor
        vfmadd231sd %xmm15, %xmm13, %xmm9       # y accumulator += dy * factor
        vfmadd231sd %xmm15, %xmm11, %xmm10      # z accumulator += dz * factor

..B1.19:
        incq      %r15
        cmpq      %rdx, %r15
        jb        ..B1.17

..B1.20:
        movq      24(%rsp), %rcx

# --- Prepare the 4-wide loop ----------------------------------------------
..B1.21:
        vmovsd    %xmm3, 192(%rsp)              # spill scalars whose registers the vector loop reuses
        vxorpd    %xmm11, %xmm11, %xmm11
        vmovsd    %xmm8, %xmm11, %xmm13         # lane 0 = scalar partial sum, lane 1 = 0
        vmovsd    %xmm9, %xmm11, %xmm12
        vmovsd    %xmm10, %xmm11, %xmm11
        vmovsd    %xmm4, 200(%rsp)
        vbroadcastsd %xmm3, %ymm10              # 4 x this atom's x
        vmovsd    %xmm1, 176(%rsp)
        vmovsd    %xmm2, 184(%rsp)
        vmovupd   .L_2il0floatpacket.3(%rip), %ymm3 # vector constant subtracted in the force term
        vmovupd   .L_2il0floatpacket.2(%rip), %ymm4 # vector numerator of the reciprocal
        vmovupd   32(%rsp), %ymm5               # 4 x squared cutoff
        vbroadcastsd %xmm2, %ymm9               # 4 x this atom's y
        vbroadcastsd %xmm1, %ymm8               # 4 x this atom's z
        movq      %r8, 160(%rsp)                # r8, r14, rcx are scratch inside the vector loop
        movq      %r14, 168(%rsp)
        movq      %rcx, 24(%rsp)
        vmovaps   %xmm13, %xmm13                # VEX 128-bit move clears the upper ymm half
        vmovaps   %xmm12, %xmm12
        vmovaps   %xmm11, %xmm11
        # The byte marker below must write %ebx, but %rbx holds the
        # neighbor-row pointer that the loop indexes.  Carry the pointer in
        # %r12 across the marked region instead; %r12 is rebuilt from
        # %r13d right after the end marker.
        movq      %rbx, %r12
        movl      $111, %ebx # OSACA START MARKER
        .byte     100        # OSACA START MARKER
        .byte     103        # OSACA START MARKER
        .byte     144        # OSACA START MARKER
# pointer_increment=32 724d27eafcb27eabca1528ddfdbdba3e
# LLVM-MCA-BEGIN
# --- 4-wide loop: entries [rdx, rsi), four neighbors per iteration --------
..B1.22:
        vmovdqu   (%r12,%rdx,4), %xmm0          # four neighbor indices j0..j3
        vmovq     %xmm0, %rcx                   # rcx = j1:j0
        vpunpckhqdq %xmm0, %xmm0, %xmm2
        vmovq     %xmm2, %r15                   # r15 = j3:j2
        movl      %ecx, %r8d
        shrq      $32, %rcx
        lea       (%rcx,%rcx,2), %r14d          # 3*j1
        lea       (%r8,%r8,2), %r8d             # 3*j0
        movslq    %r8d, %rcx                    # rcx = 3*j0
        movslq    %r14d, %r8                    # r8  = 3*j1
        movl      %r15d, %r14d
        shrq      $32, %r15
        vmovups   (%r11,%rcx,8), %xmm7          # x0,y0
        vmovups   (%r11,%r8,8), %xmm6           # x1,y1
        vmovq     16(%r11,%rcx,8), %xmm14       # z0
        lea       (%r14,%r14,2), %r14d
        movslq    %r14d, %r14                   # r14 = 3*j2
        lea       (%r15,%r15,2), %r15d
        movslq    %r15d, %r15                   # r15 = 3*j3
        vmovhpd   16(%r11,%r8,8), %xmm14, %xmm15 # z0,z1
        vinsertf128 $1, (%r11,%r14,8), %ymm7, %ymm1 # x0,y0,x2,y2
        vmovq     16(%r11,%r14,8), %xmm0        # z2
        vinsertf128 $1, (%r11,%r15,8), %ymm6, %ymm6 # x1,y1,x3,y3
        vmovhpd   16(%r11,%r15,8), %xmm0, %xmm2 # z2,z3
        vunpcklpd %ymm6, %ymm1, %ymm14          # x0,x1,x2,x3
        vunpckhpd %ymm6, %ymm1, %ymm1           # y0,y1,y2,y3
        vsubpd    %ymm14, %ymm10, %ymm6         # dx
        vinsertf128 $1, %xmm2, %ymm15, %ymm7    # z0,z1,z2,z3
        vsubpd    %ymm1, %ymm9, %ymm2           # dy
        vsubpd    %ymm7, %ymm8, %ymm0           # dz
        vmulpd    %ymm2, %ymm2, %ymm14
        vfmadd231pd %ymm6, %ymm6, %ymm14
        vfmadd231pd %ymm0, %ymm0, %ymm14        # squared distances
        vcmpltpd  %ymm5, %ymm14, %ymm1          # lane mask: squared distance < squared cutoff
        vpcmpeqd  %ymm7, %ymm7, %ymm7
        vptest    %ymm7, %ymm1
        # The early-out below is disabled, so ..B1.23 runs on every
        # iteration; lanes outside the cutoff are zeroed by the vandpd
        # with the mask in %ymm1 before they are accumulated.
        #je        ..B1.24

..B1.23:
        vdivpd    %ymm14, %ymm4, %ymm7          # ymm4 / squared distance
        vmulpd    96(%rsp), %ymm7, %ymm14
        vmulpd    %ymm14, %ymm7, %ymm14
        vmulpd    %ymm14, %ymm7, %ymm15
        vfmsub213pd %ymm3, %ymm7, %ymm14
        vmulpd    64(%rsp), %ymm7, %ymm7
        vmulpd    %ymm7, %ymm15, %ymm15
        vmulpd    %ymm14, %ymm15, %ymm7         # force factor per lane
        vmulpd    %ymm7, %ymm6, %ymm6           # dx * factor
        vmulpd    %ymm7, %ymm2, %ymm2           # dy * factor
        vandpd    %ymm6, %ymm1, %ymm6
        vaddpd    %ymm6, %ymm13, %ymm13         # x lanes +=
        vmulpd    %ymm7, %ymm0, %ymm6           # dz * factor
        vandpd    %ymm2, %ymm1, %ymm0
        vandpd    %ymm6, %ymm1, %ymm1
        vaddpd    %ymm0, %ymm12, %ymm12         # y lanes +=
        vaddpd    %ymm1, %ymm11, %ymm11         # z lanes +=

..B1.24:
        addq      $4, %rdx
        cmpq      %rsi, %rdx
        jb        ..B1.22
# LLVM-MCA-END
        movl      $222, %ebx # OSACA END MARKER
        .byte     100        # OSACA END MARKER
        .byte     103        # OSACA END MARKER
        .byte     144        # OSACA END MARKER
        movslq    %r13d, %r12                   # restore r12 = neighbor count (was the row pointer)

# --- Horizontal reduction and scalar state reload -------------------------
..B1.25:
        vextractf128 $1, %ymm11, %xmm10
        vmovsd    176(%rsp), %xmm1
        vmovsd    184(%rsp), %xmm2
        vaddpd    %xmm10, %xmm11, %xmm9
        vunpckhpd %xmm9, %xmm9, %xmm8
        vmovsd    192(%rsp), %xmm3
        vaddsd    %xmm8, %xmm9, %xmm10          # z accumulator = sum of 4 lanes
        vmovsd    200(%rsp), %xmm4
        vmovsd    136(%rsp), %xmm6
        vmovsd    128(%rsp), %xmm7
        movq      160(%rsp), %r8
        movq      168(%rsp), %r14
        movq      24(%rsp), %rcx
        vmovsd    .L_2il0floatpacket.1(%rip), %xmm0
        vmovsd    .L_2il0floatpacket.4(%rip), %xmm5
        vextractf128 $1, %ymm12, %xmm14
        vextractf128 $1, %ymm13, %xmm8
        vaddpd    %xmm14, %xmm12, %xmm15
        vaddpd    %xmm8, %xmm13, %xmm11
        vunpckhpd %xmm15, %xmm15, %xmm9
        vunpckhpd %xmm11, %xmm11, %xmm12
        vaddsd    %xmm9, %xmm15, %xmm9          # y accumulator
        vaddsd    %xmm12, %xmm11, %xmm8         # x accumulator

..B1.26:
        cmpq      %r12, %rsi
        jae       ..B1.32                       # no remainder entries

..B1.27:
        imulq     144(%rsp), %rax
        addq      152(%rsp), %rax               # rax = start of this atom's neighbor row

# --- Scalar remainder loop: entries [rsi, r12) ----------------------------
..B1.28:
        movl      (%rax,%rsi,4), %edx           # neighbor index j
        lea       (%rdx,%rdx,2), %ebx           # 3*j
        movslq    %ebx, %rbx
        vsubsd    8(%r11,%rbx,8), %xmm2, %xmm13 # dy
        vsubsd    (%r11,%rbx,8), %xmm3, %xmm12  # dx
        vsubsd    16(%r11,%rbx,8), %xmm1, %xmm11 # dz
        vmulsd    %xmm13, %xmm13, %xmm14
        vfmadd231sd %xmm12, %xmm12, %xmm14
        vfmadd231sd %xmm11, %xmm11, %xmm14      # squared distance
        vcomisd   %xmm14, %xmm7
        jbe       ..B1.30                       # skip unless squared distance < squared cutoff

..B1.29:
        vdivsd    %xmm14, %xmm5, %xmm15
        vmulsd    %xmm15, %xmm6, %xmm14
        vmulsd    %xmm15, %xmm14, %xmm14
        vmulsd    %xmm15, %xmm14, %xmm14
        vmulsd    %xmm4, %xmm15, %xmm15
        vmulsd    %xmm14, %xmm15, %xmm15
        vsubsd    %xmm0, %xmm14, %xmm14
        vmulsd    %xmm14, %xmm15, %xmm15        # force factor
        vfmadd231sd %xmm12, %xmm15, %xmm8
        vfmadd231sd %xmm15, %xmm13, %xmm9
        vfmadd231sd %xmm15, %xmm11, %xmm10

..B1.30:
        incq      %rsi
        cmpq      %r12, %rsi
        jb        ..B1.28

# --- Write back this atom's sums, update counters, advance ----------------
..B1.32:
        addq      %r12, %r9                     # first counter += neighbor count
        lea       3(%r13), %eax
        sarl      $1, %eax
        vaddsd    (%rdi,%r8), %xmm8, %xmm1
        vaddsd    8(%rdi,%r8), %xmm9, %xmm2
        vaddsd    16(%rdi,%r8), %xmm10, %xmm3
        shrl      $30, %eax                     # rounding bias for signed division by 4
        vmovsd    %xmm1, (%rdi,%r8)
        vmovsd    %xmm2, 8(%rdi,%r8)
        vmovsd    %xmm3, 16(%rdi,%r8)
        addq      $24, %rdi                     # next atom (3 doubles)
        lea       3(%rax,%r13), %edx
        movslq    %ecx, %rax
        sarl      $2, %edx                      # (neighbor count + 3) / 4
        incq      %rcx
        movslq    %edx, %rdx
        incq      %rax                          # next neighbor-row index
        addq      %rdx, %r10                    # second counter += vector-iteration count
        cmpq      208(%rsp), %rcx
        jb        ..B1.8

..B1.33:
        movq      (%rsp), %r13                  # r13 = Stats*
        movq      8(%rsp), %r12                 # restore caller's r12
        .cfi_restore 12
        movq      %r9, (%r13)
        movq      %r10, 8(%r13)
        jmp       ..B1.36

# --- Zero-atom path: same timing/marker calls, no work --------------------
..B1.34:
        xorl      %eax, %eax
..___tag_value_computeForceLJFullNeigh_plain_c.61:
#       getTimeStamp()
        call      getTimeStamp
..___tag_value_computeForceLJFullNeigh_plain_c.62:

..B1.55:
        vmovsd    %xmm0, 16(%rsp)

..B1.35:
        movl      $.L_2__STRING.0, %edi
..___tag_value_computeForceLJFullNeigh_plain_c.64:
#       likwid_markerStartRegion(const char *)
        call      likwid_markerStartRegion
..___tag_value_computeForceLJFullNeigh_plain_c.65:

# --- Common exit -----------------------------------------------------------
..B1.36:
        movl      $.L_2__STRING.0, %edi
        vzeroupper
..___tag_value_computeForceLJFullNeigh_plain_c.66:
#       likwid_markerStopRegion(const char *)
        call      likwid_markerStopRegion
..___tag_value_computeForceLJFullNeigh_plain_c.67:

..B1.37:
        xorl      %eax, %eax
..___tag_value_computeForceLJFullNeigh_plain_c.68:
#       getTimeStamp()
        call      getTimeStamp
..___tag_value_computeForceLJFullNeigh_plain_c.69:

..B1.38:
        vsubsd    16(%rsp), %xmm0, %xmm0        # return end - start timestamp
        addq      $224, %rsp
        .cfi_restore 3
        popq      %rbx
        .cfi_restore 15
        popq      %r15
        .cfi_restore 14
        popq      %r14
        .cfi_restore 13
        popq      %r13
        movq      %rbp, %rsp
        popq      %rbp
        .cfi_def_cfa 7, 8
        .cfi_restore 6
        ret
        .cfi_def_cfa 6, 16
        .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x22
        .cfi_offset 6, -16
        .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xff, 0xff, 0xff, 0x22
        .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe8, 0xff, 0xff, 0xff, 0x22

# --- Infrequent: no vector part, run the scalar loop from entry 0 ---------
..B1.39:
        xorl      %esi, %esi
        jmp       ..B1.26

# --- Infrequent: no alignment peel, vector loop covers [0, count & ~3) ----
..B1.41:
        movl      %r13d, %esi
        xorl      %edx, %edx
        andl      $-4, %esi
        movslq    %esi, %rsi
        jmp       ..B1.21
        .cfi_restore 12

# --- Infrequent: inline zeroing when 3*count <= 12 ------------------------
..B1.43:
        lea       (%rbx,%rbx,2), %rcx           # rcx = 3 * count doubles
        cmpq      $8, %rcx
        jl        ..B1.51

..B1.44:
        movl      %ecx, %eax
        xorl      %edx, %edx
        andl      $-8, %eax
        movslq    %eax, %rax                    # rax = rcx rounded down to a multiple of 8
        vxorpd    %ymm0, %ymm0, %ymm0

..B1.45:
        vmovupd   %ymm0, (%rdi,%rdx,8)          # zero 8 doubles per iteration
        vmovupd   %ymm0, 32(%rdi,%rdx,8)
        addq      $8, %rdx
        cmpq      %rax, %rdx
        jb        ..B1.45

..B1.47:
        cmpq      %rcx, %rax
        jae       ..B1.5

..B1.48:
        xorl      %edx, %edx

..B1.49:
        movq      %rdx, (%rdi,%rax,8)           # zero the remaining doubles one at a time
        incq      %rax
        cmpq      %rcx, %rax
        jb        ..B1.49
        jmp       ..B1.5

..B1.51:
        xorl      %eax, %eax
        jmp       ..B1.47
        .align    16,0x90
        .cfi_endproc
# mark_end;
        .type   computeForceLJFullNeigh_plain_c,@function
.size computeForceLJFullNeigh_plain_c,.-computeForceLJFullNeigh_plain_c ..LNcomputeForceLJFullNeigh_plain_c.0: .data # -- End computeForceLJFullNeigh_plain_c .text .L_2__routine_start_computeForceLJHalfNeigh_1: # -- Begin computeForceLJHalfNeigh .text # mark_begin; .align 16,0x90 .globl computeForceLJHalfNeigh # --- computeForceLJHalfNeigh(Parameter *, Atom *, Neighbor *, Stats *) computeForceLJHalfNeigh: # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx # parameter 4: %rcx ..B2.1: # Preds ..B2.0 # Execution count [1.00e+00] .cfi_startproc ..___tag_value_computeForceLJHalfNeigh.86: ..L87: #104.96 pushq %rbp #104.96 .cfi_def_cfa_offset 16 movq %rsp, %rbp #104.96 .cfi_def_cfa 6, 16 .cfi_offset 6, -16 andq $-32, %rsp #104.96 pushq %r12 #104.96 pushq %r13 #104.96 pushq %r14 #104.96 pushq %r15 #104.96 pushq %rbx #104.96 subq $216, %rsp #104.96 .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x22 movq %rsi, %r13 #104.96 vmovsd 144(%rdi), %xmm0 #108.27 movq %rcx, %r12 #104.96 vmulsd %xmm0, %xmm0, %xmm1 #108.45 movq %rdx, %r14 #104.96 vmovsd 56(%rdi), %xmm2 #109.23 vmovsd 40(%rdi), %xmm3 #110.24 movl 4(%r13), %r15d #105.18 vmovsd %xmm1, 32(%rsp) #108.45[spill] vmovsd %xmm2, 24(%rsp) #109.23[spill] vmovsd %xmm3, 16(%rsp) #110.24[spill] testl %r15d, %r15d #116.24 jle ..B2.51 # Prob 50% #116.24 # LOE r12 r13 r14 r15d ..B2.2: # Preds ..B2.1 # Execution count [5.00e-03] movq 64(%r13), %rdi #117.9 lea (%r15,%r15,2), %eax #105.18 movslq %r15d, %rbx #105.18 cmpl $12, 
%eax #116.5 jle ..B2.57 # Prob 0% #116.5 # LOE rbx rdi r12 r13 r14 r15d ..B2.3: # Preds ..B2.2 # Execution count [1.00e+00] xorl %esi, %esi #116.5 lea (%rbx,%rbx,2), %rdx #116.5 shlq $3, %rdx #116.5 call __intel_avx_rep_memset #116.5 # LOE rbx r12 r13 r14 r15d ..B2.5: # Preds ..B2.63 ..B2.3 ..B2.61 # Execution count [1.00e+00] xorl %eax, %eax #122.16 vzeroupper #122.16 ..___tag_value_computeForceLJHalfNeigh.99: # getTimeStamp() call getTimeStamp #122.16 ..___tag_value_computeForceLJHalfNeigh.100: # LOE rbx r12 r13 r14 r15d xmm0 ..B2.68: # Preds ..B2.5 # Execution count [1.00e+00] vmovsd %xmm0, 8(%rsp) #122.16[spill] # LOE rbx r12 r13 r14 r15d ..B2.6: # Preds ..B2.68 # Execution count [5.00e-01] movl $.L_2__STRING.1, %edi #126.5 ..___tag_value_computeForceLJHalfNeigh.102: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #126.5 ..___tag_value_computeForceLJHalfNeigh.103: # LOE rbx r12 r13 r14 r15d ..B2.7: # Preds ..B2.6 # Execution count [9.00e-01] vmovsd 16(%rsp), %xmm6 #165.42[spill] vmovd %r15d, %xmm0 #105.18 vmulsd .L_2il0floatpacket.0(%rip), %xmm6, %xmm6 #165.42 xorl %eax, %eax #129.15 vmovddup 32(%rsp), %xmm8 #108.25[spill] xorl %ecx, %ecx #129.5 vmovddup 24(%rsp), %xmm4 #109.21[spill] xorl %r9d, %r9d #129.5 vmovddup %xmm6, %xmm3 #165.42 vpbroadcastd %xmm0, %xmm1 #105.18 movq 16(%r14), %rdx #130.19 movslq 8(%r14), %rsi #130.43 movq 24(%r14), %r11 #131.25 vmovdqu .L_2il0floatpacket.6(%rip), %xmm9 #151.36 vmovsd .L_2il0floatpacket.4(%rip), %xmm5 #163.32 vmovsd .L_2il0floatpacket.1(%rip), %xmm0 #165.55 shlq $2, %rsi #106.5 movq 16(%r13), %r14 #132.25 movq 64(%r13), %rdi #172.21 movq (%r12), %r10 #183.9 movq 8(%r12), %r8 #184.9 vmovdqu %xmm1, 176(%rsp) #129.5[spill] vmovupd %xmm3, 160(%rsp) #129.5[spill] vmovupd %xmm4, 144(%rsp) #129.5[spill] vmovupd %xmm8, 192(%rsp) #129.5[spill] movq %rdx, 40(%rsp) #129.5[spill] movl %r15d, 48(%rsp) #129.5[spill] movq %r12, (%rsp) #129.5[spill] vmovsd 24(%rsp), %xmm7 #129.5[spill] vmovsd 32(%rsp), %xmm2 
#129.5[spill] # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r14 xmm0 xmm2 xmm5 xmm6 xmm7 ..B2.8: # Preds ..B2.49 ..B2.7 # Execution count [5.00e+00] movl (%r11,%rcx,4), %edx #131.25 testl %edx, %edx #147.9 vxorpd %xmm10, %xmm10, %xmm10 #135.22 vmovapd %xmm10, %xmm11 #136.22 vmovsd (%r9,%r14), %xmm4 #132.25 vmovapd %xmm11, %xmm12 #137.22 vmovsd 8(%r9,%r14), %xmm3 #133.25 vmovsd 16(%r9,%r14), %xmm1 #134.25 jle ..B2.48 # Prob 50% #147.9 # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r14 edx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.9: # Preds ..B2.8 # Execution count [2.50e+00] jbe ..B2.48 # Prob 50% #147.9 # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r14 edx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.10: # Preds ..B2.9 # Execution count [2.25e+00] cmpl $2, %edx #147.9 jb ..B2.56 # Prob 10% #147.9 # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r14 edx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.11: # Preds ..B2.10 # Execution count [2.25e+00] movq %rsi, %r13 #130.43 movl %edx, %r12d #147.9 imulq %rax, %r13 #130.43 vxorpd %xmm14, %xmm14, %xmm14 #135.22 andl $-2, %r12d #147.9 vmovapd %xmm14, %xmm13 #136.22 vmovsd %xmm6, 136(%rsp) #147.9[spill] vmovapd %xmm13, %xmm11 #137.22 addq 40(%rsp), %r13 #106.5[spill] xorl %r15d, %r15d #147.9 vmovddup %xmm4, %xmm10 #132.23 vmovddup %xmm3, %xmm9 #133.23 vmovddup %xmm1, %xmm8 #134.23 movslq %r12d, %r12 #147.9 vmovsd %xmm1, 112(%rsp) #147.9[spill] vmovsd %xmm3, 120(%rsp) #147.9[spill] vmovsd %xmm4, 128(%rsp) #147.9[spill] movl %edx, 16(%rsp) #147.9[spill] movq %r9, 56(%rsp) #147.9[spill] movq %rsi, 64(%rsp) #147.9[spill] movq %r8, 72(%rsp) #147.9[spill] movq %r10, 80(%rsp) #147.9[spill] movq %r11, 88(%rsp) #147.9[spill] movq %rcx, 96(%rsp) #147.9[spill] movq %rbx, 104(%rsp) #147.9[spill] vmovdqu .L_2il0floatpacket.6(%rip), %xmm6 #147.9 vmovdqu .L_2il0floatpacket.5(%rip), %xmm7 #147.9 # LOE rax rdi r12 r13 r14 r15 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.12: # Preds ..B2.38 ..B2.11 # 
Execution count [1.25e+01] vmovq (%r13,%r15,4), %xmm4 #148.21 vpaddd %xmm4, %xmm4, %xmm0 #149.36 vpaddd %xmm0, %xmm4, %xmm1 #149.36 vmovd %xmm1, %r8d #149.36 vpaddd %xmm7, %xmm1, %xmm12 #150.36 vpshufd $57, %xmm1, %xmm2 #149.36 vpshufd $57, %xmm12, %xmm15 #150.36 vmovd %xmm2, %esi #149.36 vmovd %xmm12, %ebx #150.36 vmovd %xmm15, %ecx #150.36 movslq %r8d, %r8 #149.36 movslq %esi, %rsi #149.36 movslq %ebx, %rbx #150.36 movslq %ecx, %rcx #150.36 vmovsd (%r14,%r8,8), %xmm3 #149.36 vmovhpd (%r14,%rsi,8), %xmm3, %xmm5 #149.36 vsubpd %xmm5, %xmm10, %xmm0 #149.36 vpaddd %xmm6, %xmm1, %xmm5 #151.36 vmovd %xmm5, %edx #151.36 vpshufd $57, %xmm5, %xmm1 #151.36 vmovsd (%r14,%rbx,8), %xmm2 #150.36 vmovd %xmm1, %r9d #151.36 vmovhpd (%r14,%rcx,8), %xmm2, %xmm3 #150.36 vpcmpeqd %xmm1, %xmm1, %xmm1 #162.22 vsubpd %xmm3, %xmm9, %xmm2 #150.36 movslq %edx, %rdx #151.36 movslq %r9d, %r9 #151.36 vmovsd (%r14,%rdx,8), %xmm12 #151.36 vmovhpd (%r14,%r9,8), %xmm12, %xmm15 #151.36 vsubpd %xmm15, %xmm8, %xmm3 #151.36 vmulpd %xmm2, %xmm2, %xmm15 #152.49 vfmadd231pd %xmm0, %xmm0, %xmm15 #152.49 vfmadd231pd %xmm3, %xmm3, %xmm15 #152.63 vcmpltpd 192(%rsp), %xmm15, %xmm5 #162.22[spill] vptest %xmm1, %xmm5 #162.22 je ..B2.38 # Prob 50% #162.22 # LOE rax rdx rcx rbx rsi rdi r8 r9 r12 r13 r14 r15 xmm0 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 xmm15 ..B2.13: # Preds ..B2.12 # Execution count [6.25e+00] vmovupd .L_2il0floatpacket.7(%rip), %xmm12 #163.39 vdivpd %xmm15, %xmm12, %xmm1 #163.39 vmovdqu 176(%rsp), %xmm12 #171.24[spill] vpcmpeqd %xmm15, %xmm15, %xmm15 #171.24 vpcmpgtd %xmm4, %xmm12, %xmm4 #171.24 vmulpd 144(%rsp), %xmm1, %xmm12 #164.38[spill] vmulpd %xmm12, %xmm1, %xmm12 #164.44 vpmovsxdq %xmm4, %xmm4 #171.24 vandpd %xmm4, %xmm5, %xmm4 #171.24 vptest %xmm15, %xmm4 #171.24 vmulpd %xmm12, %xmm1, %xmm15 #164.50 vfmsub213pd .L_2il0floatpacket.8(%rip), %xmm1, %xmm12 #165.55 vmulpd 160(%rsp), %xmm1, %xmm1 #165.55[spill] vmulpd %xmm1, %xmm15, %xmm1 #165.64 vmulpd %xmm12, %xmm1, 
%xmm15 #165.70 vmulpd %xmm15, %xmm0, %xmm12 #166.31 vmulpd %xmm15, %xmm2, %xmm1 #167.31 vmulpd %xmm15, %xmm3, %xmm0 #168.31 vandpd %xmm12, %xmm5, %xmm2 #166.31 vandpd %xmm1, %xmm5, %xmm3 #167.31 vandpd %xmm0, %xmm5, %xmm5 #168.31 vaddpd %xmm2, %xmm14, %xmm14 #166.17 vaddpd %xmm3, %xmm13, %xmm13 #167.17 vaddpd %xmm5, %xmm11, %xmm11 #168.17 je ..B2.38 # Prob 50% #171.24 # LOE rax rdx rcx rbx rsi rdi r8 r9 r12 r13 r14 r15 xmm0 xmm1 xmm4 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 ..B2.14: # Preds ..B2.13 # Execution count [3.12e+00] vmovmskpd %xmm4, %r11d #172.21 movl %r11d, %r10d #172.21 andl $2, %r10d #172.21 andl $1, %r11d #172.21 je ..B2.17 # Prob 40% #172.21 # LOE rax rdx rcx rbx rsi rdi r8 r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 ..B2.15: # Preds ..B2.14 # Execution count [3.12e+00] vmovsd (%rdi,%r8,8), %xmm2 #172.21 testl %r10d, %r10d #172.21 jne ..B2.18 # Prob 60% #172.21 # LOE rax rdx rcx rbx rsi rdi r8 r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 ..B2.16: # Preds ..B2.15 # Execution count [1.25e+00] vxorpd %xmm3, %xmm3, %xmm3 #172.21 vunpcklpd %xmm3, %xmm2, %xmm4 #172.21 vsubpd %xmm12, %xmm4, %xmm2 #172.21 jmp ..B2.31 # Prob 100% #172.21 # LOE rax rdx rbx rdi r8 r12 r13 r14 r15 xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.17: # Preds ..B2.14 # Execution count [3.12e+00] testl %r10d, %r10d #172.21 vxorpd %xmm2, %xmm2, %xmm2 #172.21 je ..B2.30 # Prob 40% #172.21 # LOE rax rdx rcx rbx rsi rdi r8 r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 ..B2.18: # Preds ..B2.15 ..B2.17 # Execution count [3.12e+00] vmovhpd (%rdi,%rsi,8), %xmm2, %xmm3 #172.21 testl %r11d, %r11d #172.21 vsubpd %xmm12, %xmm3, %xmm2 #172.21 je ..B2.20 # Prob 40% #172.21 # LOE rax rdx rcx rbx rsi rdi r8 r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.19: # Preds ..B2.18 # Execution 
count [1.88e+00] vpshufd $14, %xmm2, %xmm3 #172.21 vmovsd %xmm2, (%rdi,%r8,8) #172.21 vmovsd %xmm3, (%rdi,%rsi,8) #172.21 vmovsd (%rdi,%rbx,8), %xmm2 #173.21 jmp ..B2.21 # Prob 100% #173.21 # LOE rax rdx rcx rbx rdi r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.20: # Preds ..B2.18 # Execution count [1.25e+00] vpshufd $14, %xmm2, %xmm2 #172.21 vmovsd %xmm2, (%rdi,%rsi,8) #172.21 vxorpd %xmm2, %xmm2, %xmm2 #173.21 # LOE rax rdx rcx rbx rdi r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.21: # Preds ..B2.19 ..B2.20 # Execution count [1.88e+00] testl %r10d, %r10d #173.21 je ..B2.72 # Prob 40% #173.21 # LOE rax rdx rcx rbx rdi r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.22: # Preds ..B2.21 # Execution count [3.12e+00] vmovhpd (%rdi,%rcx,8), %xmm2, %xmm3 #173.21 testl %r11d, %r11d #173.21 vsubpd %xmm1, %xmm3, %xmm1 #173.21 je ..B2.24 # Prob 40% #173.21 # LOE rax rdx rcx rbx rdi r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.23: # Preds ..B2.22 # Execution count [1.88e+00] vpshufd $14, %xmm1, %xmm2 #173.21 vmovsd %xmm1, (%rdi,%rbx,8) #173.21 vmovsd %xmm2, (%rdi,%rcx,8) #173.21 vmovsd (%rdi,%rdx,8), %xmm1 #174.21 jmp ..B2.25 # Prob 100% #174.21 # LOE rax rdx rdi r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.24: # Preds ..B2.22 # Execution count [1.25e+00] vpshufd $14, %xmm1, %xmm1 #173.21 vmovsd %xmm1, (%rdi,%rcx,8) #173.21 vxorpd %xmm1, %xmm1, %xmm1 #174.21 # LOE rax rdx rdi r9 r12 r13 r14 r15 r10d r11d xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.25: # Preds ..B2.23 ..B2.24 # Execution count [1.88e+00] testl %r10d, %r10d #174.21 je ..B2.71 # Prob 40% #174.21 # LOE rax rdx rdi r9 r12 r13 r14 r15 r11d xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.26: # Preds ..B2.25 # Execution count [3.12e+00] vmovhpd (%rdi,%r9,8), %xmm1, 
%xmm2 #174.21 testl %r11d, %r11d #174.21 vsubpd %xmm0, %xmm2, %xmm0 #174.21 je ..B2.28 # Prob 40% #174.21 # LOE rax rdx rdi r9 r12 r13 r14 r15 xmm0 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.27: # Preds ..B2.26 # Execution count [1.88e+00] vmovsd %xmm0, (%rdi,%rdx,8) #174.21 vpshufd $14, %xmm0, %xmm0 #174.21 jmp ..B2.29 # Prob 100% #174.21 # LOE rax rdi r9 r12 r13 r14 r15 xmm0 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.28: # Preds ..B2.26 # Execution count [1.25e+00] vpshufd $14, %xmm0, %xmm0 #174.21 # LOE rax rdi r9 r12 r13 r14 r15 xmm0 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.29: # Preds ..B2.27 ..B2.28 # Execution count [3.12e+00] vmovsd %xmm0, (%rdi,%r9,8) #174.21 jmp ..B2.38 # Prob 100% #174.21 # LOE rax rdi r12 r13 r14 r15 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.30: # Preds ..B2.17 # Execution count [1.88e+00] testl %r11d, %r11d #172.21 vxorpd %xmm2, %xmm2, %xmm2 #172.21 vsubpd %xmm12, %xmm2, %xmm2 #172.21 je ..B2.32 # Prob 40% #172.21 # LOE rax rdx rbx rdi r8 r12 r13 r14 r15 xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.31: # Preds ..B2.16 ..B2.30 # Execution count [1.25e+00] vmovsd %xmm2, (%rdi,%r8,8) #172.21 vmovsd (%rdi,%rbx,8), %xmm3 #173.21 vxorpd %xmm4, %xmm4, %xmm4 #173.21 vunpcklpd %xmm4, %xmm3, %xmm5 #173.21 vsubpd %xmm1, %xmm5, %xmm1 #173.21 jmp ..B2.34 # Prob 100% #173.21 # LOE rax rdx rbx rdi r12 r13 r14 r15 xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.32: # Preds ..B2.30 # Execution count [0.00e+00] vxorpd %xmm2, %xmm2, %xmm2 #173.21 jmp ..B2.33 # Prob 100% #173.21 # LOE rax rdx rbx rdi r12 r13 r14 r15 xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.72: # Preds ..B2.21 # Execution count [7.50e-01] testl %r11d, %r11d #172.21 # LOE rax rdx rbx rdi r12 r13 r14 r15 xmm0 xmm1 xmm2 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.33: # Preds ..B2.32 ..B2.72 # Execution count [2.67e+00] vxorpd %xmm3, %xmm3, %xmm3 #173.21 vunpcklpd %xmm3, %xmm2, %xmm4 #173.21 vsubpd %xmm1, 
%xmm4, %xmm1 #173.21 je ..B2.35 # Prob 40% #173.21 # LOE rax rdx rbx rdi r12 r13 r14 r15 xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.34: # Preds ..B2.31 ..B2.33 # Execution count [1.25e+00] vmovsd %xmm1, (%rdi,%rbx,8) #173.21 vmovsd (%rdi,%rdx,8), %xmm2 #174.21 vxorpd %xmm3, %xmm3, %xmm3 #174.21 vunpcklpd %xmm3, %xmm2, %xmm4 #174.21 vsubpd %xmm0, %xmm4, %xmm0 #174.21 jmp ..B2.37 # Prob 100% #174.21 # LOE rax rdx rdi r12 r13 r14 r15 xmm0 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.35: # Preds ..B2.33 # Execution count [0.00e+00] vxorpd %xmm1, %xmm1, %xmm1 #174.21 jmp ..B2.36 # Prob 100% #174.21 # LOE rax rdx rdi r12 r13 r14 r15 xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.71: # Preds ..B2.25 # Execution count [7.50e-01] testl %r11d, %r11d #172.21 # LOE rax rdx rdi r12 r13 r14 r15 xmm0 xmm1 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.36: # Preds ..B2.35 ..B2.71 # Execution count [2.67e+00] vxorpd %xmm2, %xmm2, %xmm2 #174.21 vunpcklpd %xmm2, %xmm1, %xmm3 #174.21 vsubpd %xmm0, %xmm3, %xmm0 #174.21 je ..B2.38 # Prob 40% #174.21 # LOE rax rdx rdi r12 r13 r14 r15 xmm0 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.37: # Preds ..B2.34 ..B2.36 # Execution count [1.25e+00] vmovsd %xmm0, (%rdi,%rdx,8) #174.21 # LOE rax rdi r12 r13 r14 r15 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.38: # Preds ..B2.36 ..B2.29 ..B2.37 ..B2.13 ..B2.12 # # Execution count [1.25e+01] addq $2, %r15 #147.9 cmpq %r12, %r15 #147.9 jb ..B2.12 # Prob 82% #147.9 # LOE rax rdi r12 r13 r14 r15 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm13 xmm14 ..B2.39: # Preds ..B2.38 # Execution count [2.25e+00] vunpckhpd %xmm11, %xmm11, %xmm12 #137.22 vunpckhpd %xmm14, %xmm14, %xmm8 #135.22 vaddsd %xmm12, %xmm11, %xmm12 #137.22 vaddsd %xmm8, %xmm14, %xmm10 #135.22 vunpckhpd %xmm13, %xmm13, %xmm11 #136.22 vmovsd 112(%rsp), %xmm1 #[spill] vaddsd %xmm11, %xmm13, %xmm11 #136.22 vmovsd 120(%rsp), %xmm3 #[spill] vmovsd 128(%rsp), %xmm4 #[spill] vmovsd 136(%rsp), %xmm6 #[spill] vmovsd 
24(%rsp), %xmm7 #[spill] vmovsd 32(%rsp), %xmm2 #[spill] movl 16(%rsp), %edx #[spill] movq 56(%rsp), %r9 #[spill] movq 64(%rsp), %rsi #[spill] movq 72(%rsp), %r8 #[spill] movq 80(%rsp), %r10 #[spill] movq 88(%rsp), %r11 #[spill] movq 96(%rsp), %rcx #[spill] movq 104(%rsp), %rbx #[spill] vmovsd .L_2il0floatpacket.1(%rip), %xmm0 # vmovsd .L_2il0floatpacket.4(%rip), %xmm5 # # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r12 r14 edx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.40: # Preds ..B2.39 ..B2.56 # Execution count [2.50e+00] movslq %edx, %r13 #147.9 cmpq %r13, %r12 #147.9 jae ..B2.49 # Prob 10% #147.9 # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r12 r13 r14 edx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.41: # Preds ..B2.40 # Execution count [2.25e+00] imulq %rsi, %rax #130.43 movq %rcx, 96(%rsp) #106.5[spill] addq 40(%rsp), %rax #106.5[spill] movl 48(%rsp), %ecx #106.5[spill] movq %rbx, 104(%rsp) #106.5[spill] # LOE rax rsi rdi r8 r9 r10 r11 r12 r13 r14 edx ecx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.42: # Preds ..B2.45 ..B2.41 # Execution count [1.25e+01] movl (%rax,%r12,4), %ebx #148.21 lea (%rbx,%rbx,2), %r15d #149.36 movslq %r15d, %r15 #149.36 vsubsd 8(%r14,%r15,8), %xmm3, %xmm9 #150.36 vsubsd (%r14,%r15,8), %xmm4, %xmm14 #149.36 vsubsd 16(%r14,%r15,8), %xmm1, %xmm8 #151.36 vmulsd %xmm9, %xmm9, %xmm13 #152.49 vfmadd231sd %xmm14, %xmm14, %xmm13 #152.63 vfmadd231sd %xmm8, %xmm8, %xmm13 #152.63 vcomisd %xmm13, %xmm2 #162.22 jbe ..B2.45 # Prob 50% #162.22 # LOE rax rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 edx ecx ebx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 ..B2.43: # Preds ..B2.42 # Execution count [6.25e+00] vdivsd %xmm13, %xmm5, %xmm15 #163.39 vmulsd %xmm15, %xmm7, %xmm13 #164.38 vmulsd %xmm15, %xmm13, %xmm13 #164.44 vmulsd %xmm15, %xmm13, %xmm13 #164.50 vmulsd %xmm6, %xmm15, %xmm15 #165.55 vmulsd %xmm13, %xmm15, %xmm15 #165.64 vsubsd %xmm0, %xmm13, %xmm13 #165.55 vmulsd 
%xmm13, %xmm15, %xmm15 #165.70 vmulsd %xmm15, %xmm14, %xmm13 #166.31 vmulsd %xmm15, %xmm9, %xmm9 #167.31 vmulsd %xmm15, %xmm8, %xmm8 #168.31 vaddsd %xmm13, %xmm10, %xmm10 #166.17 vaddsd %xmm9, %xmm11, %xmm11 #167.17 vaddsd %xmm8, %xmm12, %xmm12 #168.17 cmpl %ecx, %ebx #171.24 jge ..B2.45 # Prob 50% #171.24 # LOE rax rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 edx ecx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 ..B2.44: # Preds ..B2.43 # Execution count [3.12e+00] vmovsd 8(%rdi,%r15,8), %xmm15 #173.21 vmovsd (%rdi,%r15,8), %xmm14 #172.21 vsubsd %xmm9, %xmm15, %xmm9 #173.21 vsubsd %xmm13, %xmm14, %xmm13 #172.21 vmovsd %xmm9, 8(%rdi,%r15,8) #173.21 vmovsd 16(%rdi,%r15,8), %xmm9 #174.21 vmovsd %xmm13, (%rdi,%r15,8) #172.21 vsubsd %xmm8, %xmm9, %xmm8 #174.21 vmovsd %xmm8, 16(%rdi,%r15,8) #174.21 # LOE rax rsi rdi r8 r9 r10 r11 r12 r13 r14 edx ecx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.45: # Preds ..B2.44 ..B2.43 ..B2.42 # Execution count [1.25e+01] incq %r12 #147.9 cmpq %r13, %r12 #147.9 jb ..B2.42 # Prob 82% #147.9 # LOE rax rsi rdi r8 r9 r10 r11 r12 r13 r14 edx ecx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.46: # Preds ..B2.45 # Execution count [2.25e+00] movq 96(%rsp), %rcx #[spill] movq 104(%rsp), %rbx #[spill] jmp ..B2.49 # Prob 100% # # LOE rcx rbx rsi rdi r8 r9 r10 r11 r13 r14 edx xmm0 xmm2 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.48: # Preds ..B2.9 ..B2.8 # Execution count [2.50e+00] movslq %edx, %r13 #183.9 # LOE rcx rbx rsi rdi r8 r9 r10 r11 r13 r14 edx xmm0 xmm2 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.49: # Preds ..B2.46 ..B2.40 ..B2.48 # Execution count [5.00e+00] addq %r13, %r10 #183.9 lea 3(%rdx), %eax #184.9 sarl $1, %eax #184.9 vaddsd (%r9,%rdi), %xmm10, %xmm1 #179.9 vaddsd 8(%r9,%rdi), %xmm11, %xmm3 #180.9 vaddsd 16(%r9,%rdi), %xmm12, %xmm4 #181.9 shrl $30, %eax #184.9 vmovsd %xmm1, (%r9,%rdi) #179.9 vmovsd %xmm3, 8(%r9,%rdi) #180.9 vmovsd %xmm4, 16(%r9,%rdi) #181.9 addq $24, %r9 #129.5 lea 
3(%rax,%rdx), %edx #184.9 movslq %ecx, %rax #129.32 sarl $2, %edx #184.9 incq %rcx #129.5 movslq %edx, %rdx #184.9 incq %rax #129.32 addq %rdx, %r8 #184.9 cmpq %rbx, %rcx #129.5 jb ..B2.8 # Prob 82% #129.5 # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r14 xmm0 xmm2 xmm5 xmm6 xmm7 ..B2.50: # Preds ..B2.49 # Execution count [9.00e-01] movq (%rsp), %r12 #[spill] movq %r10, (%r12) #183.9 movq %r8, 8(%r12) #184.9 jmp ..B2.53 # Prob 100% #184.9 # LOE ..B2.51: # Preds ..B2.1 # Execution count [5.00e-01] xorl %eax, %eax #122.16 ..___tag_value_computeForceLJHalfNeigh.154: # getTimeStamp() call getTimeStamp #122.16 ..___tag_value_computeForceLJHalfNeigh.155: # LOE xmm0 ..B2.69: # Preds ..B2.51 # Execution count [5.00e-01] vmovsd %xmm0, 8(%rsp) #122.16[spill] # LOE ..B2.52: # Preds ..B2.69 # Execution count [5.00e-01] movl $.L_2__STRING.1, %edi #126.5 ..___tag_value_computeForceLJHalfNeigh.157: # likwid_markerStartRegion(const char *) call likwid_markerStartRegion #126.5 ..___tag_value_computeForceLJHalfNeigh.158: # LOE ..B2.53: # Preds ..B2.50 ..B2.52 # Execution count [1.00e+00] movl $.L_2__STRING.1, %edi #187.5 ..___tag_value_computeForceLJHalfNeigh.159: # likwid_markerStopRegion(const char *) call likwid_markerStopRegion #187.5 ..___tag_value_computeForceLJHalfNeigh.160: # LOE ..B2.54: # Preds ..B2.53 # Execution count [1.00e+00] xorl %eax, %eax #190.16 ..___tag_value_computeForceLJHalfNeigh.161: # getTimeStamp() call getTimeStamp #190.16 ..___tag_value_computeForceLJHalfNeigh.162: # LOE xmm0 ..B2.55: # Preds ..B2.54 # Execution count [1.00e+00] vsubsd 8(%rsp), %xmm0, %xmm0 #191.14[spill] addq $216, %rsp #191.14 .cfi_restore 3 popq %rbx #191.14 .cfi_restore 15 popq %r15 #191.14 .cfi_restore 14 popq %r14 #191.14 .cfi_restore 13 popq %r13 #191.14 .cfi_restore 12 popq %r12 #191.14 movq %rbp, %rsp #191.14 popq %rbp #191.14 .cfi_def_cfa 7, 8 .cfi_restore 6 ret #191.14 .cfi_def_cfa 6, 16 .cfi_escape 0x10, 0x03, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd8, 0xff, 
0xff, 0xff, 0x22 .cfi_offset 6, -16 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf0, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe8, 0xff, 0xff, 0xff, 0x22 .cfi_escape 0x10, 0x0f, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x22 # LOE ..B2.56: # Preds ..B2.10 # Execution count [2.25e-01]: Infreq xorl %r12d, %r12d #147.9 jmp ..B2.40 # Prob 100% #147.9 # LOE rax rcx rbx rsi rdi r8 r9 r10 r11 r12 r14 edx xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 xmm10 xmm11 xmm12 ..B2.57: # Preds ..B2.2 # Execution count [1.00e+00]: Infreq lea (%rbx,%rbx,2), %rcx #105.18 cmpq $8, %rcx #116.5 jl ..B2.65 # Prob 10% #116.5 # LOE rcx rbx rdi r12 r13 r14 r15d ..B2.58: # Preds ..B2.57 # Execution count [1.00e+00]: Infreq movl %ecx, %eax #116.5 xorl %edx, %edx #116.5 andl $-8, %eax #116.5 movslq %eax, %rax #116.5 vxorpd %ymm0, %ymm0, %ymm0 #117.22 # LOE rax rdx rcx rbx rdi r12 r13 r14 r15d ymm0 ..B2.59: # Preds ..B2.59 ..B2.58 # Execution count [5.56e+00]: Infreq vmovupd %ymm0, (%rdi,%rdx,8) #117.9 vmovupd %ymm0, 32(%rdi,%rdx,8) #117.9 addq $8, %rdx #116.5 cmpq %rax, %rdx #116.5 jb ..B2.59 # Prob 82% #116.5 # LOE rax rdx rcx rbx rdi r12 r13 r14 r15d ymm0 ..B2.61: # Preds ..B2.59 ..B2.65 # Execution count [1.11e+00]: Infreq cmpq %rcx, %rax #116.5 jae ..B2.5 # Prob 10% #116.5 # LOE rax rcx rbx rdi r12 r13 r14 r15d ..B2.62: # Preds ..B2.61 # Execution count [1.00e+00]: Infreq xorl %edx, %edx # # LOE rax rdx rcx rbx rdi r12 r13 r14 r15d ..B2.63: # Preds ..B2.62 ..B2.63 # Execution count [5.56e+00]: Infreq movq %rdx, (%rdi,%rax,8) #117.9 incq %rax #116.5 cmpq %rcx, %rax #116.5 jb ..B2.63 # Prob 82% #116.5 jmp ..B2.5 # Prob 100% #116.5 # LOE rax rdx rcx rbx rdi r12 r13 r14 r15d ..B2.65: # Preds ..B2.57 # Execution count [1.00e-01]: Infreq xorl 
%eax, %eax                                    #116.5
        jmp       ..B2.61       # Prob 100%                     #116.5
        .align    16,0x90
                                # LOE rax rcx rbx rdi r12 r13 r14 r15d
# The statements from the top of this span down to the "End" marker below
# close computeForceLJHalfNeigh; the body of that function starts earlier
# in the file.
        .cfi_endproc
# mark_end;
        .type   computeForceLJHalfNeigh,@function
        .size   computeForceLJHalfNeigh,.-computeForceLJHalfNeigh
..LNcomputeForceLJHalfNeigh.1:
        .data
# -- End  computeForceLJHalfNeigh
        .text
.L_2__routine_start_computeForceLJFullNeigh_simd_2:
# -- Begin  computeForceLJFullNeigh_simd
        .text
# mark_begin;
        .align    16,0x90
        .globl computeForceLJFullNeigh_simd
#-----------------------------------------------------------------------
# computeForceLJFullNeigh_simd
#
# Syntax: AT&T (GAS). The four arguments arrive in %rdi, %rsi, %rdx and
# %rcx as listed below; only %rsi (parameter 2) is ever dereferenced.
#
# What this code does, in order:
#   1. Loads a signed 32-bit count from 4(%rsi). When count > 0 it
#      zero-fills 3*count 8-byte elements starting at the pointer stored
#      in 64(%rsi).
#      NOTE(review): presumably the local atom count and the force array
#      of the Atom argument -- confirm against the struct definition.
#   2. Calls getTimeStamp; the returned value is not read afterwards.
#   3. Writes the message at il0_peep_printf_format_0 to stderr (fputs).
#   4. Calls exit(-1). There is no `ret` and no epilogue: the code relies
#      on exit not returning.
#
# Stack: %rbp is pushed and %rsp is rounded down to a 32-byte boundary
# before any call is made.
#-----------------------------------------------------------------------
# --- computeForceLJFullNeigh_simd(Parameter *, Atom *, Neighbor *, Stats *)
computeForceLJFullNeigh_simd:
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
# parameter 4: %rcx
..B3.1:                         # Preds ..B3.0
                                # Execution count [1.00e+00]
        .cfi_startproc
..___tag_value_computeForceLJFullNeigh_simd.179:
..L180:
                                                        #194.101
        pushq     %rbp                                          #194.101
        .cfi_def_cfa_offset 16
        movq      %rsp, %rbp                                    #194.101
        .cfi_def_cfa 6, 16
        .cfi_offset 6, -16
        # Round %rsp down to a multiple of 32.
        andq      $-32, %rsp                                    #194.101
        # edx = count; this overwrites parameter 3, which is never read.
        movl      4(%rsi), %edx                                 #195.18
        # count <= 0: nothing to clear, go straight to the timestamp call.
        testl     %edx, %edx                                    #201.24
        jle       ..B3.4        # Prob 50%                      #201.24
                                # LOE rbx rsi r12 r13 r14 r15 edx
..B3.2:                         # Preds ..B3.1
                                # Execution count [5.00e-03]
        # rdi = start of the buffer to clear; eax = 3*count (32-bit).
        movq      64(%rsi), %rdi                                #202.9
        lea       (%rdx,%rdx,2), %eax                           #195.18
        # 12 elements or fewer: use the inline loops at ..B3.7 instead of
        # the memset helper.
        cmpl      $12, %eax                                     #201.5
        jle       ..B3.7        # Prob 0%                       #201.5
                                # LOE rbx rdi r12 r13 r14 r15 edx
..B3.3:                         # Preds ..B3.2
                                # Execution count [1.00e+00]
        # Helper is entered with rdi = buffer, esi = 0 and
        # rdx = 3*count*8 (the byte length of the region).
        movslq    %edx, %rdx                                    #201.5
        xorl      %esi, %esi                                    #201.5
        lea       (%rdx,%rdx,2), %rdx                           #201.5
        shlq      $3, %rdx                                      #201.5
        call      __intel_avx_rep_memset                        #201.5
                                # LOE rbx r12 r13 r14 r15
..B3.4:                         # Preds ..B3.13 ..B3.1 ..B3.11 ..B3.3
                                # Execution count [1.00e+00]
        # Common continuation of every clearing path.
        # eax is zeroed before the call -- presumably the SysV %al
        # vector-register count for a call made without a prototype.
        xorl      %eax, %eax                                    #207.16
        # Clear the upper ymm halves (ymm0 is written on the inline path)
        # before calling out.
        vzeroupper                                              #207.16
..___tag_value_computeForceLJFullNeigh_simd.184:
#       getTimeStamp()
        call      getTimeStamp                                  #207.16
..___tag_value_computeForceLJFullNeigh_simd.185:
                                # LOE
..B3.5:                         # Preds ..B3.4
                                # Execution count [1.00e+00]
        # fputs(message, stderr). The message address is loaded as a
        # 32-bit absolute immediate, i.e. this is not position-independent.
        movl      $il0_peep_printf_format_0, %edi               #210.5
        movq      stderr(%rip), %rsi                            #210.5
        call      fputs                                         #210.5
                                # LOE
..B3.6:                         # Preds ..B3.5
                                # Execution count [1.00e+00]
        # exit(-1). Nothing falls through from here: ..B3.7 below is
        # reached only by the jump in ..B3.2.
        movl      $-1, %edi                                     #211.5
#       exit(int)
        call      exit                                          #211.5
                                # LOE
..B3.7:                         # Preds ..B3.2
                                # Execution count [1.00e+00]: Infreq
        # Inline clear for small buffers. rsi = 3*count as a 64-bit value.
        movslq    %edx, %rdx                                    #201.5
        lea       (%rdx,%rdx,2), %rsi                           #195.18
        # Fewer than 8 elements: skip the vector loop entirely.
        cmpq      $8, %rsi                                      #201.5
        jl        ..B3.15       # Prob 10%                      #201.5
                                # LOE rbx rsi rdi r12 r13 r14 r15
..B3.8:                         # Preds ..B3.7
                                # Execution count [1.00e+00]: Infreq
        # rdx  = 3*count rounded down to a multiple of 8 (vector loop bound)
        # rcx  = 0, vector loop index
        # rax  = 0, the value stored by the scalar tail loop at ..B3.13
        # ymm0 = 0.0 in all four lanes
        movl      %esi, %edx                                    #201.5
        xorl      %ecx, %ecx                                    #201.5
        andl      $-8, %edx                                     #201.5
        xorl      %eax, %eax                                    #201.5
        movslq    %edx, %rdx                                    #201.5
        vxorpd    %ymm0, %ymm0, %ymm0                           #202.22
                                # LOE rax rdx rcx rbx rsi rdi r12 r13 r14 r15 ymm0
..B3.9:                         # Preds ..B3.9 ..B3.8
                                # Execution count [5.56e+00]: Infreq
        # Two unaligned 32-byte stores per iteration = 8 elements cleared.
        vmovupd   %ymm0, (%rdi,%rcx,8)                          #202.9
        vmovupd   %ymm0, 32(%rdi,%rcx,8)                        #202.9
        addq      $8, %rcx                                      #201.5
        cmpq      %rdx, %rcx                                    #201.5
        jb        ..B3.9        # Prob 82%                      #201.5
                                # LOE rax rdx rcx rbx rsi rdi r12 r13 r14 r15 ymm0
..B3.11:                        # Preds ..B3.9 ..B3.15
                                # Execution count [1.11e+00]: Infreq
        # rdx = index of the first element not yet cleared; done when it
        # has reached the total in rsi.
        cmpq      %rsi, %rdx                                    #201.5
        jae       ..B3.4        # Prob 10%                      #201.5
                                # LOE rax rdx rbx rsi rdi r12 r13 r14 r15
..B3.13:                        # Preds ..B3.11 ..B3.13
                                # Execution count [5.56e+00]: Infreq
        # Scalar tail: one 8-byte zero store (rax == 0) per iteration.
        movq      %rax, (%rdi,%rdx,8)                           #202.9
        incq      %rdx                                          #201.5
        cmpq      %rsi, %rdx                                    #201.5
        jb        ..B3.13       # Prob 82%                      #201.5
        jmp       ..B3.4        # Prob 100%                     #201.5
                                # LOE rax rdx rbx rsi rdi r12 r13 r14 r15
..B3.15:                        # Preds ..B3.7
                                # Execution count [1.00e-01]: Infreq
        # Fewer than 8 elements: the tail loop starts at index 0 and
        # stores rax = 0.
        xorl      %edx, %edx                                    #201.5
        xorl      %eax, %eax                                    #201.5
        jmp       ..B3.11       # Prob 100%                     #201.5
        .align    16,0x90
                                # LOE rax rdx rbx rsi rdi r12 r13 r14 r15
        .cfi_endproc
# mark_end;
        .type   computeForceLJFullNeigh_simd,@function
        .size   computeForceLJFullNeigh_simd,.-computeForceLJFullNeigh_simd
..LNcomputeForceLJFullNeigh_simd.2:
        .section .rodata.str1.32, "aMS",@progbits,1
        .align 32
        .align 32
# Message passed to fputs in ..B3.5. Read as little-endian ASCII, the
# words below spell
#   "Error: SIMD kernel not implemented for specified instruction set!"
# followed by a terminating NUL (the high byte of the final .word).
il0_peep_printf_format_0:
        .long   1869771333
        .long   1394621042
        .long   541347145
        .long   1852990827
        .long   1847618661
        .long   1763734639
        .long   1701605485
        .long   1953391981
        .long   1713398885
        .long   1931506287
        .long   1768121712
        .long   1684367718
        .long   1936615712
        .long   1668641396
        .long   1852795252
        .long   1952805664
        .word   33
        .data
# -- End  computeForceLJFullNeigh_simd
        .section .rodata, "a"
        .align 32
        .align 32
# Constant packets. Each pair of 32-bit words is listed low word first;
# read as an IEEE-754 double, 0x3ff00000:00000000 is 1.0,
# 0x3fe00000:00000000 is 0.5 and 0x40480000:00000000 is 48.0.
# Packet .2: four copies of the 1.0 bit pattern (32 bytes).
.L_2il0floatpacket.2:
        .long   0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000
        .type   .L_2il0floatpacket.2,@object
        .size   .L_2il0floatpacket.2,32
        .align 32
# Packet .3: four copies of the 0.5 bit pattern (32 bytes).
.L_2il0floatpacket.3:
        .long   0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000
        .type   .L_2il0floatpacket.3,@object
        .size   .L_2il0floatpacket.3,32
        .align 16
# Packet .5: four 32-bit words, each equal to 1.
.L_2il0floatpacket.5:
        .long   0x00000001,0x00000001,0x00000001,0x00000001
        .type   .L_2il0floatpacket.5,@object
        .size   .L_2il0floatpacket.5,16
        .align 16
# Packet .6: four 32-bit words, each equal to 2.
.L_2il0floatpacket.6:
        .long   0x00000002,0x00000002,0x00000002,0x00000002
        .type   .L_2il0floatpacket.6,@object
        .size   .L_2il0floatpacket.6,16
        .align 16
# Packet .7: two copies of the 1.0 bit pattern (16 bytes).
.L_2il0floatpacket.7:
        .long   0x00000000,0x3ff00000,0x00000000,0x3ff00000
        .type   .L_2il0floatpacket.7,@object
        .size   .L_2il0floatpacket.7,16
        .align 16
# Packet .8: two copies of the 0.5 bit pattern (16 bytes).
.L_2il0floatpacket.8:
        .long   0x00000000,0x3fe00000,0x00000000,0x3fe00000
        .type   .L_2il0floatpacket.8,@object
        .size   .L_2il0floatpacket.8,8+8
        .align 8
# Packet .0: a single double, 48.0.
.L_2il0floatpacket.0:
        .long   0x00000000,0x40480000
        .type   .L_2il0floatpacket.0,@object
        .size   .L_2il0floatpacket.0,8
        .align 8
# Packet .1: a single double, 0.5.
.L_2il0floatpacket.1:
        .long   0x00000000,0x3fe00000
        .type   .L_2il0floatpacket.1,@object
        .size   .L_2il0floatpacket.1,8
        .align 8
# Packet .4: a single double, 1.0.
.L_2il0floatpacket.4:
        .long   0x00000000,0x3ff00000
        .type   .L_2il0floatpacket.4,@object
        .size   .L_2il0floatpacket.4,8
        .section .rodata.str1.4, "aMS",@progbits,1
        .align 4
        .align 4
# Region-name strings handed to the likwid marker calls.
# .L_2__STRING.0 spells "force" plus NUL (6 bytes).
.L_2__STRING.0:
        .long   1668444006
        .word   101
        .type   .L_2__STRING.0,@object
        .size   .L_2__STRING.0,6
        .space 2, 0x00  # pad
        .align 4
# .L_2__STRING.1 spells "forceLJ-halfneigh" plus NUL (18 bytes).
.L_2__STRING.1:
        .long   1668444006
        .long   759843941
        .long   1718378856
        .long   1734960494
        .word   104
        .type   .L_2__STRING.1,@object
        .size   .L_2__STRING.1,18
        .data
        .section .note.GNU-stack, ""
# End