Update asm force again
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
cf7ea1460c
commit
d8c8733cb2
@ -66,7 +66,7 @@ computeForce:
|
||||
vmovups zmm7, ZMMWORD PTR .L_2il0floatpacket.4[rip] # zmm7 <- [0.5, ...]
|
||||
vbroadcastsd zmm16, xmm15 # zmm16 <- [cutforcesq, ...]
|
||||
vbroadcastsd zmm15, xmm1 # zmm15 <- [param->sigma6, ...]
|
||||
vbroadcastsd zmm14, xmm0 # zmm16 <- [48 * epsilon, ...]
|
||||
vbroadcastsd zmm14, xmm0 # zmm14 <- [48 * epsilon, ...]
|
||||
movsxd r9, r9d # r9 <- atom->Nlocal
|
||||
xor r10d, r10d # r10d <- 0 (i)
|
||||
mov rcx, QWORD PTR [24+rdx] # rcx <- neighbor->numneigh
|
||||
@ -74,7 +74,7 @@ computeForce:
|
||||
movsxd r12, DWORD PTR [16+rdx] # r12 <- neighbor->maxneighs
|
||||
mov rdx, QWORD PTR [16+rsi] # rdx <- atom->x
|
||||
### AOS
|
||||
xor eax, eax
|
||||
xor eax, eax
|
||||
### SOA
|
||||
#mov rax, QWORD PTR [24+rsi] # rax <- atom->y
|
||||
#mov rsi, QWORD PTR [32+rsi] # rsi <- atom->z
|
||||
@ -91,10 +91,10 @@ computeForce:
|
||||
|
||||
..atom_loop_begin:
|
||||
mov rcx, QWORD PTR [-24+rsp] # rcx <- neighbor->numneigh
|
||||
vxorpd xmm25, xmm25, xmm25 # xmm25 <- 0
|
||||
vmovapd xmm20, xmm25 # xmm20 <- 0
|
||||
vxorpd xmm25, xmm25, xmm25 # xmm25 <- 0 (fix)
|
||||
vmovapd xmm20, xmm25 # xmm20 <- 0 (fiy)
|
||||
mov r13d, DWORD PTR [rcx+r10*4] # r13d <- neighbor->numneigh[i] (numneighs)
|
||||
vmovapd xmm4, xmm20 # xmm4 <- 0
|
||||
vmovapd xmm4, xmm20 # xmm4 <- 0 (fiz)
|
||||
|
||||
### AOS
|
||||
vmovsd xmm8, QWORD PTR[rdx+rax] # xmm8 <- atom->x[i * 3]
|
||||
@ -114,10 +114,10 @@ computeForce:
|
||||
vpxord zmm13, zmm13, zmm13 # zmm13 <- 0 (fix)
|
||||
vmovaps zmm12, zmm13 # zmm12 <- 0 (fiy)
|
||||
vmovaps zmm11, zmm12 # zmm11 <- 0 (fiz)
|
||||
mov rcx, r12
|
||||
imul rcx, r10
|
||||
add rcx, r11 # rcx <- &neighbor->neighbors[neighbor->maxneighs * i (r10)]
|
||||
xor r11d, r11d # r11d <- 0
|
||||
mov rcx, r12 # rcx <- neighbor->maxneighs * 4
|
||||
imul rcx, r10 # rcx <- neighbor->maxneighs * 4 * i
|
||||
add rcx, r11 # rcx <- &neighbor->neighbors[neighbor->maxneighs * i]
|
||||
xor r9d, r9d # r9d <- 0 (k)
|
||||
mov r14d, r13d # r14d <- numneighs
|
||||
cmp r14d, 8
|
||||
jl ..compute_forces_remainder
|
||||
@ -166,7 +166,7 @@ computeForce:
|
||||
vpcmpeqb k1, xmm0, xmm0
|
||||
vpcmpeqb k2, xmm0, xmm0
|
||||
vpcmpeqb k3, xmm0, xmm0
|
||||
vmovdqu ymm3, YMMWORD PTR [rcx+r11*4]
|
||||
vmovdqu ymm3, YMMWORD PTR [rcx+r9*4]
|
||||
vpxord zmm5, zmm5, zmm5
|
||||
vpxord zmm6, zmm6, zmm6
|
||||
|
||||
@ -205,9 +205,9 @@ computeForce:
|
||||
vfmadd231pd zmm12{k5}, zmm30, zmm29 # fiy += force * dely
|
||||
vfmadd231pd zmm11{k5}, zmm30, zmm31 # fiz += force * delz
|
||||
sub r14, 8
|
||||
add r11, 8
|
||||
add r9, 8
|
||||
cmp r14, 8
|
||||
jg ..compute_forces
|
||||
jge ..compute_forces
|
||||
|
||||
# Check if there are remaining neighbors to be computed
|
||||
..compute_forces_remainder:
|
||||
@ -217,7 +217,7 @@ computeForce:
|
||||
vpbroadcastd ymm0, r14d
|
||||
vpcmpgtd k1, ymm0, ymm17
|
||||
kmovw r15d, k1
|
||||
vmovdqu ymm3{k3}{z}, YMMWORD PTR [rcx+r11*4]
|
||||
vmovdqu ymm3{k3}{z}, YMMWORD PTR [rcx+r9*4]
|
||||
kmov k2, k1
|
||||
kmov k3, k1
|
||||
vpxord zmm5, zmm5, zmm5
|
||||
|
Loading…
Reference in New Issue
Block a user