Update asm force again
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
cf7ea1460c
commit
d8c8733cb2
@@ -66,7 +66,7 @@ computeForce:
|
|||||||
vmovups zmm7, ZMMWORD PTR .L_2il0floatpacket.4[rip] # zmm7 <- [0.5, ...]
|
vmovups zmm7, ZMMWORD PTR .L_2il0floatpacket.4[rip] # zmm7 <- [0.5, ...]
|
||||||
vbroadcastsd zmm16, xmm15 # zmm16 <- [cutforcesq, ...]
|
vbroadcastsd zmm16, xmm15 # zmm16 <- [cutforcesq, ...]
|
||||||
vbroadcastsd zmm15, xmm1 # zmm15 <- [param->sigma6, ...]
|
vbroadcastsd zmm15, xmm1 # zmm15 <- [param->sigma6, ...]
|
||||||
vbroadcastsd zmm14, xmm0 # zmm16 <- [48 * epsilon, ...]
|
vbroadcastsd zmm14, xmm0 # zmm14 <- [48 * epsilon, ...]
|
||||||
movsxd r9, r9d # r9 <- atom->Nlocal
|
movsxd r9, r9d # r9 <- atom->Nlocal
|
||||||
xor r10d, r10d # r10d <- 0 (i)
|
xor r10d, r10d # r10d <- 0 (i)
|
||||||
mov rcx, QWORD PTR [24+rdx] # rcx <- neighbor->numneigh
|
mov rcx, QWORD PTR [24+rdx] # rcx <- neighbor->numneigh
|
||||||
@@ -74,7 +74,7 @@ computeForce:
|
|||||||
movsxd r12, DWORD PTR [16+rdx] # r12 <- neighbor->maxneighs
|
movsxd r12, DWORD PTR [16+rdx] # r12 <- neighbor->maxneighs
|
||||||
mov rdx, QWORD PTR [16+rsi] # rdx <- atom->x
|
mov rdx, QWORD PTR [16+rsi] # rdx <- atom->x
|
||||||
### AOS
|
### AOS
|
||||||
xor eax, eax
|
xor eax, eax
|
||||||
### SOA
|
### SOA
|
||||||
#mov rax, QWORD PTR [24+rsi] # rax <- atom->y
|
#mov rax, QWORD PTR [24+rsi] # rax <- atom->y
|
||||||
#mov rsi, QWORD PTR [32+rsi] # rsi <- atom->z
|
#mov rsi, QWORD PTR [32+rsi] # rsi <- atom->z
|
||||||
@@ -91,10 +91,10 @@ computeForce:
|
|||||||
|
|
||||||
..atom_loop_begin:
|
..atom_loop_begin:
|
||||||
mov rcx, QWORD PTR [-24+rsp] # rcx <- neighbor->numneigh
|
mov rcx, QWORD PTR [-24+rsp] # rcx <- neighbor->numneigh
|
||||||
vxorpd xmm25, xmm25, xmm25 # xmm25 <- 0
|
vxorpd xmm25, xmm25, xmm25 # xmm25 <- 0 (fix)
|
||||||
vmovapd xmm20, xmm25 # xmm20 <- 0
|
vmovapd xmm20, xmm25 # xmm20 <- 0 (fiy)
|
||||||
mov r13d, DWORD PTR [rcx+r10*4] # r13d <- neighbor->numneigh[i] (numneighs)
|
mov r13d, DWORD PTR [rcx+r10*4] # r13d <- neighbor->numneigh[i] (numneighs)
|
||||||
vmovapd xmm4, xmm20 # xmm4 <- 0
|
vmovapd xmm4, xmm20 # xmm4 <- 0 (fiz)
|
||||||
|
|
||||||
### AOS
|
### AOS
|
||||||
vmovsd xmm8, QWORD PTR[rdx+rax] # xmm8 <- atom->x[i * 3]
|
vmovsd xmm8, QWORD PTR[rdx+rax] # xmm8 <- atom->x[i * 3]
|
||||||
@@ -114,10 +114,10 @@ computeForce:
|
|||||||
vpxord zmm13, zmm13, zmm13 # zmm13 <- 0 (fix)
|
vpxord zmm13, zmm13, zmm13 # zmm13 <- 0 (fix)
|
||||||
vmovaps zmm12, zmm13 # zmm12 <- 0 (fiy)
|
vmovaps zmm12, zmm13 # zmm12 <- 0 (fiy)
|
||||||
vmovaps zmm11, zmm12 # zmm11 <- 0 (fiz)
|
vmovaps zmm11, zmm12 # zmm11 <- 0 (fiz)
|
||||||
mov rcx, r12
|
mov rcx, r12 # rcx <- neighbor->maxneighs * 4
|
||||||
imul rcx, r10
|
imul rcx, r10 # rcx <- neighbor->maxneighs * 4 * i
|
||||||
add rcx, r11 # rcx <- &neighbor->neighbors[neighbor->maxneighs * i (r10)]
|
add rcx, r11 # rcx <- &neighbor->neighbors[neighbor->maxneighs * i]
|
||||||
xor r11d, r11d # r11d <- 0
|
xor r9d, r9d # r9d <- 0 (k)
|
||||||
mov r14d, r13d # r14d <- numneighs
|
mov r14d, r13d # r14d <- numneighs
|
||||||
cmp r14d, 8
|
cmp r14d, 8
|
||||||
jl ..compute_forces_remainder
|
jl ..compute_forces_remainder
|
||||||
@@ -166,7 +166,7 @@ computeForce:
|
|||||||
vpcmpeqb k1, xmm0, xmm0
|
vpcmpeqb k1, xmm0, xmm0
|
||||||
vpcmpeqb k2, xmm0, xmm0
|
vpcmpeqb k2, xmm0, xmm0
|
||||||
vpcmpeqb k3, xmm0, xmm0
|
vpcmpeqb k3, xmm0, xmm0
|
||||||
vmovdqu ymm3, YMMWORD PTR [rcx+r11*4]
|
vmovdqu ymm3, YMMWORD PTR [rcx+r9*4]
|
||||||
vpxord zmm5, zmm5, zmm5
|
vpxord zmm5, zmm5, zmm5
|
||||||
vpxord zmm6, zmm6, zmm6
|
vpxord zmm6, zmm6, zmm6
|
||||||
|
|
||||||
@@ -205,9 +205,9 @@ computeForce:
|
|||||||
vfmadd231pd zmm12{k5}, zmm30, zmm29 # fiy += force * dely
|
vfmadd231pd zmm12{k5}, zmm30, zmm29 # fiy += force * dely
|
||||||
vfmadd231pd zmm11{k5}, zmm30, zmm31 # fiz += force * delz
|
vfmadd231pd zmm11{k5}, zmm30, zmm31 # fiz += force * delz
|
||||||
sub r14, 8
|
sub r14, 8
|
||||||
add r11, 8
|
add r9, 8
|
||||||
cmp r14, 8
|
cmp r14, 8
|
||||||
jg ..compute_forces
|
jge ..compute_forces
|
||||||
|
|
||||||
# Check if there are remaining neighbors to be computed
|
# Check if there are remaining neighbors to be computed
|
||||||
..compute_forces_remainder:
|
..compute_forces_remainder:
|
||||||
@@ -217,7 +217,7 @@ computeForce:
|
|||||||
vpbroadcastd ymm0, r14d
|
vpbroadcastd ymm0, r14d
|
||||||
vpcmpgtd k1, ymm0, ymm17
|
vpcmpgtd k1, ymm0, ymm17
|
||||||
kmovw r15d, k1
|
kmovw r15d, k1
|
||||||
vmovdqu ymm3{k3}{z}, YMMWORD PTR [rcx+r11*4]
|
vmovdqu ymm3{k3}{z}, YMMWORD PTR [rcx+r9*4]
|
||||||
kmov k2, k1
|
kmov k2, k1
|
||||||
kmov k3, k1
|
kmov k3, k1
|
||||||
vpxord zmm5, zmm5, zmm5
|
vpxord zmm5, zmm5, zmm5
|
||||||
|
Loading…
Reference in New Issue
Block a user