Merge branch 'master' of github.com:RRZE-HPC/MD-Bench
This commit is contained in:
commit
7db14b2ffe
376
asm/unused/force.s
Normal file
376
asm/unused/force.s
Normal file
@ -0,0 +1,376 @@
|
|||||||
|
.intel_syntax noprefix
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 16,0x90
|
||||||
|
.globl computeForce
|
||||||
|
computeForce:
|
||||||
|
# parameter 1: rdi Parameter*
|
||||||
|
# parameter 2: rsi Atom*
|
||||||
|
# parameter 3: rdx Neighbor*
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
|
||||||
|
# r9d <- atom->Nlocal
|
||||||
|
mov r9d, DWORD PTR [4+rsi]
|
||||||
|
# xmm2 <- param->cutforce
|
||||||
|
vmovsd xmm2, QWORD PTR [72+rdi]
|
||||||
|
# xmm1 <- param->sigma6
|
||||||
|
vmovsd xmm1, QWORD PTR [8+rdi]
|
||||||
|
# xmm0 <- param->epsilon
|
||||||
|
vmovsd xmm0, QWORD PTR [rdi]
|
||||||
|
|
||||||
|
# r13 <- atom->fx
|
||||||
|
mov r13, QWORD PTR [64+rsi]
|
||||||
|
# r14 <- atom->fy
|
||||||
|
mov r14, QWORD PTR [72+rsi]
|
||||||
|
# rdi <- atom->fz
|
||||||
|
mov rdi, QWORD PTR [80+rsi]
|
||||||
|
|
||||||
|
# atom->Nlocal <= 0
|
||||||
|
test r9d, r9d
|
||||||
|
jle ..B1.30
|
||||||
|
|
||||||
|
..B1.2:
|
||||||
|
# r10d <- 0
|
||||||
|
xor r10d, r10d
|
||||||
|
# ecx <- atom->Nlocal
|
||||||
|
mov ecx, r9d
|
||||||
|
# r8d <- 0
|
||||||
|
xor r8d, r8d
|
||||||
|
# r11d <- 1
|
||||||
|
mov r11d, 1
|
||||||
|
# eax <- 0
|
||||||
|
xor eax, eax
|
||||||
|
# ecx <- atom->Nlocal >> 1
|
||||||
|
shr ecx, 1
|
||||||
|
je ..B1.6
|
||||||
|
|
||||||
|
# Init forces to zero loop
|
||||||
|
..B1.4:
|
||||||
|
# fx[i] <- 0
|
||||||
|
mov QWORD PTR [r8+r13], rax
|
||||||
|
# i++
|
||||||
|
inc r10
|
||||||
|
# fy[i] <- 0
|
||||||
|
mov QWORD PTR [r8+r14], rax
|
||||||
|
# fz[i] <- 0
|
||||||
|
mov QWORD PTR [r8+rdi], rax
|
||||||
|
# fx[i] <- 0
|
||||||
|
mov QWORD PTR [8+r8+r13], rax
|
||||||
|
# fy[i] <- 0
|
||||||
|
mov QWORD PTR [8+r8+r14], rax
|
||||||
|
# fz[i] <- 0
|
||||||
|
mov QWORD PTR [8+r8+rdi], rax
|
||||||
|
# i++
|
||||||
|
add r8, 16
|
||||||
|
# i < Nlocal
|
||||||
|
cmp r10, rcx
|
||||||
|
jb ..B1.4
|
||||||
|
|
||||||
|
..B1.5:
|
||||||
|
# r11d <- i * 2 + 1
|
||||||
|
lea r11d, DWORD PTR [1+r10+r10]
|
||||||
|
..B1.6:
|
||||||
|
# r11d <- i * 2
|
||||||
|
lea ecx, DWORD PTR [-1+r11]
|
||||||
|
# i < Nlocal
|
||||||
|
cmp ecx, r9d
|
||||||
|
jae ..B1.8
|
||||||
|
|
||||||
|
..B1.7:
|
||||||
|
|
||||||
|
# r11 <- i * 2
|
||||||
|
movsxd r11, r11d
|
||||||
|
# fx[i] <- 0
|
||||||
|
mov QWORD PTR [-8+r13+r11*8], rax
|
||||||
|
# fy[i] <- 0
|
||||||
|
mov QWORD PTR [-8+r14+r11*8], rax
|
||||||
|
# fz[i] <- 0
|
||||||
|
mov QWORD PTR [-8+rdi+r11*8], rax
|
||||||
|
|
||||||
|
..B1.8:
|
||||||
|
|
||||||
|
# xmm15 <- cutforcesq
|
||||||
|
vmulsd xmm15, xmm2, xmm2
|
||||||
|
# r8d <- 0
|
||||||
|
xor r8d, r8d
|
||||||
|
# ymm18 <- 8
|
||||||
|
vmovdqu32 ymm18, YMMWORD PTR .L_2il0floatpacket.0[rip]
|
||||||
|
# xmm0 <- 48 * epsilon
|
||||||
|
vmulsd xmm0, xmm0, QWORD PTR .L_2il0floatpacket.3[rip]
|
||||||
|
# ymm17 <- [0..7]
|
||||||
|
vmovdqu32 ymm17, YMMWORD PTR .L_2il0floatpacket.1[rip]
|
||||||
|
# zmm7 <- 0.5
|
||||||
|
vmovups zmm7, ZMMWORD PTR .L_2il0floatpacket.4[rip]
|
||||||
|
# zmm16 <- cutforcesq
|
||||||
|
vbroadcastsd zmm16, xmm15
|
||||||
|
# zmm15 <- param->sigma6
|
||||||
|
vbroadcastsd zmm15, xmm1
|
||||||
|
# zmm16 <- 48 * epsilon
|
||||||
|
vbroadcastsd zmm14, xmm0
|
||||||
|
# r9 <- atom->Nlocal
|
||||||
|
movsxd r9, r9d
|
||||||
|
# r10d <- 0 (i)
|
||||||
|
xor r10d, r10d
|
||||||
|
# rcx <- neighbor->numneigh
|
||||||
|
mov rcx, QWORD PTR [24+rdx]
|
||||||
|
# r11 <- neighbor->neighbors
|
||||||
|
mov r11, QWORD PTR [8+rdx]
|
||||||
|
# r12 <- neighbor->maxneighs
|
||||||
|
movsxd r12, DWORD PTR [16+rdx]
|
||||||
|
|
||||||
|
# rdx <- atom->x
|
||||||
|
mov rdx, QWORD PTR [16+rsi]
|
||||||
|
# rax <- atom->y
|
||||||
|
mov rax, QWORD PTR [24+rsi]
|
||||||
|
# rsi <- atom->z
|
||||||
|
mov rsi, QWORD PTR [32+rsi]
|
||||||
|
|
||||||
|
# r12 <- neighbor->maxneighs * 4
|
||||||
|
shl r12, 2
|
||||||
|
# [-32+rsp] <- atom->Nlocal
|
||||||
|
mov QWORD PTR [-32+rsp], r9
|
||||||
|
# [-24+rsp] <- neighbor->numneigh
|
||||||
|
mov QWORD PTR [-24+rsp], rcx
|
||||||
|
# [-16+rsp] <- atom->fy
|
||||||
|
mov QWORD PTR [-16+rsp], r14
|
||||||
|
# [-8+rsp] <- atom->fx
|
||||||
|
mov QWORD PTR [-8+rsp], r13
|
||||||
|
# [-40+rsp] <- r15
|
||||||
|
mov QWORD PTR [-40+rsp], r15
|
||||||
|
# [-48+rsp] <- rbx
|
||||||
|
mov QWORD PTR [-48+rsp], rbx
|
||||||
|
# zmm19 <- 0
|
||||||
|
vpxord zmm19, zmm19, zmm19
|
||||||
|
|
||||||
|
# Loop over all atoms
|
||||||
|
..B1.9:
|
||||||
|
|
||||||
|
# rcx <- neighbor->numneigh
|
||||||
|
mov rcx, QWORD PTR [-24+rsp]
|
||||||
|
# xmm25 <- 0
|
||||||
|
vxorpd xmm25, xmm25, xmm25
|
||||||
|
# xmm20 <- 0
|
||||||
|
vmovapd xmm20, xmm25
|
||||||
|
# r13d <- neighbor->numneigh[i] (numneighs)
|
||||||
|
mov r13d, DWORD PTR [rcx+r10*4]
|
||||||
|
# xmm4 <- 0
|
||||||
|
vmovapd xmm4, xmm20
|
||||||
|
# xmm8 <- atom->x[i]
|
||||||
|
vmovsd xmm8, QWORD PTR [rdx+r10*8]
|
||||||
|
# xmm9 <- atom->y[i]
|
||||||
|
vmovsd xmm9, QWORD PTR [rax+r10*8]
|
||||||
|
# xmm9 <- atom->z[i]
|
||||||
|
vmovsd xmm10, QWORD PTR [rsi+r10*8]
|
||||||
|
# numneighs <= 0
|
||||||
|
test r13d, r13d
|
||||||
|
jle ..B1.27
|
||||||
|
|
||||||
|
..B1.10:
|
||||||
|
|
||||||
|
# zmm13 <- 0
|
||||||
|
vmovaps zmm13, zmm19
|
||||||
|
# zmm12 <- 0
|
||||||
|
vmovaps zmm12, zmm13
|
||||||
|
# zmm11 <- 0
|
||||||
|
vmovaps zmm11, zmm12
|
||||||
|
|
||||||
|
# numneighs < 8
|
||||||
|
cmp r13d, 8
|
||||||
|
jl ..B1.32
|
||||||
|
|
||||||
|
..B1.11:
|
||||||
|
|
||||||
|
# numneighs < 1200
|
||||||
|
cmp r13d, 1200
|
||||||
|
jl ..B1.31
|
||||||
|
|
||||||
|
..B1.12:
|
||||||
|
|
||||||
|
mov rcx, r12
|
||||||
|
imul rcx, r8
|
||||||
|
add rcx, r11
|
||||||
|
mov r9, rcx
|
||||||
|
and r9, 63
|
||||||
|
test r9d, 3
|
||||||
|
je ..B1.14
|
||||||
|
|
||||||
|
..B1.13:
|
||||||
|
|
||||||
|
xor r9d, r9d
|
||||||
|
jmp ..B1.16
|
||||||
|
|
||||||
|
..B1.14:
|
||||||
|
|
||||||
|
test r9d, r9d
|
||||||
|
je ..B1.16
|
||||||
|
|
||||||
|
..B1.15:
|
||||||
|
|
||||||
|
neg r9d
|
||||||
|
add r9d, 64
|
||||||
|
shr r9d, 2
|
||||||
|
cmp r13d, r9d
|
||||||
|
cmovl r9d, r13d
|
||||||
|
|
||||||
|
..B1.16:
|
||||||
|
|
||||||
|
mov ebx, r13d
|
||||||
|
sub ebx, r9d
|
||||||
|
and ebx, 7
|
||||||
|
neg ebx
|
||||||
|
add ebx, r13d
|
||||||
|
cmp r9d, 1
|
||||||
|
jb ..B1.20
|
||||||
|
|
||||||
|
..B1.20:
|
||||||
|
lea ecx, DWORD PTR [8+r9]
|
||||||
|
cmp ebx, ecx
|
||||||
|
jl ..B1.24
|
||||||
|
|
||||||
|
..B1.21:
|
||||||
|
mov rcx, r12
|
||||||
|
imul rcx, r8
|
||||||
|
vbroadcastsd zmm0, xmm8
|
||||||
|
vbroadcastsd zmm1, xmm9
|
||||||
|
vbroadcastsd zmm2, xmm10
|
||||||
|
movsxd r14, r9d
|
||||||
|
add rcx, r11
|
||||||
|
|
||||||
|
..B1.22:
|
||||||
|
vpcmpeqb k2, xmm0, xmm0
|
||||||
|
add r9d, 8
|
||||||
|
vpcmpeqb k1, xmm0, xmm0
|
||||||
|
vpcmpeqb k3, xmm0, xmm0
|
||||||
|
vmovdqu ymm3, YMMWORD PTR [rcx+r14*4]
|
||||||
|
add r14, 8
|
||||||
|
vpxord zmm5, zmm5, zmm5
|
||||||
|
vpxord zmm4, zmm4, zmm4
|
||||||
|
vpxord zmm6, zmm6, zmm6
|
||||||
|
|
||||||
|
vgatherdpd zmm5{k2}, QWORD PTR [rax+ymm3*8]
|
||||||
|
vgatherdpd zmm4{k1}, QWORD PTR [rdx+ymm3*8]
|
||||||
|
vgatherdpd zmm6{k3}, QWORD PTR [rsi+ymm3*8]
|
||||||
|
|
||||||
|
vsubpd zmm29, zmm1, zmm5
|
||||||
|
vsubpd zmm28, zmm0, zmm4
|
||||||
|
vsubpd zmm31, zmm2, zmm6
|
||||||
|
|
||||||
|
vmulpd zmm20, zmm29, zmm29
|
||||||
|
vfmadd231pd zmm20, zmm28, zmm28
|
||||||
|
vfmadd231pd zmm20, zmm31, zmm31
|
||||||
|
|
||||||
|
# if condition cutoff radius
|
||||||
|
vrcp14pd zmm27, zmm20 #-> sr2
|
||||||
|
vcmppd k5, zmm20, zmm16, 1
|
||||||
|
|
||||||
|
vmulpd zmm22, zmm27, zmm15 #-> sr2 * sigma6
|
||||||
|
vmulpd zmm24, zmm27, zmm14 #-> 48.0 * epsilon * sr2
|
||||||
|
|
||||||
|
vmulpd zmm25, zmm27, zmm22 #-> sr2 * sigma6 * sr2
|
||||||
|
vmulpd zmm23, zmm27, zmm25 #-> sr2 * sigma6 * sr2 * sr2
|
||||||
|
|
||||||
|
vfmsub213pd zmm27, zmm25, zmm7 #-> sr2 * sigma * sr2 * sr2 - 0.5
|
||||||
|
vmulpd zmm26, zmm23, zmm24 #-> 48.0 * epsilon * sr2 * sr2 * sigma6 * sr2
|
||||||
|
vmulpd zmm30, zmm26, zmm27 #->
|
||||||
|
|
||||||
|
vfmadd231pd zmm13{k5}, zmm30, zmm28
|
||||||
|
vfmadd231pd zmm12{k5}, zmm30, zmm29
|
||||||
|
vfmadd231pd zmm11{k5}, zmm30, zmm31
|
||||||
|
cmp r9d, ebx
|
||||||
|
jb ..B1.22
|
||||||
|
#end neighbor loop
|
||||||
|
|
||||||
|
..B1.26:
|
||||||
|
vmovups zmm10, ZMMWORD PTR .L_2il0floatpacket.6[rip]
|
||||||
|
vpermd zmm0, zmm10, zmm11
|
||||||
|
vpermd zmm5, zmm10, zmm12
|
||||||
|
vpermd zmm21, zmm10, zmm13
|
||||||
|
vaddpd zmm11, zmm0, zmm11
|
||||||
|
vaddpd zmm12, zmm5, zmm12
|
||||||
|
vaddpd zmm13, zmm21, zmm13
|
||||||
|
vpermpd zmm1, zmm11, 78
|
||||||
|
vpermpd zmm6, zmm12, 78
|
||||||
|
vpermpd zmm22, zmm13, 78
|
||||||
|
vaddpd zmm2, zmm11, zmm1
|
||||||
|
vaddpd zmm8, zmm12, zmm6
|
||||||
|
vaddpd zmm23, zmm13, zmm22
|
||||||
|
vpermpd zmm3, zmm2, 177
|
||||||
|
vpermpd zmm9, zmm8, 177
|
||||||
|
vpermpd zmm24, zmm23, 177
|
||||||
|
vaddpd zmm4, zmm2, zmm3
|
||||||
|
vaddpd zmm20, zmm8, zmm9
|
||||||
|
vaddpd zmm25, zmm23, zmm24
|
||||||
|
|
||||||
|
#exit function
|
||||||
|
..B1.27:
|
||||||
|
mov rcx, QWORD PTR [-8+rsp] #84.9[spill]
|
||||||
|
mov rbx, QWORD PTR [-16+rsp] #85.9[spill]
|
||||||
|
movsxd r8, r10d #55.32
|
||||||
|
inc r8 #55.32
|
||||||
|
vaddsd xmm0, xmm25, QWORD PTR [rcx+r10*8] #84.9
|
||||||
|
vmovsd QWORD PTR [rcx+r10*8], xmm0 #84.9
|
||||||
|
vaddsd xmm1, xmm20, QWORD PTR [rbx+r10*8] #85.9
|
||||||
|
vmovsd QWORD PTR [rbx+r10*8], xmm1 #85.9
|
||||||
|
vaddsd xmm2, xmm4, QWORD PTR [rdi+r10*8] #86.9
|
||||||
|
vmovsd QWORD PTR [rdi+r10*8], xmm2 #86.9
|
||||||
|
inc r10 #55.5
|
||||||
|
cmp r10, QWORD PTR [-32+rsp] #55.5[spill]
|
||||||
|
jb ..B1.9
|
||||||
|
|
||||||
|
|
||||||
|
vzeroupper #93.12
|
||||||
|
vxorpd xmm0, xmm0, xmm0 #93.12
|
||||||
|
pop r14 #93.12
|
||||||
|
pop r13 #93.12
|
||||||
|
pop r12 #93.12
|
||||||
|
ret #93.12
|
||||||
|
|
||||||
|
.type computeForce,@function
|
||||||
|
.size computeForce,.-computeForce
|
||||||
|
|
||||||
|
|
||||||
|
..LNcomputeForce.0:
|
||||||
|
.data
|
||||||
|
# -- End computeForce
|
||||||
|
.section .rodata, "a"
|
||||||
|
.align 64
|
||||||
|
.align 64
|
||||||
|
.L_2il0floatpacket.2:
|
||||||
|
.long 0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000
|
||||||
|
.type .L_2il0floatpacket.2,@object
|
||||||
|
.size .L_2il0floatpacket.2,64
|
||||||
|
.align 64
|
||||||
|
.L_2il0floatpacket.4:
|
||||||
|
.long 0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000
|
||||||
|
.type .L_2il0floatpacket.4,@object
|
||||||
|
.size .L_2il0floatpacket.4,64
|
||||||
|
.align 64
|
||||||
|
.L_2il0floatpacket.6:
|
||||||
|
.long 0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f,0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f
|
||||||
|
.type .L_2il0floatpacket.6,@object
|
||||||
|
.size .L_2il0floatpacket.6,64
|
||||||
|
.align 32
|
||||||
|
.L_2il0floatpacket.0:
|
||||||
|
.long 0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008
|
||||||
|
.type .L_2il0floatpacket.0,@object
|
||||||
|
.size .L_2il0floatpacket.0,32
|
||||||
|
.align 32
|
||||||
|
.L_2il0floatpacket.1:
|
||||||
|
.long 0x00000000,0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007
|
||||||
|
.type .L_2il0floatpacket.1,@object
|
||||||
|
.size .L_2il0floatpacket.1,32
|
||||||
|
.align 8
|
||||||
|
.L_2il0floatpacket.3:
|
||||||
|
.long 0x00000000,0x40480000
|
||||||
|
.type .L_2il0floatpacket.3,@object
|
||||||
|
.size .L_2il0floatpacket.3,8
|
||||||
|
.align 8
|
||||||
|
.L_2il0floatpacket.5:
|
||||||
|
.long 0x00000000,0x3ff00000
|
||||||
|
.type .L_2il0floatpacket.5,@object
|
||||||
|
.size .L_2il0floatpacket.5,8
|
||||||
|
.data
|
||||||
|
.section .note.GNU-stack, ""
|
||||||
|
# End
|
@ -1,7 +1,7 @@
|
|||||||
# Compiler tag (GCC/CLANG/ICC)
|
# Compiler tag (GCC/CLANG/ICC)
|
||||||
TAG ?= ICC
|
TAG ?= CLANG
|
||||||
# Enable likwid (true or false)
|
# Enable likwid (true or false)
|
||||||
ENABLE_LIKWID ?= true
|
ENABLE_LIKWID ?= false
|
||||||
# SP or DP
|
# SP or DP
|
||||||
DATA_TYPE ?= DP
|
DATA_TYPE ?= DP
|
||||||
# AOS or SOA
|
# AOS or SOA
|
||||||
|
232
src/force-tracing.c
Normal file
232
src/force-tracing.c
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||||
|
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* This file is part of MD-Bench.
|
||||||
|
*
|
||||||
|
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU Lesser General Public License as published
|
||||||
|
* by the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||||
|
* details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License along
|
||||||
|
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
#include <likwid-marker.h>
|
||||||
|
|
||||||
|
#include <timing.h>
|
||||||
|
#include <neighbor.h>
|
||||||
|
#include <parameter.h>
|
||||||
|
#include <atom.h>
|
||||||
|
#include <stats.h>
|
||||||
|
|
||||||
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef VECTOR_WIDTH
|
||||||
|
# define VECTOR_WIDTH 8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TRACER_CONDITION
|
||||||
|
# define TRACER_CONDITION (!(timestep % param->every))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MEM_TRACER
|
||||||
|
# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
|
||||||
|
if(TRACER_CONDITION) { \
|
||||||
|
char mem_tracer_fn[128]; \
|
||||||
|
snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
|
||||||
|
mem_tracer_fp = fopen(mem_tracer_fn, "w");
|
||||||
|
}
|
||||||
|
|
||||||
|
# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
|
||||||
|
# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
|
||||||
|
#else
|
||||||
|
# define MEM_TRACER_INIT
|
||||||
|
# define MEM_TRACER_END
|
||||||
|
# define MEM_TRACE(addr, op)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef INDEX_TRACER
|
||||||
|
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
|
||||||
|
if(TRACER_CONDITION) { \
|
||||||
|
char index_tracer_fn[128]; \
|
||||||
|
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
|
||||||
|
index_tracer_fp = fopen(index_tracer_fn, "w"); \
|
||||||
|
}
|
||||||
|
|
||||||
|
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
|
||||||
|
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
|
||||||
|
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
|
||||||
|
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
|
||||||
|
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
||||||
|
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
||||||
|
fprintf(index_tracer_fp, "I: "); \
|
||||||
|
for(int __j = 0; __j < __e; ++__j) { \
|
||||||
|
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
|
||||||
|
} \
|
||||||
|
fprintf(index_tracer_fp, "\n"); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
|
||||||
|
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
||||||
|
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
||||||
|
if(__e > 1) { \
|
||||||
|
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
|
||||||
|
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
|
||||||
|
if(l[__k] > l[__k + 1]) { \
|
||||||
|
int __t = l[__k]; \
|
||||||
|
l[__k] = l[__k + 1]; \
|
||||||
|
l[__k + 1] = __t; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
|
||||||
|
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
||||||
|
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
||||||
|
if(__e > 1) { \
|
||||||
|
fprintf(index_tracer_fp, "D: "); \
|
||||||
|
for(int __j = 0; __j < __e - 1; ++__j) { \
|
||||||
|
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
|
||||||
|
fprintf(index_tracer_fp, "%d ", __dist); \
|
||||||
|
} \
|
||||||
|
fprintf(index_tracer_fp, "\n"); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
# define INDEX_TRACER_INIT
|
||||||
|
# define INDEX_TRACER_END
|
||||||
|
# define INDEX_TRACE_NATOMS(nl, ng, mn)
|
||||||
|
# define INDEX_TRACE_ATOM(a)
|
||||||
|
# define INDEX_TRACE(l, e)
|
||||||
|
# define DIST_TRACE_SORT(l, e)
|
||||||
|
# define DIST_TRACE(l, e)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double computeForceTracing(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
|
||||||
|
MEM_TRACER_INIT;
|
||||||
|
INDEX_TRACER_INIT;
|
||||||
|
int Nlocal = atom->Nlocal;
|
||||||
|
int* neighs;
|
||||||
|
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
||||||
|
#ifndef EXPLICIT_TYPES
|
||||||
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
|
MD_FLOAT sigma6 = param->sigma6;
|
||||||
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
|
fx[i] = 0.0;
|
||||||
|
fy[i] = 0.0;
|
||||||
|
fz[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
|
||||||
|
double S = getTimeStamp();
|
||||||
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
|
for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) {
|
||||||
|
#pragma omp parallel for
|
||||||
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
|
int numneighs = neighbor->numneigh[i];
|
||||||
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
|
MD_FLOAT fix = 0;
|
||||||
|
MD_FLOAT fiy = 0;
|
||||||
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
|
MEM_TRACE(atom_x(i), 'R');
|
||||||
|
MEM_TRACE(atom_y(i), 'R');
|
||||||
|
MEM_TRACE(atom_z(i), 'R');
|
||||||
|
INDEX_TRACE_ATOM(i);
|
||||||
|
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
const int type_i = atom->type[i];
|
||||||
|
MEM_TRACE(atom->type(i), 'R');
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(VARIANT) && VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
|
||||||
|
#define REPEAT_NEIGHBORS_LOOP
|
||||||
|
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
|
||||||
|
for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//DIST_TRACE_SORT(neighs, numneighs);
|
||||||
|
INDEX_TRACE(neighs, numneighs);
|
||||||
|
//DIST_TRACE(neighs, numneighs);
|
||||||
|
|
||||||
|
for(int k = 0; k < numneighs; k++) {
|
||||||
|
int j = neighs[k];
|
||||||
|
MD_FLOAT delx = xtmp - atom_x(j);
|
||||||
|
MD_FLOAT dely = ytmp - atom_y(j);
|
||||||
|
MD_FLOAT delz = ztmp - atom_z(j);
|
||||||
|
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
|
MEM_TRACE(neighs[k], 'R');
|
||||||
|
MEM_TRACE(atom_x(j), 'R');
|
||||||
|
MEM_TRACE(atom_y(j), 'R');
|
||||||
|
MEM_TRACE(atom_z(j), 'R');
|
||||||
|
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
const int type_j = atom->type[j];
|
||||||
|
const int type_ij = type_i * atom->ntypes + type_j;
|
||||||
|
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||||
|
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||||
|
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||||
|
MEM_TRACE(atom->type(j), 'R');
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(rsq < cutforcesq) {
|
||||||
|
MD_FLOAT sr2 = 1.0 / rsq;
|
||||||
|
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||||
|
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||||
|
fix += delx * force;
|
||||||
|
fiy += dely * force;
|
||||||
|
fiz += delz * force;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef REPEAT_NEIGHBORS_LOOP
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
fx[i] += fix;
|
||||||
|
fy[i] += fiy;
|
||||||
|
fz[i] += fiz;
|
||||||
|
|
||||||
|
addStat(stats->total_force_neighs, numneighs);
|
||||||
|
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||||
|
MEM_TRACE(fx[i], 'R');
|
||||||
|
MEM_TRACE(fx[i], 'W');
|
||||||
|
MEM_TRACE(fy[i], 'R');
|
||||||
|
MEM_TRACE(fy[i], 'W');
|
||||||
|
MEM_TRACE(fz[i], 'R');
|
||||||
|
MEM_TRACE(fz[i], 'W');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LIKWID_MARKER_STOP("force");
|
||||||
|
double E = getTimeStamp();
|
||||||
|
|
||||||
|
INDEX_TRACER_END;
|
||||||
|
MEM_TRACER_END;
|
||||||
|
return E-S;
|
||||||
|
}
|
158
src/force.c
158
src/force.c
@ -26,110 +26,23 @@
|
|||||||
#include <neighbor.h>
|
#include <neighbor.h>
|
||||||
#include <parameter.h>
|
#include <parameter.h>
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
#include <stats.h>
|
|
||||||
|
|
||||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
double computeForce(
|
||||||
#include <stdio.h>
|
Parameter *param,
|
||||||
#include <stdlib.h>
|
Atom *atom,
|
||||||
#endif
|
Neighbor *neighbor
|
||||||
|
)
|
||||||
#ifndef VECTOR_WIDTH
|
{
|
||||||
# define VECTOR_WIDTH 8
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef TRACER_CONDITION
|
|
||||||
# define TRACER_CONDITION (!(timestep % param->every))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef MEM_TRACER
|
|
||||||
# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
|
|
||||||
if(TRACER_CONDITION) { \
|
|
||||||
char mem_tracer_fn[128]; \
|
|
||||||
snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
|
|
||||||
mem_tracer_fp = fopen(mem_tracer_fn, "w");
|
|
||||||
}
|
|
||||||
|
|
||||||
# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
|
|
||||||
# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
|
|
||||||
#else
|
|
||||||
# define MEM_TRACER_INIT
|
|
||||||
# define MEM_TRACER_END
|
|
||||||
# define MEM_TRACE(addr, op)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef INDEX_TRACER
|
|
||||||
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
|
|
||||||
if(TRACER_CONDITION) { \
|
|
||||||
char index_tracer_fn[128]; \
|
|
||||||
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
|
|
||||||
index_tracer_fp = fopen(index_tracer_fn, "w"); \
|
|
||||||
}
|
|
||||||
|
|
||||||
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
|
|
||||||
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
|
|
||||||
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
|
|
||||||
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
|
|
||||||
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
|
||||||
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
|
||||||
fprintf(index_tracer_fp, "I: "); \
|
|
||||||
for(int __j = 0; __j < __e; ++__j) { \
|
|
||||||
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
|
|
||||||
} \
|
|
||||||
fprintf(index_tracer_fp, "\n"); \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
|
|
||||||
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
|
||||||
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
|
||||||
if(__e > 1) { \
|
|
||||||
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
|
|
||||||
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
|
|
||||||
if(l[__k] > l[__k + 1]) { \
|
|
||||||
int __t = l[__k]; \
|
|
||||||
l[__k] = l[__k + 1]; \
|
|
||||||
l[__k + 1] = __t; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
|
|
||||||
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
|
||||||
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
|
||||||
if(__e > 1) { \
|
|
||||||
fprintf(index_tracer_fp, "D: "); \
|
|
||||||
for(int __j = 0; __j < __e - 1; ++__j) { \
|
|
||||||
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
|
|
||||||
fprintf(index_tracer_fp, "%d ", __dist); \
|
|
||||||
} \
|
|
||||||
fprintf(index_tracer_fp, "\n"); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
# define INDEX_TRACER_INIT
|
|
||||||
# define INDEX_TRACER_END
|
|
||||||
# define INDEX_TRACE_NATOMS(nl, ng, mn)
|
|
||||||
# define INDEX_TRACE_ATOM(a)
|
|
||||||
# define INDEX_TRACE(l, e)
|
|
||||||
# define DIST_TRACE_SORT(l, e)
|
|
||||||
# define DIST_TRACE(l, e)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
|
|
||||||
MEM_TRACER_INIT;
|
|
||||||
INDEX_TRACER_INIT;
|
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
int* neighs;
|
int* neighs;
|
||||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
MD_FLOAT* fx = atom->fx;
|
||||||
#ifndef EXPLICIT_TYPES
|
MD_FLOAT* fy = atom->fy;
|
||||||
|
MD_FLOAT* fz = atom->fz;
|
||||||
|
#ifndef EXPLICIT_TYPES
|
||||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
MD_FLOAT sigma6 = param->sigma6;
|
MD_FLOAT sigma6 = param->sigma6;
|
||||||
MD_FLOAT epsilon = param->epsilon;
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
fx[i] = 0.0;
|
fx[i] = 0.0;
|
||||||
@ -137,12 +50,10 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta
|
|||||||
fz[i] = 0.0;
|
fz[i] = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) {
|
#pragma omp parallel for
|
||||||
#pragma omp parallel for
|
|
||||||
for(int i = 0; i < Nlocal; i++) {
|
for(int i = 0; i < Nlocal; i++) {
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[i];
|
int numneighs = neighbor->numneigh[i];
|
||||||
@ -153,25 +64,10 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta
|
|||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
MEM_TRACE(atom_x(i), 'R');
|
#ifdef EXPLICIT_TYPES
|
||||||
MEM_TRACE(atom_y(i), 'R');
|
|
||||||
MEM_TRACE(atom_z(i), 'R');
|
|
||||||
INDEX_TRACE_ATOM(i);
|
|
||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
|
||||||
const int type_i = atom->type[i];
|
const int type_i = atom->type[i];
|
||||||
MEM_TRACE(atom->type(i), 'R');
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(VARIANT) && VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
|
|
||||||
#define REPEAT_NEIGHBORS_LOOP
|
|
||||||
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
|
|
||||||
for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//DIST_TRACE_SORT(neighs, numneighs);
|
|
||||||
INDEX_TRACE(neighs, numneighs);
|
|
||||||
//DIST_TRACE(neighs, numneighs);
|
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int j = neighs[k];
|
int j = neighs[k];
|
||||||
@ -180,19 +76,13 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta
|
|||||||
MD_FLOAT delz = ztmp - atom_z(j);
|
MD_FLOAT delz = ztmp - atom_z(j);
|
||||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||||
|
|
||||||
MEM_TRACE(neighs[k], 'R');
|
#ifdef EXPLICIT_TYPES
|
||||||
MEM_TRACE(atom_x(j), 'R');
|
|
||||||
MEM_TRACE(atom_y(j), 'R');
|
|
||||||
MEM_TRACE(atom_z(j), 'R');
|
|
||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
|
||||||
const int type_j = atom->type[j];
|
const int type_j = atom->type[j];
|
||||||
const int type_ij = type_i * atom->ntypes + type_j;
|
const int type_ij = type_i * atom->ntypes + type_j;
|
||||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||||
MEM_TRACE(atom->type(j), 'R');
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
if(rsq < cutforcesq) {
|
if(rsq < cutforcesq) {
|
||||||
MD_FLOAT sr2 = 1.0 / rsq;
|
MD_FLOAT sr2 = 1.0 / rsq;
|
||||||
@ -204,29 +94,13 @@ double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *sta
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef REPEAT_NEIGHBORS_LOOP
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
fx[i] += fix;
|
fx[i] += fix;
|
||||||
fy[i] += fiy;
|
fy[i] += fiy;
|
||||||
fz[i] += fiz;
|
fz[i] += fiz;
|
||||||
|
|
||||||
addStat(stats->total_force_neighs, numneighs);
|
|
||||||
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
|
||||||
MEM_TRACE(fx[i], 'R');
|
|
||||||
MEM_TRACE(fx[i], 'W');
|
|
||||||
MEM_TRACE(fy[i], 'R');
|
|
||||||
MEM_TRACE(fy[i], 'W');
|
|
||||||
MEM_TRACE(fz[i], 'R');
|
|
||||||
MEM_TRACE(fz[i], 'W');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
double E = getTimeStamp();
|
double E = getTimeStamp();
|
||||||
|
|
||||||
INDEX_TRACER_END;
|
|
||||||
MEM_TRACER_END;
|
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
|
27
src/main.c
27
src/main.c
@ -43,7 +43,8 @@
|
|||||||
|
|
||||||
#define HLINE "----------------------------------------------------------------------------\n"
|
#define HLINE "----------------------------------------------------------------------------\n"
|
||||||
|
|
||||||
extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
|
extern double computeForce(Parameter*, Atom*, Neighbor*);
|
||||||
|
extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
|
||||||
extern double computeForceEam(Eam* eam, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
|
extern double computeForceEam(Eam* eam, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
|
||||||
|
|
||||||
void init(Parameter *param)
|
void init(Parameter *param)
|
||||||
@ -152,11 +153,11 @@ void printAtomState(Atom *atom)
|
|||||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
|
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
|
||||||
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
||||||
|
|
||||||
/* int nall = atom->Nlocal + atom->Nghost; */
|
/* int nall = atom->Nlocal + atom->Nghost; */
|
||||||
|
|
||||||
/* for (int i=0; i<nall; i++) { */
|
/* for (int i=0; i<nall; i++) { */
|
||||||
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
|
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
|
||||||
/* } */
|
/* } */
|
||||||
}
|
}
|
||||||
|
|
||||||
int str2ff(const char *string)
|
int str2ff(const char *string)
|
||||||
@ -247,11 +248,14 @@ int main(int argc, char** argv)
|
|||||||
|
|
||||||
setup(¶m, &eam, &atom, &neighbor, &stats);
|
setup(¶m, &eam, &atom, &neighbor, &stats);
|
||||||
computeThermo(0, ¶m, &atom);
|
computeThermo(0, ¶m, &atom);
|
||||||
|
|
||||||
if(param.force_field == FF_EAM) {
|
if(param.force_field == FF_EAM) {
|
||||||
computeForceEam(&eam, &atom, &neighbor, &stats, 1, 0);
|
computeForceEam(&eam, &atom, &neighbor, &stats, 1, 0);
|
||||||
} else {
|
} else {
|
||||||
computeForce(¶m, &atom, &neighbor, &stats, 1, 0);
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
|
||||||
|
computeForceTracing(¶m, &atom, &neighbor, &stats, 1, 0);
|
||||||
|
#else
|
||||||
|
computeForce(¶m, &atom, &neighbor);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
timer[FORCE] = 0.0;
|
timer[FORCE] = 0.0;
|
||||||
@ -270,9 +274,12 @@ int main(int argc, char** argv)
|
|||||||
if(param.force_field == FF_EAM) {
|
if(param.force_field == FF_EAM) {
|
||||||
timer[FORCE] += computeForceEam(&eam, &atom, &neighbor, &stats, 0, n + 1);
|
timer[FORCE] += computeForceEam(&eam, &atom, &neighbor, &stats, 0, n + 1);
|
||||||
} else {
|
} else {
|
||||||
timer[FORCE] += computeForce(¶m, &atom, &neighbor, &stats, 0, n + 1);
|
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
|
||||||
|
timer[FORCE] += computeForceTracing(¶m, &atom, &neighbor, &stats, 0, n + 1);
|
||||||
|
#else
|
||||||
|
timer[FORCE] += computeForce(¶m, &atom, &neighbor);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
finalIntegrate(¶m, &atom);
|
finalIntegrate(¶m, &atom);
|
||||||
|
|
||||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||||
@ -298,7 +305,9 @@ int main(int argc, char** argv)
|
|||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("Performance: %.2f million atom updates per second\n",
|
printf("Performance: %.2f million atom updates per second\n",
|
||||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||||
|
#ifdef COMPUTE_STATS
|
||||||
displayStatistics(&atom, ¶m, &stats, timer);
|
displayStatistics(&atom, ¶m, &stats, timer);
|
||||||
|
#endif
|
||||||
LIKWID_MARKER_CLOSE;
|
LIKWID_MARKER_CLOSE;
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user