From df09c2861e2cb17da9dc1a94e1fafb4829ac27ec Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Mon, 17 Jan 2022 14:15:02 +0100 Subject: [PATCH] Add first version with more than one optimization scheme Signed-off-by: Rafael Ravedutti --- asm/unused/force_lj.s | 326 +++++++++++++++++ {src => default}/allocate.c | 0 {src => default}/atom.c | 0 {src => default}/eam_utils.c | 0 {src => default}/force_eam.c | 0 {src => default}/force_lj.c | 0 {src => default}/includes/allocate.h | 0 {src => default}/includes/atom.h | 0 {src => default}/includes/eam.h | 0 {src => default}/includes/likwid-marker.h | 0 {src => default}/includes/neighbor.h | 0 {src => default}/includes/parameter.h | 0 {src => default}/includes/pbc.h | 0 {src => default}/includes/stats.h | 0 {src => default}/includes/thermo.h | 0 {src => default}/includes/timers.h | 0 {src => default}/includes/timing.h | 0 {src => default}/includes/tracing.h | 0 {src => default}/includes/util.h | 0 {src => default}/includes/vtk.h | 0 {src => default}/main-stub.c | 0 {src => default}/main.c | 0 {src => default}/neighbor.c | 0 {src => default}/pbc.c | 0 {src => default}/stats.c | 0 {src => default}/thermo.c | 0 {src => default}/timing.c | 0 {src => default}/tracing.c | 0 {src => default}/util.c | 0 {src => default}/vtk.c | 0 gromacs/allocate.c | 70 ++++ gromacs/atom.c | 276 ++++++++++++++ gromacs/eam_utils.c | 285 +++++++++++++++ gromacs/force_eam.c | 213 +++++++++++ gromacs/force_lj.c | 104 ++++++ gromacs/includes/allocate.h | 29 ++ gromacs/includes/atom.h | 59 +++ gromacs/includes/eam.h | 55 +++ gromacs/includes/likwid-marker.h | 170 +++++++++ gromacs/includes/neighbor.h | 41 +++ gromacs/includes/parameter.h | 56 +++ gromacs/includes/pbc.h | 32 ++ gromacs/includes/stats.h | 46 +++ gromacs/includes/thermo.h | 31 ++ gromacs/includes/timers.h | 11 + gromacs/includes/timing.h | 30 ++ gromacs/includes/tracing.h | 118 ++++++ gromacs/includes/util.h | 43 +++ gromacs/includes/vtk.h | 28 ++ gromacs/main-stub.c | 292 +++++++++++++++ gromacs/main.c | 324 +++++++++++++++++ gromacs/neighbor.c | 420 ++++++++++++++++++++++ gromacs/pbc.c | 172 +++++++++ gromacs/stats.c | 31 ++ gromacs/thermo.c | 138 +++++++ gromacs/timing.c | 43 +++ gromacs/tracing.c | 73 ++++ gromacs/util.c | 95 +++++ gromacs/vtk.c | 44 +++ 59 files changed, 3655 insertions(+) create mode 100644 asm/unused/force_lj.s rename {src => default}/allocate.c (100%) rename {src => default}/atom.c (100%) rename {src => default}/eam_utils.c (100%) rename {src => default}/force_eam.c (100%) rename {src => default}/force_lj.c (100%) rename {src => default}/includes/allocate.h (100%) rename {src => default}/includes/atom.h (100%) rename {src => default}/includes/eam.h (100%) rename {src => default}/includes/likwid-marker.h (100%) rename {src => default}/includes/neighbor.h (100%) rename {src => default}/includes/parameter.h (100%) rename {src => default}/includes/pbc.h (100%) rename {src => default}/includes/stats.h (100%) rename {src => default}/includes/thermo.h (100%) rename {src => default}/includes/timers.h (100%) rename {src => default}/includes/timing.h (100%) rename {src => default}/includes/tracing.h (100%) rename {src => default}/includes/util.h (100%) rename {src => default}/includes/vtk.h (100%) rename {src => default}/main-stub.c (100%) rename {src => default}/main.c (100%) rename {src => default}/neighbor.c (100%) rename {src => default}/pbc.c (100%) rename {src => default}/stats.c (100%) rename {src => default}/thermo.c (100%) rename {src => default}/timing.c (100%) rename {src => default}/tracing.c (100%) rename {src => default}/util.c (100%) rename {src => default}/vtk.c (100%) create mode 100644 gromacs/allocate.c create mode 100644 gromacs/atom.c create mode 100644 gromacs/eam_utils.c create mode 100644 gromacs/force_eam.c create mode 100644 gromacs/force_lj.c create mode 100644 gromacs/includes/allocate.h create mode 100644 gromacs/includes/atom.h create mode 100644 gromacs/includes/eam.h create mode 100644 gromacs/includes/likwid-marker.h create mode 100644 gromacs/includes/neighbor.h create mode 100644 gromacs/includes/parameter.h create mode 100644 gromacs/includes/pbc.h create mode 100644 gromacs/includes/stats.h create mode 100644 gromacs/includes/thermo.h create mode 100644 gromacs/includes/timers.h create mode 100644 gromacs/includes/timing.h create mode 100644 gromacs/includes/tracing.h create mode 100644 gromacs/includes/util.h create mode 100644 gromacs/includes/vtk.h create mode 100644 gromacs/main-stub.c create mode 100644 gromacs/main.c create mode 100644 gromacs/neighbor.c create mode 100644 gromacs/pbc.c create mode 100644 gromacs/stats.c create mode 100644 gromacs/thermo.c create mode 100644 gromacs/timing.c create mode 100644 gromacs/tracing.c create mode 100644 gromacs/util.c create mode 100644 gromacs/vtk.c diff --git a/asm/unused/force_lj.s b/asm/unused/force_lj.s new file mode 100644 index 0000000..f1d5cd8 --- /dev/null +++ b/asm/unused/force_lj.s @@ -0,0 +1,326 @@ +.intel_syntax noprefix + +.text +.align 16,0x90 +.globl computeForceLJ +computeForceLJ: +# parameter 1: rdi Parameter* +# parameter 2: rsi Atom* +# parameter 3: rdx Neighbor* + push rbp + push r12 + push r13 + push r14 + push r15 + push rbx + mov r9d, DWORD PTR [4+rsi] # r9d <- atom->Nlocal + vmovsd xmm2, QWORD PTR [96+rdi] # xmm2 <- param->cutforce + vmovsd xmm1, QWORD PTR [32+rdi] # xmm1 <- param->sigma6 + vmovsd xmm0, QWORD PTR [24+rdi] # xmm0 <- param->epsilon + mov r13, QWORD PTR [64+rsi] # r13 <- atom->fx + mov r14, QWORD PTR [72+rsi] # r14 <- atom->fy + mov rdi, QWORD PTR [80+rsi] # rdi <- atom->fz + test r9d, r9d # atom->Nlocal <= 0 + jle ..atom_loop_exit + xor r10d, r10d # r10d <- 0 + mov ecx, r9d # ecx <- atom->Nlocal + xor r8d, r8d # r8d <- 0 + mov r11d, 1 # r11d <- 1 + xor eax, eax # eax <- 0 + shr ecx, 1 # ecx <- atom->Nlocal >> 1 + je ..zero_last_element # ecx == 0 + +# Init forces to zero loop (unroll factor = 2) +..init_force_loop: + mov QWORD PTR [r8+r13], rax # fx[i] <- 0 + mov QWORD PTR [r8+r14], rax # fy[i] <- 0 + mov QWORD PTR [r8+rdi], rax # fz[i] <- 0 + mov QWORD PTR [8+r8+r13], rax # fx[i] <- 0 + mov QWORD PTR [8+r8+r14], rax # fy[i] <- 0 + mov QWORD PTR [8+r8+rdi], rax # fz[i] <- 0 + add r8, 16 # i++ + inc r10 # i++ + cmp r10, rcx # i < Nlocal + jb ..init_force_loop + +# Trick to make r11d contain value of last element to be zeroed plus 1 +# Maybe we can directly put r10+10 here and zero r11d above, then remove the -1 below + lea r11d, DWORD PTR [1+r10+r10] # r11d <- i * 2 + 1 +..zero_last_element: + lea ecx, DWORD PTR [-1+r11] # ecx <- i * 2 + cmp ecx, r9d # i >= Nlocal + jae ..before_atom_loop + + # Set last element to zero + movsxd r11, r11d # r11 <- i * 2 + mov QWORD PTR [-8+r13+r11*8], rax # fx[i] <- 0 + mov QWORD PTR [-8+r14+r11*8], rax # fy[i] <- 0 + mov QWORD PTR [-8+rdi+r11*8], rax # fz[i] <- 0 + +# Initialize registers to be used within atom loop +..before_atom_loop: + vmulsd xmm15, xmm2, xmm2 # xmm15 <- cutforcesq + vmovdqu32 ymm18, YMMWORD PTR .L_2il0floatpacket.0[rip] # ymm18 <- [8, ...] + vmulsd xmm0, xmm0, QWORD PTR .L_2il0floatpacket.3[rip] # xmm0 <- 48 * epsilon + vmovdqu32 ymm17, YMMWORD PTR .L_2il0floatpacket.1[rip] # ymm17 <- [0..7] + vmovups zmm7, ZMMWORD PTR .L_2il0floatpacket.4[rip] # zmm7 <- [0.5, ...] + vbroadcastsd zmm16, xmm15 # zmm16 <- [cutforcesq, ...] + vbroadcastsd zmm15, xmm1 # zmm15 <- [param->sigma6, ...] + vbroadcastsd zmm14, xmm0 # zmm14 <- [48 * epsilon, ...] + movsxd r9, r9d # r9 <- atom->Nlocal + xor r10d, r10d # r10d <- 0 (i) + mov rcx, QWORD PTR [24+rdx] # rcx <- neighbor->numneigh + mov r11, QWORD PTR [8+rdx] # r11 <- neighbor->neighbors + movsxd r12, DWORD PTR [16+rdx] # r12 <- neighbor->maxneighs + mov rdx, QWORD PTR [16+rsi] # rdx <- atom->x + ### AOS + xor eax, eax + ### SOA + #mov rax, QWORD PTR [24+rsi] # rax <- atom->y + #mov rsi, QWORD PTR [32+rsi] # rsi <- atom->z + ### + shl r12, 2 # r12 <- neighbor->maxneighs * 4 + + # Register spilling + mov QWORD PTR [-32+rsp], r9 # [-32+rsp] <- atom->Nlocal + mov QWORD PTR [-24+rsp], rcx # [-24+rsp] <- neighbor->numneigh + mov QWORD PTR [-16+rsp], r14 # [-16+rsp] <- atom->fy + mov QWORD PTR [-8+rsp], r13 # [-8+rsp] <- atom->fx + mov QWORD PTR [-40+rsp], r15 # [-40+rsp] <- r15 + mov QWORD PTR [-48+rsp], rbx # [-48+rsp] <- rbx + #sub rsp, 64 + #call getTimeStamp # xmm0 <- getTimeStamp() + #vmovsd QWORD PTR [-56+rsp], xmm0 # [-56+rsp] <- xmm0 [spill] + #add rsp, 64 + +..atom_loop_begin: + mov rcx, QWORD PTR [-24+rsp] # rcx <- neighbor->numneigh + vxorpd xmm25, xmm25, xmm25 # xmm25 <- 0 (fix) + vmovapd xmm20, xmm25 # xmm20 <- 0 (fiy) + mov r13d, DWORD PTR [rcx+r10*4] # r13d <- neighbor->numneigh[i] (numneighs) + vmovapd xmm4, xmm20 # xmm4 <- 0 (fiz) + + ### AOS + vmovsd xmm8, QWORD PTR[rdx+rax] # xmm8 <- atom->x[i * 3] + vmovsd xmm9, QWORD PTR[8+rdx+rax] # xmm9 <- atom->x[i * 3 + 1] + vmovsd xmm10, QWORD PTR[16+rdx+rax] # xmm10 <- atom->x[i * 3 + 2] + ### SOA + #vmovsd xmm8, QWORD PTR [rdx+r10*8] # xmm8 <- atom->x[i] + #vmovsd xmm9, QWORD PTR [rax+r10*8] # xmm9 <- atom->y[i] + #vmovsd xmm10, QWORD PTR [rsi+r10*8] # xmm10 <- atom->z[i] + ### + vbroadcastsd zmm0, xmm8 # zmm0 <- atom_x(i) + vbroadcastsd zmm1, xmm9 # zmm1 <- atom_y(i) + vbroadcastsd zmm2, xmm10 # zmm2 <- atom_z(i) + test r13d, r13d # numneighs <= 0 + jle ..atom_loop_exit + + vpxord zmm13, zmm13, zmm13 # zmm13 <- 0 (fix) + vmovaps zmm12, zmm13 # zmm12 <- 0 (fiy) + vmovaps zmm11, zmm12 # zmm11 <- 0 (fiz) + mov rcx, r12 # rcx <- neighbor->maxneighs * 4 + imul rcx, r10 # rcx <- neighbor->maxneighs * 4 * i + add rcx, r11 # rcx <- &neighbor->neighbors[neighbor->maxneighs * i] + xor r9d, r9d # r9d <- 0 (k) + mov r14d, r13d # r14d <- numneighs + cmp r14d, 8 + jl ..compute_forces_remainder + +..compute_forces: + vpcmpeqb k1, xmm0, xmm0 + vpcmpeqb k2, xmm0, xmm0 + vpcmpeqb k3, xmm0, xmm0 + vmovdqu ymm3, YMMWORD PTR [rcx+r9*4] + vpxord zmm5, zmm5, zmm5 + vpxord zmm6, zmm6, zmm6 + + ### AOS + vpaddd ymm4, ymm3, ymm3 + vpaddd ymm3, ymm3, ymm4 + vpxord zmm4, zmm4, zmm4 + vgatherdpd zmm4{k1}, [rdx+ymm3*8] + vgatherdpd zmm5{k2}, [8+rdx+ymm3*8] + vgatherdpd zmm6{k3}, [16+rdx+ymm3*8] + ### SOA + #vpxord zmm4, zmm4, zmm4 + #vgatherdpd zmm5{k2}, [rax+ymm3*8] + #vgatherdpd zmm4{k1}, [rdx+ymm3*8] + #vgatherdpd zmm6{k3}, [rsi+ymm3*8] + ### + + vsubpd zmm29, zmm1, zmm5 # zmm29 <- atom_y(i) - atom_y(j) -- dely + vsubpd zmm28, zmm0, zmm4 # zmm28 <- atom_x(i) - atom_x(j) -- delx + vsubpd zmm31, zmm2, zmm6 # zmm31 <- atom_z(i) - atom_z(j) -- delz + vmulpd zmm20, zmm29, zmm29 # zmm20 <- dely * dely + vfmadd231pd zmm20, zmm28, zmm28 # zmm20 <- dely * dely + delx * delx + vfmadd231pd zmm20, zmm31, zmm31 # zmm20 <- zmm20 + delz * delz -- rsq + + # Cutoff radius condition + vrcp14pd zmm27, zmm20 # zmm27 <- 1.0 / rsq (sr2) + vcmppd k5, zmm20, zmm16, 1 # k5 <- rsq < cutforcesq + vmulpd zmm22, zmm27, zmm15 # zmm22 <- sr2 * sigma6 + vmulpd zmm24, zmm27, zmm14 # zmm24 <- 48.0 * epsilon * sr2 + vmulpd zmm25, zmm27, zmm22 # zmm25 <- sr2 * sigma6 * sr2 + vmulpd zmm23, zmm27, zmm25 # zmm23 <- sr2 * sigma6 * sr2 * sr2 + vfmsub213pd zmm27, zmm25, zmm7 # zmm27 <- sr2 * sigma * sr2 * sr2 - 0.5 + vmulpd zmm26, zmm23, zmm24 # zmm26 <- 48.0 * epsilon * sr2 * sr2 * sigma6 * sr2 + vmulpd zmm30, zmm26, zmm27 # zmm30 <- force + vfmadd231pd zmm13{k5}, zmm30, zmm28 # fix += force * delx + vfmadd231pd zmm12{k5}, zmm30, zmm29 # fiy += force * dely + vfmadd231pd zmm11{k5}, zmm30, zmm31 # fiz += force * delz + sub r14d, 8 + add r9, 8 + cmp r14d, 8 + jge ..compute_forces + +# Check if there are remaining neighbors to be computed +..compute_forces_remainder: + test r14d, r14d + jle ..sum_up_forces + + vpbroadcastd ymm4, r14d + vpcmpgtd k1, ymm4, ymm17 + kmovw r15d, k1 + vmovdqu32 ymm3{k1}{z}, YMMWORD PTR [rcx+r9*4] + kmovw k2, k1 + kmovw k3, k1 + vpxord zmm5, zmm5, zmm5 + vpxord zmm6, zmm6, zmm6 + + ### AOS + vpaddd ymm4, ymm3, ymm3 + vpaddd ymm3, ymm3, ymm4 + vpxord zmm4, zmm4, zmm4 + vgatherdpd zmm4{k1}, [rdx+ymm3*8] + vgatherdpd zmm5{k2}, [8+rdx+ymm3*8] + vgatherdpd zmm6{k3}, [16+rdx+ymm3*8] + #### SOA + #vpxord zmm4, zmm4, zmm4 + #vgatherdpd zmm5{k2}, [rax+ymm3*8] + #vgatherdpd zmm4{k1}, [rdx+ymm3*8] + #vgatherdpd zmm6{k3}, [rsi+ymm3*8] + ### + + vsubpd zmm29, zmm1, zmm5 # zmm29 <- atom_y(i) - atom_y(j) -- dely + vsubpd zmm28, zmm0, zmm4 # zmm28 <- atom_x(i) - atom_x(j) -- delx + vsubpd zmm31, zmm2, zmm6 # zmm31 <- atom_z(i) - atom_z(j) -- delz + vmulpd zmm20, zmm29, zmm29 # zmm20 <- dely * dely + vfmadd231pd zmm20, zmm28, zmm28 # zmm20 <- dely * dely + delx * delx + vfmadd231pd zmm20, zmm31, zmm31 # zmm20 <- zmm20 + delz * delz -- rsq + + # Cutoff radius condition + vrcp14pd zmm27, zmm20 # zmm27 <- 1.0 / rsq (sr2) + vcmppd k5, zmm20, zmm16, 1 # k5 <- rsq < cutforcesq + kmovw r9d, k5 # r9d <- rsq < cutforcesq + and r15d, r9d # r15d <- rsq < cutforcesq && k < numneighs + kmovw k3, r15d # k3 <- rsq < cutforcesq && k < numneighs + vmulpd zmm22, zmm27, zmm15 # zmm22 <- sr2 * sigma6 + vmulpd zmm24, zmm27, zmm14 # zmm24 <- 48.0 * epsilon * sr2 + vmulpd zmm25, zmm27, zmm22 # zmm25 <- sr2 * sigma6 * sr2 + vmulpd zmm23, zmm27, zmm25 # zmm23 <- sr2 * sigma6 * sr2 * sr2 + vfmsub213pd zmm27, zmm25, zmm7 # zmm27 <- sr2 * sigma * sr2 * sr2 - 0.5 + vmulpd zmm26, zmm23, zmm24 # zmm26 <- 48.0 * epsilon * sr2 * sr2 * sigma6 * sr2 + vmulpd zmm30, zmm26, zmm27 # zmm30 <- force + vfmadd231pd zmm13{k3}, zmm30, zmm28 # fix += force * delx + vfmadd231pd zmm12{k3}, zmm30, zmm29 # fiy += force * dely + vfmadd231pd zmm11{k3}, zmm30, zmm31 # fiz += force * delz + +# Forces are currently separated in different lanes of zmm registers, hence it is necessary to permutate +# and add them (reduction) to obtain the final contribution for the current atom +..sum_up_forces: + vmovups zmm10, ZMMWORD PTR .L_2il0floatpacket.6[rip] + vpermd zmm0, zmm10, zmm11 + vpermd zmm5, zmm10, zmm12 + vpermd zmm21, zmm10, zmm13 + vaddpd zmm11, zmm0, zmm11 + vaddpd zmm12, zmm5, zmm12 + vaddpd zmm13, zmm21, zmm13 + vpermpd zmm1, zmm11, 78 + vpermpd zmm6, zmm12, 78 + vpermpd zmm22, zmm13, 78 + vaddpd zmm2, zmm11, zmm1 + vaddpd zmm8, zmm12, zmm6 + vaddpd zmm23, zmm13, zmm22 + vpermpd zmm3, zmm2, 177 + vpermpd zmm9, zmm8, 177 + vpermpd zmm24, zmm23, 177 + vaddpd zmm4, zmm2, zmm3 + vaddpd zmm20, zmm8, zmm9 + vaddpd zmm25, zmm23, zmm24 + +..atom_loop_exit: + mov rcx, QWORD PTR [-8+rsp] #84.9[spill] + mov rbx, QWORD PTR [-16+rsp] #85.9[spill] + + ### AOS + add rax, 24 + ### + + vaddsd xmm0, xmm25, QWORD PTR [rcx+r10*8] #84.9 + vmovsd QWORD PTR [rcx+r10*8], xmm0 #84.9 + vaddsd xmm1, xmm20, QWORD PTR [rbx+r10*8] #85.9 + vmovsd QWORD PTR [rbx+r10*8], xmm1 #85.9 + vaddsd xmm2, xmm4, QWORD PTR [rdi+r10*8] #86.9 + vmovsd QWORD PTR [rdi+r10*8], xmm2 #86.9 + inc r10 #55.5 + cmp r10, QWORD PTR [-32+rsp] #55.5[spill] + jb ..atom_loop_begin + vzeroupper #93.12 + vxorpd xmm0, xmm0, xmm0 #93.12 + #call getTimeStamp # xmm0 <- getTimeStamp() + #vsubsd xmm0, xmm0, QWORD PTR [-56+rsp] # xmm0 <- E-S + pop rbx + pop r15 + pop r14 #93.12 + pop r13 #93.12 + pop r12 #93.12 + pop rbp #93.12 + ret #93.12 + +.type computeForceLJ,@function +.size computeForceLJ,.-computeForceLJ + + +..LNcomputeForce.0: + .data +# -- End computeForceLJ + .section .rodata, "a" + .align 64 + .align 64 +.L_2il0floatpacket.2: + .long 0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000 + .type .L_2il0floatpacket.2,@object + .size .L_2il0floatpacket.2,64 + .align 64 +.L_2il0floatpacket.4: + .long 0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000 + .type .L_2il0floatpacket.4,@object + .size .L_2il0floatpacket.4,64 + .align 64 +.L_2il0floatpacket.6: + .long 0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f,0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f + .type .L_2il0floatpacket.6,@object + .size .L_2il0floatpacket.6,64 + .align 32 +.L_2il0floatpacket.0: + .long 0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008 + .type .L_2il0floatpacket.0,@object + .size .L_2il0floatpacket.0,32 + .align 32 +.L_2il0floatpacket.1: + .long 0x00000000,0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007 + .type .L_2il0floatpacket.1,@object + .size .L_2il0floatpacket.1,32 + .align 8 +.L_2il0floatpacket.3: + .long 0x00000000,0x40480000 + .type .L_2il0floatpacket.3,@object + .size .L_2il0floatpacket.3,8 + .align 8 +.L_2il0floatpacket.5: + .long 0x00000000,0x3ff00000 + .type .L_2il0floatpacket.5,@object + .size .L_2il0floatpacket.5,8 + .data + .section .note.GNU-stack, "" +# End diff --git a/src/allocate.c b/default/allocate.c similarity index 100% rename from src/allocate.c rename to default/allocate.c diff --git a/src/atom.c b/default/atom.c similarity index 100% rename from src/atom.c rename to default/atom.c diff --git a/src/eam_utils.c b/default/eam_utils.c similarity index 100% rename from src/eam_utils.c rename to default/eam_utils.c diff --git a/src/force_eam.c b/default/force_eam.c similarity index 100% rename from src/force_eam.c rename to default/force_eam.c diff --git a/src/force_lj.c b/default/force_lj.c similarity index 100% rename from src/force_lj.c rename to default/force_lj.c diff --git a/src/includes/allocate.h b/default/includes/allocate.h similarity index 100% rename from src/includes/allocate.h rename to default/includes/allocate.h diff --git a/src/includes/atom.h b/default/includes/atom.h similarity index 100% rename from src/includes/atom.h rename to default/includes/atom.h diff --git a/src/includes/eam.h b/default/includes/eam.h similarity index 100% rename from src/includes/eam.h rename to default/includes/eam.h diff --git a/src/includes/likwid-marker.h b/default/includes/likwid-marker.h similarity index 100% rename from src/includes/likwid-marker.h rename to default/includes/likwid-marker.h diff --git a/src/includes/neighbor.h b/default/includes/neighbor.h similarity index 100% rename from src/includes/neighbor.h rename to default/includes/neighbor.h diff --git a/src/includes/parameter.h b/default/includes/parameter.h similarity index 100% rename from src/includes/parameter.h rename to default/includes/parameter.h diff --git a/src/includes/pbc.h b/default/includes/pbc.h similarity index 100% rename from src/includes/pbc.h rename to default/includes/pbc.h diff --git a/src/includes/stats.h b/default/includes/stats.h similarity index 100% rename from src/includes/stats.h rename to default/includes/stats.h diff --git a/src/includes/thermo.h b/default/includes/thermo.h similarity index 100% rename from src/includes/thermo.h rename to default/includes/thermo.h diff --git a/src/includes/timers.h b/default/includes/timers.h similarity index 100% rename from src/includes/timers.h rename to default/includes/timers.h diff --git a/src/includes/timing.h b/default/includes/timing.h similarity index 100% rename from src/includes/timing.h rename to default/includes/timing.h diff --git a/src/includes/tracing.h b/default/includes/tracing.h similarity index 100% rename from src/includes/tracing.h rename to default/includes/tracing.h diff --git a/src/includes/util.h b/default/includes/util.h similarity index 100% rename from src/includes/util.h rename to default/includes/util.h diff --git a/src/includes/vtk.h b/default/includes/vtk.h similarity index 100% rename from src/includes/vtk.h rename to default/includes/vtk.h diff --git a/src/main-stub.c b/default/main-stub.c similarity index 100% rename from src/main-stub.c rename to default/main-stub.c diff --git a/src/main.c b/default/main.c similarity index 100% rename from src/main.c rename to default/main.c diff --git a/src/neighbor.c b/default/neighbor.c similarity index 100% rename from src/neighbor.c rename to default/neighbor.c diff --git a/src/pbc.c b/default/pbc.c similarity index 100% rename from src/pbc.c rename to default/pbc.c diff --git a/src/stats.c b/default/stats.c similarity index 100% rename from src/stats.c rename to default/stats.c diff --git a/src/thermo.c b/default/thermo.c similarity index 100% rename from src/thermo.c rename to default/thermo.c diff --git a/src/timing.c b/default/timing.c similarity index 100% rename from src/timing.c rename to default/timing.c diff --git a/src/tracing.c b/default/tracing.c similarity index 100% rename from src/tracing.c rename to default/tracing.c diff --git a/src/util.c b/default/util.c similarity index 100% rename from src/util.c rename to default/util.c diff --git a/src/vtk.c b/default/vtk.c similarity index 100% rename from src/vtk.c rename to default/vtk.c diff --git a/gromacs/allocate.c b/gromacs/allocate.c new file mode 100644 index 0000000..3aa4e1a --- /dev/null +++ b/gromacs/allocate.c @@ -0,0 +1,70 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include +#include + +void* allocate (int alignment, size_t bytesize) +{ + int errorCode; + void* ptr; + + errorCode = posix_memalign(&ptr, alignment, bytesize); + + if (errorCode) { + if (errorCode == EINVAL) { + fprintf(stderr, + "Error: Alignment parameter is not a power of two\n"); + exit(EXIT_FAILURE); + } + if (errorCode == ENOMEM) { + fprintf(stderr, + "Error: Insufficient memory to fulfill the request\n"); + exit(EXIT_FAILURE); + } + } + + if (ptr == NULL) { + fprintf(stderr, "Error: posix_memalign failed!\n"); + exit(EXIT_FAILURE); + } + + return ptr; +} + +void* reallocate ( + void* ptr, + int alignment, + size_t newBytesize, + size_t oldBytesize) +{ + void* newarray = allocate(alignment, newBytesize); + + if(ptr != NULL) { + memcpy(newarray, ptr, oldBytesize); + free(ptr); + } + + return newarray; +} diff --git a/gromacs/atom.c b/gromacs/atom.c new file mode 100644 index 0000000..5d41cd2 --- /dev/null +++ b/gromacs/atom.c @@ -0,0 +1,276 @@ +/* + * ======================================================================================= + * + * Authors: Jan Eitzinger (je), jan.eitzinger@fau.de + * Rafael Ravedutti (rr), rafaelravedutti@gmail.com + * + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include +#include + +#include +#include +#include + +#define DELTA 20000 + +#ifndef MAXLINE +#define MAXLINE 4096 +#endif + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#endif + +void initAtom(Atom *atom) +{ + atom->x = NULL; atom->y = NULL; atom->z = NULL; + atom->vx = NULL; atom->vy = NULL; atom->vz = NULL; + atom->fx = NULL; atom->fy = NULL; atom->fz = NULL; + atom->Natoms = 0; + atom->Nlocal = 0; + atom->Nghost = 0; + atom->Nmax = 0; + atom->type = NULL; + atom->ntypes = 0; + atom->epsilon = NULL; + atom->sigma6 = NULL; + atom->cutforcesq = NULL; + atom->cutneighsq = NULL; +} + +void createAtom(Atom *atom, Parameter *param) +{ + MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd; + MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd; + MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd; + atom->Natoms = 4 * param->nx * param->ny * param->nz; + atom->Nlocal = 0; + atom->ntypes = param->ntypes; + atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + for(int i = 0; i < atom->ntypes * atom->ntypes; i++) { + atom->epsilon[i] = param->epsilon; + atom->sigma6[i] = param->sigma6; + atom->cutneighsq[i] = param->cutneigh * param->cutneigh; + atom->cutforcesq[i] = param->cutforce * param->cutforce; + } + + MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0)); + int ilo = (int) (xlo / (0.5 * alat) - 1); + int ihi = (int) (xhi / (0.5 * alat) + 1); + int jlo = (int) (ylo / (0.5 * alat) - 1); + int jhi = (int) (yhi / (0.5 * alat) + 1); + int klo = (int) (zlo / (0.5 * alat) - 1); + int khi = (int) (zhi / (0.5 * alat) + 1); + + ilo = MAX(ilo, 0); + ihi = MIN(ihi, 2 * param->nx - 1); + jlo = MAX(jlo, 0); + jhi = MIN(jhi, 2 * param->ny - 1); + klo = MAX(klo, 0); + khi = MIN(khi, 2 * param->nz - 1); + + MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp; + int i, j, k, m, n; + int sx = 0; int sy = 0; int sz = 0; + int ox = 0; int oy = 0; int oz = 0; + int subboxdim = 8; + + while(oz * subboxdim <= khi) { + + k = oz * subboxdim + sz; + j = oy * subboxdim + sy; + i = ox * subboxdim + sx; + + if(((i + j + k) % 2 == 0) && + (i >= ilo) && (i <= ihi) && + (j >= jlo) && (j <= jhi) && + (k >= klo) && (k <= khi)) { + + xtmp = 0.5 * alat * i; + ytmp = 0.5 * alat * j; + ztmp = 0.5 * alat * k; + + if( xtmp >= xlo && xtmp < xhi && + ytmp >= ylo && ytmp < yhi && + ztmp >= zlo && ztmp < zhi ) { + + n = k * (2 * param->ny) * (2 * param->nx) + + j * (2 * param->nx) + + i + 1; + + for(m = 0; m < 5; m++) { + myrandom(&n); + } + vxtmp = myrandom(&n); + + for(m = 0; m < 5; m++){ + myrandom(&n); + } + vytmp = myrandom(&n); + + for(m = 0; m < 5; m++) { + myrandom(&n); + } + vztmp = myrandom(&n); + + if(atom->Nlocal == atom->Nmax) { + growAtom(atom); + } + + atom_x(atom->Nlocal) = xtmp; + atom_y(atom->Nlocal) = ytmp; + atom_z(atom->Nlocal) = ztmp; + atom->vx[atom->Nlocal] = vxtmp; + atom->vy[atom->Nlocal] = vytmp; + atom->vz[atom->Nlocal] = vztmp; + atom->type[atom->Nlocal] = rand() % atom->ntypes; + atom->Nlocal++; + } + } + + sx++; + + if(sx == subboxdim) { sx = 0; sy++; } + if(sy == subboxdim) { sy = 0; sz++; } + if(sz == subboxdim) { sz = 0; ox++; } + if(ox * subboxdim > ihi) { ox = 0; oy++; } + if(oy * subboxdim > jhi) { oy = 0; oz++; } + } +} + +int readAtom(Atom* atom, Parameter* param) +{ + FILE *fp = fopen(param->input_file, "r"); + char line[MAXLINE]; + int natoms = 0; + int read_atoms = 0; + int atom_id = -1; + int ts = -1; + + if(!fp) { + fprintf(stderr, "Could not open input file: %s\n", param->input_file); + exit(-1); + return -1; + } + + while(!feof(fp) && ts < 1 && !read_atoms) { + fgets(line, MAXLINE, fp); + if(strncmp(line, "ITEM: ", 6) == 0) { + char *item = &line[6]; + + if(strncmp(item, "TIMESTEP", 8) == 0) { + fgets(line, MAXLINE, fp); + ts = atoi(line); + } else if(strncmp(item, "NUMBER OF ATOMS", 15) == 0) { + fgets(line, MAXLINE, fp); + natoms = atoi(line); + atom->Natoms = natoms; + atom->Nlocal = natoms; + while(atom->Nlocal >= atom->Nmax) { + growAtom(atom); + } + } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) { + fgets(line, MAXLINE, fp); + param->xlo = atof(strtok(line, " ")); + param->xhi = atof(strtok(NULL, " ")); + param->xprd = param->xhi - param->xlo; + + fgets(line, MAXLINE, fp); + param->ylo = atof(strtok(line, " ")); + param->yhi = atof(strtok(NULL, " ")); + param->yprd = param->yhi - param->ylo; + + fgets(line, MAXLINE, fp); + param->zlo = atof(strtok(line, " ")); + param->zhi = atof(strtok(NULL, " ")); + param->zprd = param->zhi - param->zlo; + } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) { + for(int i = 0; i < natoms; i++) { + fgets(line, MAXLINE, fp); + atom_id = atoi(strtok(line, " ")) - 1; + atom->type[atom_id] = atoi(strtok(NULL, " ")); + atom_x(atom_id) = atof(strtok(NULL, " ")); + atom_y(atom_id) = atof(strtok(NULL, " ")); + atom_z(atom_id) = atof(strtok(NULL, " ")); + atom->vx[atom_id] = atof(strtok(NULL, " ")); + atom->vy[atom_id] = atof(strtok(NULL, " ")); + atom->vz[atom_id] = atof(strtok(NULL, " ")); + atom->ntypes = MAX(atom->type[atom_id], atom->ntypes); + read_atoms++; + } + } else { + fprintf(stderr, "Invalid item: %s\n", item); + exit(-1); + return -1; + } + } else { + fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line); + exit(-1); + return -1; + } + } + + if(ts < 0 || !natoms || !read_atoms) { + fprintf(stderr, "Input error: atom data was not read!\n"); + exit(-1); + return -1; + } + + atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + for(int i = 0; i < atom->ntypes * atom->ntypes; i++) { + atom->epsilon[i] = param->epsilon; + atom->sigma6[i] = param->sigma6; + atom->cutneighsq[i] = param->cutneigh * param->cutneigh; + atom->cutforcesq[i] = param->cutforce * param->cutforce; + } + + fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file); + return natoms; +} + +void growAtom(Atom *atom) +{ + int nold = atom->Nmax; + atom->Nmax += DELTA; + + #ifdef AOS + atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); + #else + atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + #endif + atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int)); +} diff --git a/gromacs/eam_utils.c b/gromacs/eam_utils.c new file mode 100644 index 0000000..aaa5f40 --- /dev/null +++ b/gromacs/eam_utils.c @@ -0,0 +1,285 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifndef MAXLINE +#define MAXLINE 4096 +#endif + +void initEam(Eam* eam, Parameter* param) { + int ntypes = param->ntypes; + eam->nmax = 0; + eam->fp = NULL; + coeff(eam, param); + init_style(eam, param); +} + +void coeff(Eam* eam, Parameter* param) { + read_eam_file(&eam->file, param->eam_file); + param->mass = eam->file.mass; + param->cutforce = eam->file.cut; + param->cutneigh = param->cutforce + 1.0; + param->temp = 600.0; + param->dt = 0.001; + param->rho = 0.07041125; + param->dtforce = 0.5 * param->dt / param->mass; +} + +void init_style(Eam* eam, Parameter* param) { + // convert read-in file(s) to arrays and spline them + file2array(eam); + array2spline(eam, param); +} + +void read_eam_file(Funcfl* file, const char* filename) { + FILE* fptr; + char line[MAXLINE]; + + fptr = fopen(filename, "r"); + if(fptr == NULL) { + printf("Can't open EAM Potential file: %s\n", filename); + exit(0); + } + + int tmp; + fgets(line, MAXLINE, fptr); + fgets(line, MAXLINE, fptr); + sscanf(line, "%d %lg", &tmp, &(file->mass)); + fgets(line, MAXLINE, fptr); + sscanf(line, "%d %lg %d %lg %lg", &file->nrho, &file->drho, &file->nr, &file->dr, &file->cut); + + //printf("Read: %lf %i %lf %i %lf %lf\n",file->mass,file->nrho,file->drho,file->nr,file->dr,file->cut); + file->frho = (MD_FLOAT *) allocate(ALIGNMENT, (file->nrho + 1) * sizeof(MD_FLOAT)); + file->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT)); + file->zr = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT)); + grab(fptr, file->nrho, file->frho); + grab(fptr, file->nr, file->zr); + grab(fptr, file->nr, file->rhor); + for(int i = file->nrho; i > 0; i--) file->frho[i] = file->frho[i - 1]; + for(int i = file->nr; i > 0; i--) file->rhor[i] = file->rhor[i - 1]; + for(int i = file->nr; i > 0; i--) file->zr[i] = file->zr[i - 1]; + fclose(fptr); +} + +void file2array(Eam* eam) { + int i, j, k, m, n; + double sixth = 1.0 / 6.0; + + // determine max function params from all active funcfl files + // active means some element is pointing at it via map + int active; + double rmax, rhomax; + eam->dr = eam->drho = rmax = rhomax = 0.0; + active = 0; + Funcfl* file = &eam->file; + eam->dr = MAX(eam->dr, file->dr); + eam->drho = MAX(eam->drho, file->drho); + rmax = MAX(rmax, (file->nr - 1) * file->dr); + rhomax = MAX(rhomax, (file->nrho - 1) * file->drho); + + // set nr,nrho from cutoff and spacings + // 0.5 is for round-off in divide + eam->nr = (int)(rmax / eam->dr + 0.5); + eam->nrho = (int)(rhomax / eam->drho + 0.5); + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of funcfl files + 1 for zero array + eam->frho = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nrho + 1) * sizeof(MD_FLOAT)); + + // interpolate each file's frho to a single grid and cutoff + double r, p, cof1, cof2, cof3, cof4; + n = 0; + + for(m = 1; m <= eam->nrho; m++) { + r = (m - 1) * eam->drho; + p = r / file->drho + 1.0; + k = (int)(p); + k = MIN(k, file->nrho - 2); + k = MAX(k, 2); + p -= k; + p = MIN(p, 2.0); + cof1 = -sixth * p * (p - 1.0) * (p - 2.0); + cof2 = 0.5 * (p * p - 1.0) * (p - 2.0); + cof3 = -0.5 * p * (p + 1.0) * (p - 2.0); + cof4 = sixth * p * (p * p - 1.0); + eam->frho[m] = cof1 * file->frho[k - 1] + cof2 * file->frho[k] + + cof3 * file->frho[k + 1] + cof4 * file->frho[k + 2]; + } + + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = # of funcfl files + eam->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT)); + + // interpolate each file's rhor to a single grid and cutoff + for(m = 1; m <= eam->nr; m++) { + r = (m - 1) * eam->dr; + p = r / file->dr + 1.0; + k = (int)(p); + k = MIN(k, file->nr - 2); + k = MAX(k, 2); + p -= k; + p = MIN(p, 2.0); + cof1 = -sixth * p * (p - 1.0) * (p - 2.0); + cof2 = 0.5 * (p * p - 1.0) * (p - 2.0); + cof3 = -0.5 * p * (p + 1.0) * (p - 2.0); + cof4 = sixth * p * (p * p - 1.0); + eam->rhor[m] = cof1 * file->rhor[k - 1] + cof2 * file->rhor[k] + + cof3 * file->rhor[k + 1] + cof4 * file->rhor[k + 2]; + //if(m==119)printf("BuildRho: %e %e %e %e %e %e\n",rhor[m],cof1,cof2,cof3,cof4,file->rhor[k]); + } + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for funcfl files, I,J mapping only depends on I + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of funcfl files + eam->z2r = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT)); + + // create a z2r array for each file against other files, only for I >= J + // interpolate zri and zrj to a single grid and cutoff + double zri, zrj; + Funcfl* ifile = &eam->file; + Funcfl* jfile = &eam->file; + + for(m = 1; m <= eam->nr; m++) { + r = (m - 1) * eam->dr; + p = r / ifile->dr + 1.0; + k = (int)(p); + k = MIN(k, ifile->nr - 2); + k = MAX(k, 2); + p -= k; + p = MIN(p, 2.0); + cof1 = -sixth * p * (p - 1.0) * (p - 2.0); + cof2 = 0.5 * (p * p - 1.0) * (p - 2.0); + cof3 = -0.5 * p * (p + 1.0) * (p - 2.0); + cof4 = sixth * p * (p * p - 1.0); + zri = cof1 * ifile->zr[k - 1] + cof2 * ifile->zr[k] + + cof3 * ifile->zr[k + 1] + cof4 * ifile->zr[k + 2]; + + p = r / jfile->dr + 1.0; + k = (int)(p); + k = MIN(k, jfile->nr - 2); + k = MAX(k, 2); + p -= k; + p = MIN(p, 2.0); + cof1 = -sixth * p * (p - 1.0) * (p - 2.0); + cof2 = 0.5 * (p * p - 1.0) * (p - 2.0); + cof3 = -0.5 * p * (p + 1.0) * (p - 2.0); + cof4 = sixth * p * (p * p - 1.0); + zrj = cof1 * jfile->zr[k - 1] + cof2 * jfile->zr[k] + + cof3 * jfile->zr[k + 1] + cof4 * jfile->zr[k + 2]; + + eam->z2r[m] = 27.2 * 0.529 * zri * zrj; + } +} + +void array2spline(Eam* eam, Parameter* param) { + eam->rdr = 1.0 / eam->dr; + eam->rdrho = 1.0 / eam->drho; + eam->nrho_tot = (eam->nrho + 1) * 7 + 64; + eam->nr_tot = (eam->nr + 1) * 7 + 64; + eam->nrho_tot -= eam->nrho_tot%64; + eam->nr_tot -= eam->nr_tot%64; + + int ntypes = param->ntypes; + eam->frho_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nrho_tot * sizeof(MD_FLOAT)); + eam->rhor_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT)); + eam->z2r_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT)); + interpolate(eam->nrho, eam->drho, eam->frho, eam->frho_spline); + interpolate(eam->nr, eam->dr, eam->rhor, eam->rhor_spline); + interpolate(eam->nr, eam->dr, eam->z2r, eam->z2r_spline); + + // replicate data for multiple types; + for(int tt = 0; tt < ntypes * ntypes; tt++) { + for(int k = 0; k < eam->nrho_tot; k++) + eam->frho_spline[tt*eam->nrho_tot + k] = eam->frho_spline[k]; + for(int k = 0; k < eam->nr_tot; k++) + eam->rhor_spline[tt*eam->nr_tot + k] = eam->rhor_spline[k]; + for(int k = 0; k < eam->nr_tot; k++) + eam->z2r_spline[tt*eam->nr_tot + k] = eam->z2r_spline[k]; + } +} + +void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline) { + for(int m = 1; m <= n; m++) spline[m * 7 + 6] = f[m]; + + spline[1 * 7 + 5] = spline[2 * 7 + 6] - spline[1 * 7 + 6]; + spline[2 * 7 + 5] = 0.5 * (spline[3 * 7 + 6] - spline[1 * 7 + 6]); + spline[(n - 1) * 7 + 5] = 0.5 * (spline[n * 7 + 6] - spline[(n - 2) * 7 + 6]); + spline[n * 7 + 5] = spline[n * 7 + 6] - spline[(n - 1) * 7 + 6]; + + for(int m = 3; m <= n - 2; m++) + spline[m * 7 + 5] = ((spline[(m - 2) * 7 + 6] - spline[(m + 2) * 7 + 6]) + + 8.0 * (spline[(m + 1) * 7 + 6] - spline[(m - 1) * 7 + 6])) / 12.0; + + for(int m = 1; m <= n - 1; m++) { + spline[m * 7 + 4] = 3.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]) - + 2.0 * spline[m * 7 + 5] - spline[(m + 1) * 7 + 5]; + spline[m * 7 + 3] = spline[m * 7 + 5] + spline[(m + 1) * 7 + 5] - + 2.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]); + } + + spline[n * 7 + 4] = 0.0; + spline[n * 7 + 3] = 0.0; + + for(int m = 1; m <= n; m++) { + spline[m * 7 + 2] = spline[m * 7 + 5] / delta; + spline[m * 7 + 1] = 2.0 * spline[m * 7 + 4] / delta; + spline[m * 7 + 0] = 3.0 * spline[m * 7 + 3] / delta; + } +} + +void grab(FILE* fptr, int n, MD_FLOAT* list) { + char* ptr; + char line[MAXLINE]; + int i = 0; + + while(i < n) { + fgets(line, MAXLINE, fptr); + ptr = strtok(line, " \t\n\r\f"); + list[i++] = atof(ptr); + while(ptr = strtok(NULL, " \t\n\r\f")) list[i++] = atof(ptr); + } +} diff --git a/gromacs/force_eam.c b/gromacs/force_eam.c new file mode 100644 index 0000000..b686abf --- /dev/null +++ b/gromacs/force_eam.c @@ -0,0 +1,213 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats) { + if(eam->nmax < atom->Nmax) { + eam->nmax = atom->Nmax; + if(eam->fp != NULL) { free(eam->fp); } + eam->fp = (MD_FLOAT *) allocate(ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT)); + } + + int Nlocal = atom->Nlocal; + int* neighs; + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp; + MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline; + MD_FLOAT rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; MD_FLOAT rdrho = eam->rdrho; + int nrho = eam->nrho; int nrho_tot = eam->nrho_tot; + double S = getTimeStamp(); + + LIKWID_MARKER_START("force_eam_fp"); + #pragma omp parallel for + for(int i = 0; i < Nlocal; i++) { + neighs = &neighbor->neighbors[i * neighbor->maxneighs]; + int numneighs = neighbor->numneigh[i]; + MD_FLOAT xtmp = atom_x(i); + MD_FLOAT ytmp = atom_y(i); + MD_FLOAT ztmp = atom_z(i); + MD_FLOAT rhoi = 0; +#ifdef EXPLICIT_TYPES + const int type_i = atom->type[i]; +#endif + #pragma ivdep + for(int k = 0; k < numneighs; k++) { + int j = neighs[k]; + MD_FLOAT delx = xtmp - atom_x(j); + MD_FLOAT dely = ytmp - atom_y(j); + MD_FLOAT delz = ztmp - atom_z(j); + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; +#ifdef EXPLICIT_TYPES + const int type_j = atom->type[j]; + const int type_ij = type_i * ntypes + type_j; + const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; +#else + const MD_FLOAT cutforcesq = param->cutforce * param->cutforce; +#endif + if(rsq < cutforcesq) { + MD_FLOAT p = sqrt(rsq) * rdr + 1.0; + int m = (int)(p); + m = m < nr - 1 ? m : nr - 1; + p -= m; + p = p < 1.0 ? p : 1.0; +#ifdef EXPLICIT_TYPES + rhoi += ((rhor_spline[type_ij * nr_tot + m * 7 + 3] * p + + rhor_spline[type_ij * nr_tot + m * 7 + 4]) * p + + rhor_spline[type_ij * nr_tot + m * 7 + 5]) * p + + rhor_spline[type_ij * nr_tot + m * 7 + 6]; +#else + rhoi += ((rhor_spline[m * 7 + 3] * p + + rhor_spline[m * 7 + 4]) * p + + rhor_spline[m * 7 + 5]) * p + + rhor_spline[m * 7 + 6]; +#endif + } + } + +#ifdef EXPLICIT_TYPES + const int type_ii = type_i * type_i; +#endif + MD_FLOAT p = 1.0 * rhoi * rdrho + 1.0; + int m = (int)(p); + m = MAX(1, MIN(m, nrho - 1)); + p -= m; + p = MIN(p, 1.0); +#ifdef EXPLICIT_TYPES + fp[i] = (frho_spline[type_ii * nrho_tot + m * 7 + 0] * p + + frho_spline[type_ii * nrho_tot + m * 7 + 1]) * p + + frho_spline[type_ii * nrho_tot + m * 7 + 2]; +#else + fp[i] = (frho_spline[m * 7 + 0] * p + frho_spline[m * 7 + 1]) * p + frho_spline[m * 7 + 2]; +#endif + } + + LIKWID_MARKER_STOP("force_eam_fp"); + + // We still need to update fp for PBC atoms + for(int i = 0; i < atom->Nghost; i++) { + fp[Nlocal + i] = fp[atom->border_map[i]]; + } + + LIKWID_MARKER_START("force_eam"); + for(int i = 0; i < Nlocal; i++) { + neighs = &neighbor->neighbors[i * neighbor->maxneighs]; + int numneighs = neighbor->numneigh[i]; + MD_FLOAT xtmp = atom_x(i); + MD_FLOAT ytmp = atom_y(i); + MD_FLOAT ztmp = atom_z(i); + MD_FLOAT fix = 0; + MD_FLOAT fiy = 0; + MD_FLOAT fiz = 0; +#ifdef EXPLICIT_TYPES + const int type_i = atom->type[i]; +#endif + + #pragma ivdep + for(int k = 0; k < numneighs; k++) { + int j = neighs[k]; + MD_FLOAT delx = xtmp - atom_x(j); + MD_FLOAT dely = ytmp - atom_y(j); + MD_FLOAT delz = ztmp - atom_z(j); + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; +#ifdef EXPLICIT_TYPES + const int type_j = atom->type[j]; + const int type_ij = type_i * ntypes + type_j; + const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; +#else + const MD_FLOAT cutforcesq = param->cutforce * param->cutforce; +#endif + + if(rsq < cutforcesq) { + MD_FLOAT r = sqrt(rsq); + MD_FLOAT p = r * rdr + 1.0; + int m = (int)(p); + m = m < nr - 1 ? m : nr - 1; + p -= m; + p = p < 1.0 ? p : 1.0; + + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + +#ifdef EXPLICIT_TYPES + MD_FLOAT rhoip = (rhor_spline[type_ij * nr_tot + m * 7 + 0] * p + + rhor_spline[type_ij * nr_tot + m * 7 + 1]) * p + + rhor_spline[type_ij * nr_tot + m * 7 + 2]; + + MD_FLOAT z2p = (z2r_spline[type_ij * nr_tot + m * 7 + 0] * p + + z2r_spline[type_ij * nr_tot + m * 7 + 1]) * p + + z2r_spline[type_ij * nr_tot + m * 7 + 2]; + + MD_FLOAT z2 = ((z2r_spline[type_ij * nr_tot + m * 7 + 3] * p + + z2r_spline[type_ij * nr_tot + m * 7 + 4]) * p + + z2r_spline[type_ij * nr_tot + m * 7 + 5]) * p + + z2r_spline[type_ij * nr_tot + m * 7 + 6]; +#else + MD_FLOAT rhoip = (rhor_spline[m * 7 + 0] * p + rhor_spline[m * 7 + 1]) * p + rhor_spline[m * 7 + 2]; + MD_FLOAT z2p = (z2r_spline[m * 7 + 0] * p + z2r_spline[m * 7 + 1]) * p + z2r_spline[m * 7 + 2]; + MD_FLOAT z2 = ((z2r_spline[m * 7 + 3] * p + + z2r_spline[m * 7 + 4]) * p + + z2r_spline[m * 7 + 5]) * p + + z2r_spline[m * 7 + 6]; +#endif + + MD_FLOAT recip = 1.0 / r; + MD_FLOAT phi = z2 * recip; + MD_FLOAT phip = z2p * recip - phi * recip; + MD_FLOAT psip = fp[i] * rhoip + fp[j] * rhoip + phip; + MD_FLOAT fpair = -psip * recip; + + fix += delx * fpair; + fiy += dely * fpair; + fiz += delz * fpair; + //fpair *= 0.5; + } + } + + fx[i] = fix; + fy[i] = fiy; + fz[i] = fiz; + addStat(stats->total_force_neighs, numneighs); + addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); + } + + LIKWID_MARKER_STOP("force_eam"); + double E = getTimeStamp(); + return E-S; +} diff --git a/gromacs/force_lj.c b/gromacs/force_lj.c new file mode 100644 index 0000000..73c0269 --- /dev/null +++ b/gromacs/force_lj.c @@ -0,0 +1,104 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include + +#include +#include +#include +#include +#include + +double computeForceLJ(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { + int Nlocal = atom->Nlocal; + int* neighs; + MD_FLOAT* fx = atom->fx; + MD_FLOAT* fy = atom->fy; + MD_FLOAT* fz = atom->fz; +#ifndef EXPLICIT_TYPES + MD_FLOAT cutforcesq = param->cutforce * param->cutforce; + MD_FLOAT sigma6 = param->sigma6; + MD_FLOAT epsilon = param->epsilon; +#endif + + for(int i = 0; i < Nlocal; i++) { + fx[i] = 0.0; + fy[i] = 0.0; + fz[i] = 0.0; + } + + double S = getTimeStamp(); + LIKWID_MARKER_START("force"); + +#pragma omp parallel for + for(int i = 0; i < Nlocal; i++) { + neighs = &neighbor->neighbors[i * neighbor->maxneighs]; + int numneighs = neighbor->numneigh[i]; + MD_FLOAT xtmp = atom_x(i); + MD_FLOAT ytmp = atom_y(i); + MD_FLOAT ztmp = atom_z(i); + MD_FLOAT fix = 0; + MD_FLOAT fiy = 0; + MD_FLOAT fiz = 0; + +#ifdef EXPLICIT_TYPES + const int type_i = atom->type[i]; +#endif + + + for(int k = 0; k < numneighs; k++) { + int j = neighs[k]; + MD_FLOAT delx = xtmp - atom_x(j); + MD_FLOAT dely = ytmp - atom_y(j); + MD_FLOAT delz = ztmp - atom_z(j); + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; + +#ifdef EXPLICIT_TYPES + const int type_j = atom->type[j]; + const int type_ij = type_i * atom->ntypes + type_j; + const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; + const MD_FLOAT sigma6 = atom->sigma6[type_ij]; + const MD_FLOAT epsilon = atom->epsilon[type_ij]; +#endif + + if(rsq < cutforcesq) { + MD_FLOAT sr2 = 1.0 / rsq; + MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6; + MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon; + fix += delx * force; + fiy += dely * force; + fiz += delz * force; + } + } + + fx[i] += fix; + fy[i] += fiy; + fz[i] += fiz; + + addStat(stats->total_force_neighs, numneighs); + addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); + } + + LIKWID_MARKER_STOP("force"); + double E = getTimeStamp(); + return E-S; +} diff --git a/gromacs/includes/allocate.h b/gromacs/includes/allocate.h new file mode 100644 index 0000000..b7587e3 --- /dev/null +++ b/gromacs/includes/allocate.h @@ -0,0 +1,29 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include + +#ifndef __ALLOCATE_H_ +#define __ALLOCATE_H_ +extern void* allocate (int alignment, size_t bytesize); +extern void* reallocate (void* ptr, int alignment, size_t newBytesize, size_t oldBytesize); +#endif diff --git a/gromacs/includes/atom.h b/gromacs/includes/atom.h new file mode 100644 index 0000000..70cef10 --- /dev/null +++ b/gromacs/includes/atom.h @@ -0,0 +1,59 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include + +#ifndef __ATOM_H_ +#define __ATOM_H_ + +typedef struct { + int Natoms, Nlocal, Nghost, Nmax; + MD_FLOAT *x, *y, *z; + MD_FLOAT *vx, *vy, *vz; + MD_FLOAT *fx, *fy, *fz; + int *border_map; + int *type; + int ntypes; + MD_FLOAT *epsilon; + MD_FLOAT *sigma6; + MD_FLOAT *cutforcesq; + MD_FLOAT *cutneighsq; +} Atom; + +extern void initAtom(Atom*); +extern void createAtom(Atom*, Parameter*); +extern int readAtom(Atom*, Parameter*); +extern void growAtom(Atom*); + +#ifdef AOS +#define POS_DATA_LAYOUT "AoS" +#define atom_x(i) atom->x[(i) * 3 + 0] +#define atom_y(i) atom->x[(i) * 3 + 1] +#define atom_z(i) atom->x[(i) * 3 + 2] +#else +#define POS_DATA_LAYOUT "SoA" +#define atom_x(i) atom->x[i] +#define atom_y(i) atom->y[i] +#define atom_z(i) atom->z[i] +#endif + +#endif diff --git a/gromacs/includes/eam.h b/gromacs/includes/eam.h new file mode 100644 index 0000000..03d7ecc --- /dev/null +++ b/gromacs/includes/eam.h @@ -0,0 +1,55 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include + +#include +#include + +#ifndef __EAM_H_ +#define __EAM_H_ +typedef struct { + int nrho, nr; + MD_FLOAT drho, dr, cut, mass; + MD_FLOAT *frho, *rhor, *zr; +} Funcfl; + +typedef struct { + MD_FLOAT* fp; + int nmax; + int nrho, nr; + int nrho_tot, nr_tot; + MD_FLOAT dr, rdr, drho, rdrho; + MD_FLOAT *frho, *rhor, *z2r; + MD_FLOAT *rhor_spline, *frho_spline, *z2r_spline; + Funcfl file; +} Eam; + +void initEam(Eam* eam, Parameter* param); +void coeff(Eam* eam, Parameter* param); +void init_style(Eam* eam, Parameter *param); +void read_eam_file(Funcfl* file, const char* filename); +void file2array(Eam* eam); +void array2spline(Eam* eam, Parameter* param); +void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline); +void grab(FILE* fptr, int n, MD_FLOAT* list); +#endif diff --git a/gromacs/includes/likwid-marker.h b/gromacs/includes/likwid-marker.h new file mode 100644 index 0000000..ebf8b89 --- /dev/null +++ b/gromacs/includes/likwid-marker.h @@ -0,0 +1,170 @@ +/* + * ======================================================================================= + * + * Filename: likwid-marker.h + * + * Description: Header File of likwid Marker API + * + * Version: + * Released: + * + * Authors: Thomas Gruber (tg), thomas.roehl@googlemail.com + * + * Project: likwid + * + * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * ======================================================================================= + */ +#ifndef LIKWID_MARKER_H +#define LIKWID_MARKER_H + + +/** \addtogroup MarkerAPI Marker API module +* @{ +*/ +/*! +\def LIKWID_MARKER_INIT +Shortcut for likwid_markerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_THREADINIT +Shortcut for likwid_markerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_REGISTER(regionTag) +Shortcut for likwid_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_START(regionTag) +Shortcut for likwid_markerStartRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_STOP(regionTag) +Shortcut for likwid_markerStopRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_GET(regionTag, nevents, events, time, count) +Shortcut for likwid_markerGetResults() for \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_SWITCH +Shortcut for likwid_markerNextGroup() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_RESET(regionTag) +Shortcut for likwid_markerResetRegion() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_MARKER_CLOSE +Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/** @}*/ + +#ifdef LIKWID_PERFMON +#include +#define LIKWID_MARKER_INIT likwid_markerInit() +#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit() +#define LIKWID_MARKER_SWITCH likwid_markerNextGroup() +#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag) +#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag) +#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag) +#define LIKWID_MARKER_CLOSE likwid_markerClose() +#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag) +#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count) +#else /* LIKWID_PERFMON */ +#define LIKWID_MARKER_INIT +#define LIKWID_MARKER_THREADINIT +#define LIKWID_MARKER_SWITCH +#define LIKWID_MARKER_REGISTER(regionTag) +#define LIKWID_MARKER_START(regionTag) +#define LIKWID_MARKER_STOP(regionTag) +#define LIKWID_MARKER_CLOSE +#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) +#define LIKWID_MARKER_RESET(regionTag) +#endif /* LIKWID_PERFMON */ + + +/** \addtogroup NvMarkerAPI NvMarker API module (MarkerAPI for Nvidia GPUs) +* @{ +*/ +/*! +\def LIKWID_NVMARKER_INIT +Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_THREADINIT +Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_REGISTER(regionTag) +Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_START(regionTag) +Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_STOP(regionTag) +Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) +Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_SWITCH +Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_RESET(regionTag) +Shortcut for likwid_gpuMarkerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/*! +\def LIKWID_NVMARKER_CLOSE +Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed +*/ +/** @}*/ + +#ifdef LIKWID_NVMON +#ifndef LIKWID_WITH_NVMON +#define LIKWID_WITH_NVMON +#endif +#include +#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit() +#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit() +#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup() +#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag) +#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag) +#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag) +#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose() +#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag) +#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \ + likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count) +#else /* LIKWID_NVMON */ +#define LIKWID_NVMARKER_INIT +#define LIKWID_NVMARKER_THREADINIT +#define LIKWID_NVMARKER_SWITCH +#define LIKWID_NVMARKER_REGISTER(regionTag) +#define LIKWID_NVMARKER_START(regionTag) +#define LIKWID_NVMARKER_STOP(regionTag) +#define LIKWID_NVMARKER_CLOSE +#define LIKWID_NVMARKER_GET(regionTag, nevents, events, time, count) +#define LIKWID_NVMARKER_RESET(regionTag) +#endif /* LIKWID_NVMON */ + + + +#endif /* LIKWID_MARKER_H */ diff --git a/gromacs/includes/neighbor.h b/gromacs/includes/neighbor.h new file mode 100644 index 0000000..949d515 --- /dev/null +++ b/gromacs/includes/neighbor.h @@ -0,0 +1,41 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +#ifndef __NEIGHBOR_H_ +#define __NEIGHBOR_H_ +typedef struct { + int every; + int ncalls; + int* neighbors; + int maxneighs; + int* numneigh; +} Neighbor; + +extern void initNeighbor(Neighbor*, Parameter*); +extern void setupNeighbor(Parameter*); +extern void binatoms(Atom*); +extern void buildNeighbor(Atom*, Neighbor*); +extern void sortAtom(Atom*); +#endif diff --git a/gromacs/includes/parameter.h b/gromacs/includes/parameter.h new file mode 100644 index 0000000..bff46dd --- /dev/null +++ b/gromacs/includes/parameter.h @@ -0,0 +1,56 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#ifndef __PARAMETER_H_ +#define __PARAMETER_H_ + +#if PRECISION == 1 +#define MD_FLOAT float +#else +#define MD_FLOAT double +#endif + +typedef struct { + int force_field; + char* input_file; + char* vtk_file; + MD_FLOAT epsilon; + MD_FLOAT sigma6; + MD_FLOAT temp; + MD_FLOAT rho; + MD_FLOAT mass; + int ntypes; + int ntimes; + int nstat; + int every; + MD_FLOAT dt; + MD_FLOAT dtforce; + MD_FLOAT cutforce; + MD_FLOAT cutneigh; + int nx, ny, nz; + MD_FLOAT lattice; + MD_FLOAT xlo, xhi, ylo, yhi, zlo, zhi; + MD_FLOAT xprd, yprd, zprd; + double proc_freq; + char* eam_file; +} Parameter; +#endif diff --git a/gromacs/includes/pbc.h b/gromacs/includes/pbc.h new file mode 100644 index 0000000..727ac80 --- /dev/null +++ b/gromacs/includes/pbc.h @@ -0,0 +1,32 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +#ifndef __PBC_H_ +#define __PBC_H_ +extern void initPbc(); +extern void updatePbc(Atom*, Parameter*); +extern void updateAtomsPbc(Atom*, Parameter*); +extern void setupPbc(Atom*, Parameter*); +#endif diff --git a/gromacs/includes/stats.h b/gromacs/includes/stats.h new file mode 100644 index 0000000..231119b --- /dev/null +++ b/gromacs/includes/stats.h @@ -0,0 +1,46 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +#ifndef __STATS_H_ +#define __STATS_H_ +typedef struct { + long long int total_force_neighs; + long long int total_force_iters; +} Stats; + +void initStats(Stats *s); +void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer); + +#ifdef COMPUTE_STATS +# define addStat(stat, value) stat += value; +# define beginStatTimer() double Si = getTimeStamp(); +# define endStatTimer(stat) stat += getTimeStamp() - Si; +#else +# define addStat(stat, value) +# define beginStatTimer() +# define endStatTimer(stat) +#endif + +#endif diff --git a/gromacs/includes/thermo.h b/gromacs/includes/thermo.h new file mode 100644 index 0000000..3006bc4 --- /dev/null +++ b/gromacs/includes/thermo.h @@ -0,0 +1,31 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +#ifndef __THERMO_H_ +#define __THERMO_H_ +extern void setupThermo(Parameter*, int); +extern void computeThermo(int, Parameter*, Atom*); +extern void adjustThermo(Parameter*, Atom*); +#endif diff --git a/gromacs/includes/timers.h b/gromacs/includes/timers.h new file mode 100644 index 0000000..2400c01 --- /dev/null +++ b/gromacs/includes/timers.h @@ -0,0 +1,11 @@ +#ifndef __TIMERS_H_ +#define __TIMERS_H_ + +typedef enum { + TOTAL = 0, + NEIGH, + FORCE, + NUMTIMER +} timertype; + +#endif diff --git a/gromacs/includes/timing.h b/gromacs/includes/timing.h new file mode 100644 index 0000000..4462b91 --- /dev/null +++ b/gromacs/includes/timing.h @@ -0,0 +1,30 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#ifndef __TIMING_H_ +#define __TIMING_H_ + +extern double getTimeStamp(); +extern double getTimeResolution(); +extern double getTimeStamp_(); + +#endif diff --git a/gromacs/includes/tracing.h b/gromacs/includes/tracing.h new file mode 100644 index 0000000..eab11ab --- /dev/null +++ b/gromacs/includes/tracing.h @@ -0,0 +1,118 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include + +#if defined(MEM_TRACER) || defined(INDEX_TRACER) +#include +#include +#endif + +#ifndef VECTOR_WIDTH +# define VECTOR_WIDTH 8 +#endif + +#ifndef TRACER_CONDITION +# define TRACER_CONDITION (!(timestep % param->every)) +#endif + +#ifdef MEM_TRACER +# define MEM_TRACER_INIT FILE *mem_tracer_fp; \ + if(TRACER_CONDITION) { \ + char mem_tracer_fn[128]; \ + snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \ + mem_tracer_fp = fopen(mem_tracer_fn, "w"); + } + +# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); } +# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); } +#else +# define MEM_TRACER_INIT +# define MEM_TRACER_END +# define MEM_TRACE(addr, op) +#endif + +#ifdef INDEX_TRACER +# define INDEX_TRACER_INIT FILE *index_tracer_fp; \ + if(TRACER_CONDITION) { \ + char index_tracer_fn[128]; \ + snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \ + index_tracer_fp = fopen(index_tracer_fn, "w"); \ + } + +# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); } +# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); } +# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); } +# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \ + for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \ + int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \ + fprintf(index_tracer_fp, "I: "); \ + for(int __j = 0; __j < __e; ++__j) { \ + fprintf(index_tracer_fp, "%d ", l[__i + __j]); \ + } \ + fprintf(index_tracer_fp, "\n"); \ + } \ + } + +# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \ + for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \ + int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \ + if(__e > 1) { \ + for(int __j = __i; __j < __i + __e - 1; ++__j) { \ + for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \ + if(l[__k] > l[__k + 1]) { \ + int __t = l[__k]; \ + l[__k] = l[__k + 1]; \ + l[__k + 1] = __t; \ + } \ + } \ + } \ + } \ + } \ + } + +# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \ + for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \ + int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \ + if(__e > 1) { \ + fprintf(index_tracer_fp, "D: "); \ + for(int __j = 0; __j < __e - 1; ++__j) { \ + int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \ + fprintf(index_tracer_fp, "%d ", __dist); \ + } \ + fprintf(index_tracer_fp, "\n"); \ + } \ + } \ + } +#else +# define INDEX_TRACER_INIT +# define INDEX_TRACER_END +# define INDEX_TRACE_NATOMS(nl, ng, mn) +# define INDEX_TRACE_ATOM(a) +# define INDEX_TRACE(l, e) +# define DIST_TRACE_SORT(l, e) +# define DIST_TRACE(l, e) +#endif + +extern void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep); diff --git a/gromacs/includes/util.h b/gromacs/includes/util.h new file mode 100644 index 0000000..20d8806 --- /dev/null +++ b/gromacs/includes/util.h @@ -0,0 +1,43 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#ifndef __UTIL_H_ +#define __UTIL_H_ + +#ifndef MIN +#define MIN(x,y) ((x)<(y)?(x):(y)) +#endif +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#endif +#ifndef ABS +#define ABS(a) ((a) >= 0 ? (a) : -(a)) +#endif + +#define FF_LJ 0 +#define FF_EAM 1 + +extern double myrandom(int*); +extern void random_reset(int *seed, int ibase, double *coord); +extern int str2ff(const char *string); +extern const char* ff2str(int ff); +#endif diff --git a/gromacs/includes/vtk.h b/gromacs/includes/vtk.h new file mode 100644 index 0000000..0f03a74 --- /dev/null +++ b/gromacs/includes/vtk.h @@ -0,0 +1,28 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include + +#ifndef __VTK_H_ +#define __VTK_H_ +extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep); +#endif diff --git a/gromacs/main-stub.c b/gromacs/main-stub.c new file mode 100644 index 0000000..4da90db --- /dev/null +++ b/gromacs/main-stub.c @@ -0,0 +1,292 @@ +#include +#include +//--- +#include +//--- +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HLINE "----------------------------------------------------------------------------\n" + +#define LATTICE_DISTANCE 10.0 +#define NEIGH_DISTANCE 1.0 + +extern double computeForceLJ(Parameter*, Atom*, Neighbor*, Stats*); +extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*); + +void init(Parameter *param) { + param->input_file = NULL; + param->epsilon = 1.0; + param->sigma6 = 1.0; + param->rho = 0.8442; + param->ntypes = 4; + param->ntimes = 200; + param->nx = 4; + param->ny = 4; + param->nz = 2; + param->lattice = LATTICE_DISTANCE; + param->cutforce = 5.0; + param->cutneigh = param->cutforce; + param->mass = 1.0; + // Unused + param->dt = 0.005; + param->dtforce = 0.5 * param->dt; + param->nstat = 100; + param->temp = 1.44; + param->every = 20; + param->proc_freq = 2.4; + param->eam_file = NULL; +} + +// Show debug messages +#define DEBUG(msg) printf(msg) +// Do not show debug messages +//#define DEBUG(msg) + +#define ADD_ATOM(x, y, z, vx, vy, vz) atom_x(atom->Nlocal) = base_x + x * NEIGH_DISTANCE; \ + atom_y(atom->Nlocal) = base_y + y * NEIGH_DISTANCE; \ + atom_z(atom->Nlocal) = base_z + z * NEIGH_DISTANCE; \ + atom->vx[atom->Nlocal] = vy; \ + atom->vy[atom->Nlocal] = vy; \ + atom->vz[atom->Nlocal] = vz; \ + atom->Nlocal++ + +int main(int argc, const char *argv[]) { + Eam eam; + Atom atom_data; + Atom *atom = (Atom *)(&atom_data); + Neighbor neighbor; + Stats stats; + Parameter param; + int atoms_per_unit_cell = 8; + int csv = 0; + + LIKWID_MARKER_INIT; + LIKWID_MARKER_REGISTER("force"); + DEBUG("Initializing parameters...\n"); + init(¶m); + + for(int i = 0; i < argc; i++) + { + if((strcmp(argv[i], "-f") == 0)) + { + if((param.force_field = str2ff(argv[++i])) < 0) { + fprintf(stderr, "Invalid force field!\n"); + exit(-1); + } + continue; + } + if((strcmp(argv[i], "-e") == 0)) + { + param.eam_file = strdup(argv[++i]); + continue; + } + if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) + { + param.ntimes = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-nx") == 0)) + { + param.nx = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-ny") == 0)) + { + param.ny = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-nz") == 0)) + { + param.nz = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-na") == 0)) + { + atoms_per_unit_cell = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "--freq") == 0)) + { + param.proc_freq = atof(argv[++i]); + continue; + } + if((strcmp(argv[i], "--csv") == 0)) + { + csv = 1; + continue; + } + if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) + { + printf("MD Bench: A minimalistic re-implementation of miniMD\n"); + printf(HLINE); + printf("-f : force field (lj or eam), default lj\n"); + printf("-n / --nsteps : set number of timesteps for simulation\n"); + printf("-nx/-ny/-nz : set linear dimension of systembox in x/y/z direction\n"); + printf("-na : set number of atoms per unit cell\n"); + printf("--freq : set CPU frequency (GHz) and display average cycles per atom and neighbors\n"); + printf("--csv: set output as CSV style\n"); + printf(HLINE); + exit(EXIT_SUCCESS); + } + } + + + if(param.force_field == FF_EAM) { + DEBUG("Initializing EAM parameters...\n"); + initEam(&eam, ¶m); + } + + param.xprd = param.nx * LATTICE_DISTANCE; + param.yprd = param.ny * LATTICE_DISTANCE; + param.zprd = param.nz * LATTICE_DISTANCE; + + DEBUG("Initializing atoms...\n"); + initAtom(atom); + initStats(&stats); + + atom->ntypes = param.ntypes; + atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)); + for(int i = 0; i < atom->ntypes * atom->ntypes; i++) { + atom->epsilon[i] = param.epsilon; + atom->sigma6[i] = param.sigma6; + atom->cutneighsq[i] = param.cutneigh * param.cutneigh; + atom->cutforcesq[i] = param.cutforce * param.cutforce; + } + + DEBUG("Creating atoms...\n"); + for(int i = 0; i < param.nx; ++i) { + for(int j = 0; j < param.ny; ++j) { + for(int k = 0; k < param.nz; ++k) { + int added_atoms = 0; + int fac_x = 1; + int fac_y = 1; + int fac_z = 1; + int fmod = 0; + MD_FLOAT base_x = i * LATTICE_DISTANCE; + MD_FLOAT base_y = j * LATTICE_DISTANCE; + MD_FLOAT base_z = k * LATTICE_DISTANCE; + MD_FLOAT vx = 0.0; + MD_FLOAT vy = 0.0; + MD_FLOAT vz = 0.0; + + while(atom->Nlocal > atom->Nmax - atoms_per_unit_cell) { + growAtom(atom); + } + + while(fac_x * fac_y * fac_z < atoms_per_unit_cell) { + if(fmod == 0) { fac_x *= 2; } + if(fmod == 1) { fac_y *= 2; } + if(fmod == 2) { fac_z *= 2; } + fmod = (fmod + 1) % 3; + } + + MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x; + MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y; + MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z; + for(int ii = 0; ii < fac_x; ++ii) { + for(int jj = 0; jj < fac_y; ++jj) { + for(int kk = 0; kk < fac_z; ++kk) { + if(added_atoms < atoms_per_unit_cell) { + atom->type[atom->Nlocal] = rand() % atom->ntypes; + ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz); + added_atoms++; + } + } + } + } + } + } + } + + const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT)); + const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)); + const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume); + + if(!csv) { + printf("Number of timesteps: %d\n", param.ntimes); + printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS); + printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS); + printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz); + printf("Atoms per unit cell: %d\n", atoms_per_unit_cell); + printf("Total number of atoms: %d\n", atom->Nlocal); + printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0); + printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0); + printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0); + } + + DEBUG("Initializing neighbor lists...\n"); + initNeighbor(&neighbor, ¶m); + DEBUG("Setting up neighbor lists...\n"); + setupNeighbor(¶m); + DEBUG("Building neighbor lists...\n"); + buildNeighbor(atom, &neighbor); + DEBUG("Computing forces...\n"); + if(param.force_field == FF_EAM) { + computeForceEam(&eam, ¶m, atom, &neighbor, &stats); + } else { + computeForceLJ(¶m, atom, &neighbor, &stats); + } + + double S, E; + S = getTimeStamp(); + for(int i = 0; i < param.ntimes; i++) { + +#if defined(MEM_TRACER) || defined(INDEX_TRACER) + traceAddresses(¶m, atom, &neighbor, i + 1); +#endif + + if(param.force_field == FF_EAM) { + computeForceEam(&eam, ¶m, atom, &neighbor, &stats); + } else { + computeForceLJ(¶m, atom, &neighbor, &stats); + } + } + E = getTimeStamp(); + double T_accum = E-S; + double freq_hz = param.proc_freq * 1.e9; + const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS; + const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats); + const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz; + const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1); + + if(!csv) { + printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6); + if(param.proc_freq > 0.0) { + printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh); + } + } else { + printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)"); + if(param.proc_freq > 0.0) { + printf(",cy/atom,cy/neigh"); + } + printf("\n"); + + printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f", + param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal, + estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6); + + if(param.proc_freq > 0.0) { + printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh); + } + printf("\n"); + } + + double timer[NUMTIMER]; + timer[FORCE] = T_accum; + displayStatistics(atom, ¶m, &stats, timer); + LIKWID_MARKER_CLOSE; + return EXIT_SUCCESS; +} diff --git a/gromacs/main.c b/gromacs/main.c new file mode 100644 index 0000000..ebb2dbd --- /dev/null +++ b/gromacs/main.c @@ -0,0 +1,324 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HLINE "----------------------------------------------------------------------------\n" + +extern double computeForceLJ(Parameter*, Atom*, Neighbor*, Stats*); +extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*); + +void init(Parameter *param) +{ + param->input_file = NULL; + param->vtk_file = NULL; + param->force_field = FF_LJ; + param->epsilon = 1.0; + param->sigma6 = 1.0; + param->rho = 0.8442; + param->ntypes = 4; + param->ntimes = 200; + param->dt = 0.005; + param->nx = 32; + param->ny = 32; + param->nz = 32; + param->cutforce = 2.5; + param->cutneigh = param->cutforce + 0.30; + param->temp = 1.44; + param->nstat = 100; + param->mass = 1.0; + param->dtforce = 0.5 * param->dt; + param->every = 20; + param->proc_freq = 2.4; +} + +double setup( + Parameter *param, + Eam *eam, + Atom *atom, + Neighbor *neighbor, + Stats *stats) +{ + if(param->force_field == FF_EAM) { initEam(eam, param); } + double S, E; + param->lattice = pow((4.0 / param->rho), (1.0 / 3.0)); + param->xprd = param->nx * param->lattice; + param->yprd = param->ny * param->lattice; + param->zprd = param->nz * param->lattice; + + S = getTimeStamp(); + initAtom(atom); + initPbc(atom); + initStats(stats); + initNeighbor(neighbor, param); + if(param->input_file == NULL) { + createAtom(atom, param); + } else { + readAtom(atom, param); + } + setupNeighbor(param); + setupThermo(param, atom->Natoms); + if(param->input_file == NULL) { adjustThermo(param, atom); } + setupPbc(atom, param); + updatePbc(atom, param); + buildNeighbor(atom, neighbor); + E = getTimeStamp(); + + return E-S; +} + +double reneighbour( + Parameter *param, + Atom *atom, + Neighbor *neighbor) +{ + double S, E; + + S = getTimeStamp(); + LIKWID_MARKER_START("reneighbour"); + updateAtomsPbc(atom, param); + setupPbc(atom, param); + updatePbc(atom, param); + //sortAtom(atom); + buildNeighbor(atom, neighbor); + LIKWID_MARKER_STOP("reneighbour"); + E = getTimeStamp(); + + return E-S; +} + +void initialIntegrate(Parameter *param, Atom *atom) +{ + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; + + for(int i = 0; i < atom->Nlocal; i++) { + vx[i] += param->dtforce * fx[i]; + vy[i] += param->dtforce * fy[i]; + vz[i] += param->dtforce * fz[i]; + atom_x(i) = atom_x(i) + param->dt * vx[i]; + atom_y(i) = atom_y(i) + param->dt * vy[i]; + atom_z(i) = atom_z(i) + param->dt * vz[i]; + } +} + +void finalIntegrate(Parameter *param, Atom *atom) +{ + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; + + for(int i = 0; i < atom->Nlocal; i++) { + vx[i] += param->dtforce * fx[i]; + vy[i] += param->dtforce * fy[i]; + vz[i] += param->dtforce * fz[i]; + } +} + +void printAtomState(Atom *atom) +{ + printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", + atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax); + + /* int nall = atom->Nlocal + atom->Nghost; */ + + /* for (int i=0; ix[i], atom->y[i], atom->z[i]); */ + /* } */ +} + +int main(int argc, char** argv) +{ + double timer[NUMTIMER]; + Eam eam; + Atom atom; + Neighbor neighbor; + Stats stats; + Parameter param; + + LIKWID_MARKER_INIT; +#pragma omp parallel + { + LIKWID_MARKER_REGISTER("force"); + //LIKWID_MARKER_REGISTER("reneighbour"); + //LIKWID_MARKER_REGISTER("pbc"); + } + init(¶m); + + for(int i = 0; i < argc; i++) + { + if((strcmp(argv[i], "-f") == 0)) + { + if((param.force_field = str2ff(argv[++i])) < 0) { + fprintf(stderr, "Invalid force field!\n"); + exit(-1); + } + continue; + } + if((strcmp(argv[i], "-i") == 0)) + { + param.input_file = strdup(argv[++i]); + continue; + } + if((strcmp(argv[i], "-e") == 0)) + { + param.eam_file = strdup(argv[++i]); + continue; + } + if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) + { + param.ntimes = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-nx") == 0)) + { + param.nx = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-ny") == 0)) + { + param.ny = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "-nz") == 0)) + { + param.nz = atoi(argv[++i]); + continue; + } + if((strcmp(argv[i], "--freq") == 0)) + { + param.proc_freq = atof(argv[++i]); + continue; + } + if((strcmp(argv[i], "--vtk") == 0)) + { + param.vtk_file = strdup(argv[++i]); + continue; + } + if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) + { + printf("MD Bench: A minimalistic re-implementation of miniMD\n"); + printf(HLINE); + printf("-f : force field (lj or eam), default lj\n"); + printf("-i : input file with atom positions (dump)\n"); + printf("-e : input file for EAM\n"); + printf("-n / --nsteps : set number of timesteps for simulation\n"); + printf("-nx/-ny/-nz : set linear dimension of systembox in x/y/z direction\n"); + printf("--freq : processor frequency (GHz)\n"); + printf("--vtk : VTK file for visualization\n"); + printf(HLINE); + exit(EXIT_SUCCESS); + } + } + + setup(¶m, &eam, &atom, &neighbor, &stats); + computeThermo(0, ¶m, &atom); +#if defined(MEM_TRACER) || defined(INDEX_TRACER) + traceAddresses(¶m, &atom, &neighbor, n + 1); +#endif + if(param.force_field == FF_EAM) { + timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats); + } else { + timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats); + } + + timer[NEIGH] = 0.0; + timer[TOTAL] = getTimeStamp(); + + if(param.vtk_file != NULL) { + write_atoms_to_vtk_file(param.vtk_file, &atom, 0); + } + + for(int n = 0; n < param.ntimes; n++) { + initialIntegrate(¶m, &atom); + + if((n + 1) % param.every) { + updatePbc(&atom, ¶m); + } else { + timer[NEIGH] += reneighbour(¶m, &atom, &neighbor); + } + +#if defined(MEM_TRACER) || defined(INDEX_TRACER) + traceAddresses(¶m, &atom, &neighbor, n + 1); +#endif + + if(param.force_field == FF_EAM) { + timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats); + } else { + timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats); + } + + finalIntegrate(¶m, &atom); + + if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { + computeThermo(n + 1, ¶m, &atom); + } + + if(param.vtk_file != NULL) { + write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1); + } + } + + timer[TOTAL] = getTimeStamp() - timer[TOTAL]; + computeThermo(-1, ¶m, &atom); + + printf(HLINE); + printf("Force field: %s\n", ff2str(param.force_field)); + printf("Data layout for positions: %s\n", POS_DATA_LAYOUT); +#if PRECISION == 1 + printf("Using single precision floating point.\n"); +#else + printf("Using double precision floating point.\n"); +#endif + printf(HLINE); + printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes); + printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n", + timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]); + printf(HLINE); + printf("Performance: %.2f million atom updates per second\n", + 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]); +#ifdef COMPUTE_STATS + displayStatistics(&atom, ¶m, &stats, timer); +#endif + LIKWID_MARKER_CLOSE; + return EXIT_SUCCESS; +} diff --git a/gromacs/neighbor.c b/gromacs/neighbor.c new file mode 100644 index 0000000..6b512a0 --- /dev/null +++ b/gromacs/neighbor.c @@ -0,0 +1,420 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include + +#include +#include +#include + +#define SMALL 1.0e-6 +#define FACTOR 0.999 + +static MD_FLOAT xprd, yprd, zprd; +static MD_FLOAT bininvx, bininvy, bininvz; +static int mbinxlo, mbinylo, mbinzlo; +static int nbinx, nbiny, nbinz; +static int mbinx, mbiny, mbinz; // n bins in x, y, z +static int *bincount; +static int *bins; +static int mbins; //total number of bins +static int atoms_per_bin; // max atoms per bin +static MD_FLOAT cutneigh; +static MD_FLOAT cutneighsq; // neighbor cutoff squared +static int nmax; +static int nstencil; // # of bins in stencil +static int* stencil; // stencil list of bin offsets +static MD_FLOAT binsizex, binsizey, binsizez; + +static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT); +static MD_FLOAT bindist(int, int, int); + +/* exported subroutines */ +void initNeighbor(Neighbor *neighbor, Parameter *param) +{ + MD_FLOAT neighscale = 5.0 / 6.0; + xprd = param->nx * param->lattice; + yprd = param->ny * param->lattice; + zprd = param->nz * param->lattice; + cutneigh = param->cutneigh; + nbinx = neighscale * param->nx; + nbiny = neighscale * param->ny; + nbinz = neighscale * param->nz; + nmax = 0; + atoms_per_bin = 8; + stencil = NULL; + bins = NULL; + bincount = NULL; + neighbor->maxneighs = 100; + neighbor->numneigh = NULL; + neighbor->neighbors = NULL; +} + +void setupNeighbor(Parameter* param) +{ + MD_FLOAT coord; + int mbinxhi, mbinyhi, mbinzhi; + int nextx, nexty, nextz; + + if(param->input_file != NULL) { + xprd = param->xprd; + yprd = param->yprd; + zprd = param->zprd; + } + + // TODO: update lo and hi for standard case and use them here instead + MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd; + MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd; + MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd; + + cutneighsq = cutneigh * cutneigh; + + if(param->input_file != NULL) { + binsizex = cutneigh * 0.5; + binsizey = cutneigh * 0.5; + binsizez = cutneigh * 0.5; + nbinx = (int)((param->xhi - param->xlo) / binsizex); + nbiny = (int)((param->yhi - param->ylo) / binsizey); + nbinz = (int)((param->zhi - param->zlo) / binsizez); + if(nbinx == 0) { nbinx = 1; } + if(nbiny == 0) { nbiny = 1; } + if(nbinz == 0) { nbinz = 1; } + bininvx = nbinx / (param->xhi - param->xlo); + bininvy = nbiny / (param->yhi - param->ylo); + bininvz = nbinz / (param->zhi - param->zlo); + } else { + binsizex = xprd / nbinx; + binsizey = yprd / nbiny; + binsizez = zprd / nbinz; + bininvx = 1.0 / binsizex; + bininvy = 1.0 / binsizey; + bininvz = 1.0 / binsizez; + } + + coord = xlo - cutneigh - SMALL * xprd; + mbinxlo = (int) (coord * bininvx); + if (coord < 0.0) { + mbinxlo = mbinxlo - 1; + } + coord = xhi + cutneigh + SMALL * xprd; + mbinxhi = (int) (coord * bininvx); + + coord = ylo - cutneigh - SMALL * yprd; + mbinylo = (int) (coord * bininvy); + if (coord < 0.0) { + mbinylo = mbinylo - 1; + } + coord = yhi + cutneigh + SMALL * yprd; + mbinyhi = (int) (coord * bininvy); + + coord = zlo - cutneigh - SMALL * zprd; + mbinzlo = (int) (coord * bininvz); + if (coord < 0.0) { + mbinzlo = mbinzlo - 1; + } + coord = zhi + cutneigh + SMALL * zprd; + mbinzhi = (int) (coord * bininvz); + + mbinxlo = mbinxlo - 1; + mbinxhi = mbinxhi + 1; + mbinx = mbinxhi - mbinxlo + 1; + + mbinylo = mbinylo - 1; + mbinyhi = mbinyhi + 1; + mbiny = mbinyhi - mbinylo + 1; + + mbinzlo = mbinzlo - 1; + mbinzhi = mbinzhi + 1; + mbinz = mbinzhi - mbinzlo + 1; + + nextx = (int) (cutneigh * bininvx); + if(nextx * binsizex < FACTOR * cutneigh) nextx++; + + nexty = (int) (cutneigh * bininvy); + if(nexty * binsizey < FACTOR * cutneigh) nexty++; + + nextz = (int) (cutneigh * bininvz); + if(nextz * binsizez < FACTOR * cutneigh) nextz++; + + if (stencil) { + free(stencil); + } + + stencil = (int*) malloc( + (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int)); + + nstencil = 0; + int kstart = -nextz; + + for(int k = kstart; k <= nextz; k++) { + for(int j = -nexty; j <= nexty; j++) { + for(int i = -nextx; i <= nextx; i++) { + if(bindist(i, j, k) < cutneighsq) { + stencil[nstencil++] = + k * mbiny * mbinx + j * mbinx + i; + } + } + } + } + + mbins = mbinx * mbiny * mbinz; + + if (bincount) { + free(bincount); + } + bincount = (int*) malloc(mbins * sizeof(int)); + + if (bins) { + free(bins); + } + bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int)); +} + +void buildNeighbor(Atom *atom, Neighbor *neighbor) +{ + int nall = atom->Nlocal + atom->Nghost; + + /* extend atom arrays if necessary */ + if(nall > nmax) { + nmax = nall; + if(neighbor->numneigh) free(neighbor->numneigh); + if(neighbor->neighbors) free(neighbor->neighbors); + neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); + neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*)); + } + + /* bin local & ghost atoms */ + binatoms(atom); + int resize = 1; + + /* loop over each atom, storing neighbors */ + while(resize) { + int new_maxneighs = neighbor->maxneighs; + resize = 0; + + for(int i = 0; i < atom->Nlocal; i++) { + int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]); + int n = 0; + MD_FLOAT xtmp = atom_x(i); + MD_FLOAT ytmp = atom_y(i); + MD_FLOAT ztmp = atom_z(i); + int ibin = coord2bin(xtmp, ytmp, ztmp); + #ifdef EXPLICIT_TYPES + int type_i = atom->type[i]; + #endif + for(int k = 0; k < nstencil; k++) { + int jbin = ibin + stencil[k]; + int* loc_bin = &bins[jbin * atoms_per_bin]; + + for(int m = 0; m < bincount[jbin]; m++) { + int j = loc_bin[m]; + + if ( j == i ){ + continue; + } + + MD_FLOAT delx = xtmp - atom_x(j); + MD_FLOAT dely = ytmp - atom_y(j); + MD_FLOAT delz = ztmp - atom_z(j); + MD_FLOAT rsq = delx * delx + dely * dely + delz * delz; + + #ifdef EXPLICIT_TYPES + int type_j = atom->type[j]; + const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j]; + #else + const MD_FLOAT cutoff = cutneighsq; + #endif + if( rsq <= cutoff ) { + neighptr[n++] = j; + } + } + } + + neighbor->numneigh[i] = n; + + if(n >= neighbor->maxneighs) { + resize = 1; + + if(n >= new_maxneighs) { + new_maxneighs = n; + } + } + } + + if(resize) { + printf("RESIZE %d\n", neighbor->maxneighs); + neighbor->maxneighs = new_maxneighs * 1.2; + free(neighbor->neighbors); + neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int)); + } + } +} + +/* internal subroutines */ +MD_FLOAT bindist(int i, int j, int k) +{ + MD_FLOAT delx, dely, delz; + + if(i > 0) { + delx = (i - 1) * binsizex; + } else if(i == 0) { + delx = 0.0; + } else { + delx = (i + 1) * binsizex; + } + + if(j > 0) { + dely = (j - 1) * binsizey; + } else if(j == 0) { + dely = 0.0; + } else { + dely = (j + 1) * binsizey; + } + + if(k > 0) { + delz = (k - 1) * binsizez; + } else if(k == 0) { + delz = 0.0; + } else { + delz = (k + 1) * binsizez; + } + + return (delx * delx + dely * dely + delz * delz); +} + +int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) +{ + int ix, iy, iz; + + if(xin >= xprd) { + ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo; + } else if(xin >= 0.0) { + ix = (int)(xin * bininvx) - mbinxlo; + } else { + ix = (int)(xin * bininvx) - mbinxlo - 1; + } + + if(yin >= yprd) { + iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo; + } else if(yin >= 0.0) { + iy = (int)(yin * bininvy) - mbinylo; + } else { + iy = (int)(yin * bininvy) - mbinylo - 1; + } + + if(zin >= zprd) { + iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo; + } else if(zin >= 0.0) { + iz = (int)(zin * bininvz) - mbinzlo; + } else { + iz = (int)(zin * bininvz) - mbinzlo - 1; + } + + return (iz * mbiny * mbinx + iy * mbinx + ix + 1); +} + +void binatoms(Atom *atom) +{ + int nall = atom->Nlocal + atom->Nghost; + int resize = 1; + + while(resize > 0) { + resize = 0; + + for(int i = 0; i < mbins; i++) { + bincount[i] = 0; + } + + for(int i = 0; i < nall; i++) { + int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i)); + + if(bincount[ibin] < atoms_per_bin) { + int ac = bincount[ibin]++; + bins[ibin * atoms_per_bin + ac] = i; + } else { + resize = 1; + } + } + + if(resize) { + free(bins); + atoms_per_bin *= 2; + bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int)); + } + } +} + +void sortAtom(Atom* atom) { + binatoms(atom); + int Nmax = atom->Nmax; + int* binpos = bincount; + + for(int i=1; ix; double* old_y = atom->y; double* old_z = atom->z; + double* old_vx = atom->vx; double* old_vy = atom->vy; double* old_vz = atom->vz; + + for(int mybin = 0; mybin0?binpos[mybin-1]:0; + int count = binpos[mybin] - start; + for(int k=0; kx); + atom->x = new_x; +#ifndef AOS + free(atom->y); + free(atom->z); + atom->y = new_y; atom->z = new_z; +#endif + free(atom->vx); free(atom->vy); free(atom->vz); + atom->vx = new_vx; atom->vy = new_vy; atom->vz = new_vz; +} diff --git a/gromacs/pbc.c b/gromacs/pbc.c new file mode 100644 index 0000000..3610701 --- /dev/null +++ b/gromacs/pbc.c @@ -0,0 +1,172 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +#include +#include +#include + +#define DELTA 20000 + +static int NmaxGhost; +static int *PBCx, *PBCy, *PBCz; + +static void growPbc(Atom*); + +/* exported subroutines */ +void initPbc(Atom* atom) +{ + NmaxGhost = 0; + atom->border_map = NULL; + PBCx = NULL; PBCy = NULL; PBCz = NULL; +} + +/* update coordinates of ghost atoms */ +/* uses mapping created in setupPbc */ +void updatePbc(Atom *atom, Parameter *param) +{ + int *border_map = atom->border_map; + int nlocal = atom->Nlocal; + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; + + for(int i = 0; i < atom->Nghost; i++) { + atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd; + atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd; + atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd; + } +} + +/* relocate atoms that have left domain according + * to periodic boundary conditions */ +void updateAtomsPbc(Atom *atom, Parameter *param) +{ + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; + + for(int i = 0; i < atom->Nlocal; i++) { + + if(atom_x(i) < 0.0) { + atom_x(i) += xprd; + } else if(atom_x(i) >= xprd) { + atom_x(i) -= xprd; + } + + if(atom_y(i) < 0.0) { + atom_y(i) += yprd; + } else if(atom_y(i) >= yprd) { + atom_y(i) -= yprd; + } + + if(atom_z(i) < 0.0) { + atom_z(i) += zprd; + } else if(atom_z(i) >= zprd) { + atom_z(i) -= zprd; + } + } +} + +/* setup periodic boundary conditions by + * defining ghost atoms around domain + * only creates mapping and coordinate corrections + * that are then enforced in updatePbc */ +#define ADDGHOST(dx,dy,dz) \ + Nghost++; \ + border_map[Nghost] = i; \ + PBCx[Nghost] = dx; \ + PBCy[Nghost] = dy; \ + PBCz[Nghost] = dz; \ + atom->type[atom->Nlocal + Nghost] = atom->type[i] + +void setupPbc(Atom *atom, Parameter *param) +{ + int *border_map = atom->border_map; + MD_FLOAT xprd = param->xprd; + MD_FLOAT yprd = param->yprd; + MD_FLOAT zprd = param->zprd; + MD_FLOAT Cutneigh = param->cutneigh; + int Nghost = -1; + + for(int i = 0; i < atom->Nlocal; i++) { + + if (atom->Nlocal + Nghost + 7 >= atom->Nmax) { + growAtom(atom); + } + if (Nghost + 7 >= NmaxGhost) { + growPbc(atom); + border_map = atom->border_map; + } + + MD_FLOAT x = atom_x(i); + MD_FLOAT y = atom_y(i); + MD_FLOAT z = atom_z(i); + + /* Setup ghost atoms */ + /* 6 planes */ + if (x < Cutneigh) { ADDGHOST(+1,0,0); } + if (x >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); } + if (y < Cutneigh) { ADDGHOST(0,+1,0); } + if (y >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); } + if (z < Cutneigh) { ADDGHOST(0,0,+1); } + if (z >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); } + /* 8 corners */ + if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1,+1,+1); } + if (x < Cutneigh && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(+1,-1,+1); } + if (x < Cutneigh && y >= Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); } + if (x < Cutneigh && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); } + if (x >= (xprd-Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1,+1,+1); } + if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,-1,+1); } + if (x >= (xprd-Cutneigh) && y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); } + if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); } + /* 12 edges */ + if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1,0,+1); } + if (x < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); } + if (x >= (xprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,0,+1); } + if (x >= (xprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); } + if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0,+1,+1); } + if (y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); } + if (y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(0,-1,+1); } + if (y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); } + if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1,+1,0); } + if (y < Cutneigh && x >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); } + if (y >= (yprd-Cutneigh) && x < Cutneigh) { ADDGHOST(+1,-1,0); } + if (y >= (yprd-Cutneigh) && x >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); } + } + // increase by one to make it the ghost atom count + atom->Nghost = Nghost + 1; +} + +/* internal subroutines */ +void growPbc(Atom* atom) +{ + int nold = NmaxGhost; + NmaxGhost += DELTA; + + atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int)); + PBCx = (int*) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int)); + PBCy = (int*) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int)); + PBCz = (int*) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int)); +} diff --git a/gromacs/stats.c b/gromacs/stats.c new file mode 100644 index 0000000..defaeac --- /dev/null +++ b/gromacs/stats.c @@ -0,0 +1,31 @@ +#include + +#include +#include +#include +#include + +void initStats(Stats *s) { + s->total_force_neighs = 0; + s->total_force_iters = 0; +} + +void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) { +#ifdef COMPUTE_STATS + double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) + + (double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) ); + double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1)); + double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1)); +#ifdef EXPLICIT_TYPES + force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int); +#endif + printf("Statistics:\n"); + printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq); + printf("\tAverage neighbors per atom: %.4f\n", avg_neigh); + printf("\tAverage SIMD iterations per atom: %.4f\n", avg_simd); + printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs); + printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters); + printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume); + printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters); +#endif +} diff --git a/gromacs/thermo.c b/gromacs/thermo.c new file mode 100644 index 0000000..6f83663 --- /dev/null +++ b/gromacs/thermo.c @@ -0,0 +1,138 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include + +#include +#include + +static int *steparr; +static MD_FLOAT *tmparr; +static MD_FLOAT *engarr; +static MD_FLOAT *prsarr; +static MD_FLOAT mvv2e; +static MD_FLOAT dof_boltz; +static MD_FLOAT t_scale; +static MD_FLOAT p_scale; +static MD_FLOAT e_scale; +static MD_FLOAT t_act; +static MD_FLOAT p_act; +static MD_FLOAT e_act; +static int mstat; + +/* exported subroutines */ +void setupThermo(Parameter *param, int natoms) +{ + int maxstat = param->ntimes / param->nstat + 2; + + steparr = (int*) malloc(maxstat * sizeof(int)); + tmparr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT)); + engarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT)); + prsarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT)); + + if(param->force_field == FF_LJ) { + mvv2e = 1.0; + dof_boltz = (natoms * 3 - 3); + t_scale = mvv2e / dof_boltz; + p_scale = 1.0 / 3 / param->xprd / param->yprd / param->zprd; + e_scale = 0.5; + } else { + mvv2e = 1.036427e-04; + dof_boltz = (natoms * 3 - 3) * 8.617343e-05; + t_scale = mvv2e / dof_boltz; + p_scale = 1.602176e+06 / 3 / param->xprd / param->yprd / param->zprd; + e_scale = 524287.985533;//16.0; + param->dtforce /= mvv2e; + } + + printf("step\ttemp\t\tpressure\n"); +} + +void computeThermo(int iflag, Parameter *param, Atom *atom) +{ + MD_FLOAT t = 0.0, p; + MD_FLOAT* vx = atom->vx; + MD_FLOAT* vy = atom->vy; + MD_FLOAT* vz = atom->vz; + + for(int i = 0; i < atom->Nlocal; i++) { + t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; + } + + t = t * t_scale; + p = (t * dof_boltz) * p_scale; + int istep = iflag; + + if(iflag == -1){ + istep = param->ntimes; + } + if(iflag == 0){ + mstat = 0; + } + + steparr[mstat] = istep; + tmparr[mstat] = t; + prsarr[mstat] = p; + mstat++; + fprintf(stdout, "%i\t%e\t%e\n", istep, t, p); +} + +void adjustThermo(Parameter *param, Atom *atom) +{ + /* zero center-of-mass motion */ + MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0; + MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; + + for(int i = 0; i < atom->Nlocal; i++) { + vxtot += vx[i]; + vytot += vy[i]; + vztot += vz[i]; + } + + vxtot = vxtot / atom->Natoms; + vytot = vytot / atom->Natoms; + vztot = vztot / atom->Natoms; + + for(int i = 0; i < atom->Nlocal; i++) { + vx[i] -= vxtot; + vy[i] -= vytot; + vz[i] -= vztot; + } + + t_act = 0; + MD_FLOAT t = 0.0; + + for(int i = 0; i < atom->Nlocal; i++) { + t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; + } + + t *= t_scale; + MD_FLOAT factor = sqrt(param->temp / t); + + for(int i = 0; i < atom->Nlocal; i++) { + vx[i] *= factor; + vy[i] *= factor; + vz[i] *= factor; + } +} diff --git a/gromacs/timing.c b/gromacs/timing.c new file mode 100644 index 0000000..2c1f12a --- /dev/null +++ b/gromacs/timing.c @@ -0,0 +1,43 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include + +double getTimeStamp() +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; +} + +double getTimeResolution() +{ + struct timespec ts; + clock_getres(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; +} + +double getTimeStamp_() +{ + return getTimeStamp(); +} diff --git a/gromacs/tracing.c b/gromacs/tracing.c new file mode 100644 index 0000000..d91a97a --- /dev/null +++ b/gromacs/tracing.c @@ -0,0 +1,73 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include +#include +#include +#include + +void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep) { + MEM_TRACER_INIT; + INDEX_TRACER_INIT; + int Nlocal = atom->Nlocal; + int* neighs; + MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; + + INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs); + for(int i = 0; i < Nlocal; i++) { + neighs = &neighbor->neighbors[i * neighbor->maxneighs]; + int numneighs = neighbor->numneigh[i]; + MEM_TRACE(atom_x(i), 'R'); + MEM_TRACE(atom_y(i), 'R'); + MEM_TRACE(atom_z(i), 'R'); + INDEX_TRACE_ATOM(i); + + #ifdef EXPLICIT_TYPES + MEM_TRACE(atom->type[i], 'R'); + #endif + + DIST_TRACE_SORT(neighs, numneighs); + INDEX_TRACE(neighs, numneighs); + DIST_TRACE(neighs, numneighs); + + for(int k = 0; k < numneighs; k++) { + MEM_TRACE(neighs[k], 'R'); + MEM_TRACE(atom_x(j), 'R'); + MEM_TRACE(atom_y(j), 'R'); + MEM_TRACE(atom_z(j), 'R'); + + #ifdef EXPLICIT_TYPES + MEM_TRACE(atom->type[j], 'R'); + #endif + } + + MEM_TRACE(fx[i], 'R'); + MEM_TRACE(fx[i], 'W'); + MEM_TRACE(fy[i], 'R'); + MEM_TRACE(fy[i], 'W'); + MEM_TRACE(fz[i], 'R'); + MEM_TRACE(fz[i], 'W'); + } + + INDEX_TRACER_END; + MEM_TRACER_END; +} diff --git a/gromacs/util.c b/gromacs/util.c new file mode 100644 index 0000000..0347ab6 --- /dev/null +++ b/gromacs/util.c @@ -0,0 +1,95 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * This file is part of MD-Bench. + * + * MD-Bench is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with MD-Bench. If not, see . + * ======================================================================================= + */ +#include + +#include + +/* Park/Miller RNG w/out MASKING, so as to be like f90s version */ +#define IA 16807 +#define IM 2147483647 +#define AM (1.0/IM) +#define IQ 127773 +#define IR 2836 +#define MASK 123459876 + +double myrandom(int* seed) +{ + int k= (*seed) / IQ; + double ans; + + *seed = IA * (*seed - k * IQ) - IR * k; + if(*seed < 0) *seed += IM; + ans = AM * (*seed); + return ans; +} + +void random_reset(int *seed, int ibase, double *coord) +{ + int i; + char *str = (char *) &ibase; + int n = sizeof(int); + unsigned int hash = 0; + + for (i = 0; i < n; i++) { + hash += str[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + str = (char *) coord; + n = 3 * sizeof(double); + for (i = 0; i < n; i++) { + hash += str[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + // keep 31 bits of unsigned int as new seed + // do not allow seed = 0, since will cause hang in gaussian() + + *seed = hash & 0x7ffffff; + if (!(*seed)) *seed = 1; + + // warm up the RNG + + for (i = 0; i < 5; i++) myrandom(seed); + //save = 0; +} + +int str2ff(const char *string) +{ + if(strncmp(string, "lj", 2) == 0) return FF_LJ; + if(strncmp(string, "eam", 3) == 0) return FF_EAM; + return -1; +} + +const char* ff2str(int ff) +{ + if(ff == FF_LJ) { return "lj"; } + if(ff == FF_EAM) { return "eam"; } + return "invalid"; +} diff --git a/gromacs/vtk.c b/gromacs/vtk.c new file mode 100644 index 0000000..b3ff0e6 --- /dev/null +++ b/gromacs/vtk.c @@ -0,0 +1,44 @@ +#include +#include + +#include + +int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) { + char timestep_filename[128]; + snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep); + FILE* fp = fopen(timestep_filename, "wb"); + + if(fp == NULL) { + fprintf(stderr, "Could not open VTK file for writing!\n"); + return -1; + } + + fprintf(fp, "# vtk DataFile Version 2.0\n"); + fprintf(fp, "Particle data\n"); + fprintf(fp, "ASCII\n"); + fprintf(fp, "DATASET UNSTRUCTURED_GRID\n"); + fprintf(fp, "POINTS %d double\n", atom->Nlocal); + for(int i = 0; i < atom->Nlocal; ++i) { + fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i)); + } + fprintf(fp, "\n\n"); + fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2); + for(int i = 0; i < atom->Nlocal; ++i) { + fprintf(fp, "1 %d\n", i); + } + fprintf(fp, "\n\n"); + fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal); + for(int i = 0; i < atom->Nlocal; ++i) { + fprintf(fp, "1\n"); + } + fprintf(fp, "\n\n"); + fprintf(fp, "POINT_DATA %d\n", atom->Nlocal); + fprintf(fp, "SCALARS mass double\n"); + fprintf(fp, "LOOKUP_TABLE default\n"); + for(int i = 0; i < atom->Nlocal; i++) { + fprintf(fp, "1.0\n"); + } + fprintf(fp, "\n\n"); + fclose(fp); + return 0; +}