diff --git a/asm/unused/force_lj.s b/asm/unused/force_lj.s
new file mode 100644
index 0000000..f1d5cd8
--- /dev/null
+++ b/asm/unused/force_lj.s
@@ -0,0 +1,326 @@
+.intel_syntax noprefix
+
+.text
+.align 16,0x90
+.globl computeForceLJ
+computeForceLJ:
+# parameter 1: rdi Parameter*
+# parameter 2: rsi Atom*
+# parameter 3: rdx Neighbor*
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ mov r9d, DWORD PTR [4+rsi] # r9d <- atom->Nlocal
+ vmovsd xmm2, QWORD PTR [96+rdi] # xmm2 <- param->cutforce
+ vmovsd xmm1, QWORD PTR [32+rdi] # xmm1 <- param->sigma6
+ vmovsd xmm0, QWORD PTR [24+rdi] # xmm0 <- param->epsilon
+ mov r13, QWORD PTR [64+rsi] # r13 <- atom->fx
+ mov r14, QWORD PTR [72+rsi] # r14 <- atom->fy
+ mov rdi, QWORD PTR [80+rsi] # rdi <- atom->fz
+ test r9d, r9d # atom->Nlocal <= 0
+ jle ..atom_loop_exit
+ xor r10d, r10d # r10d <- 0
+ mov ecx, r9d # ecx <- atom->Nlocal
+ xor r8d, r8d # r8d <- 0
+ mov r11d, 1 # r11d <- 1
+ xor eax, eax # eax <- 0
+ shr ecx, 1 # ecx <- atom->Nlocal >> 1
+ je ..zero_last_element # ecx == 0
+
+# Init forces to zero loop (unroll factor = 2)
+..init_force_loop:
+ mov QWORD PTR [r8+r13], rax # fx[i] <- 0
+ mov QWORD PTR [r8+r14], rax # fy[i] <- 0
+ mov QWORD PTR [r8+rdi], rax # fz[i] <- 0
+ mov QWORD PTR [8+r8+r13], rax # fx[i] <- 0
+ mov QWORD PTR [8+r8+r14], rax # fy[i] <- 0
+ mov QWORD PTR [8+r8+rdi], rax # fz[i] <- 0
+ add r8, 16 # i++
+ inc r10 # i++
+ cmp r10, rcx # i < Nlocal
+ jb ..init_force_loop
+
+# Trick to make r11d contain value of last element to be zeroed plus 1
+# Maybe we can directly put r10+10 here and zero r11d above, then remove the -1 below
+ lea r11d, DWORD PTR [1+r10+r10] # r11d <- i * 2 + 1
+..zero_last_element:
+ lea ecx, DWORD PTR [-1+r11] # ecx <- i * 2
+ cmp ecx, r9d # i >= Nlocal
+ jae ..before_atom_loop
+
+ # Set last element to zero
+ movsxd r11, r11d # r11 <- i * 2
+ mov QWORD PTR [-8+r13+r11*8], rax # fx[i] <- 0
+ mov QWORD PTR [-8+r14+r11*8], rax # fy[i] <- 0
+ mov QWORD PTR [-8+rdi+r11*8], rax # fz[i] <- 0
+
+# Initialize registers to be used within atom loop
+..before_atom_loop:
+ vmulsd xmm15, xmm2, xmm2 # xmm15 <- cutforcesq
+ vmovdqu32 ymm18, YMMWORD PTR .L_2il0floatpacket.0[rip] # ymm18 <- [8, ...]
+ vmulsd xmm0, xmm0, QWORD PTR .L_2il0floatpacket.3[rip] # xmm0 <- 48 * epsilon
+ vmovdqu32 ymm17, YMMWORD PTR .L_2il0floatpacket.1[rip] # ymm17 <- [0..7]
+ vmovups zmm7, ZMMWORD PTR .L_2il0floatpacket.4[rip] # zmm7 <- [0.5, ...]
+ vbroadcastsd zmm16, xmm15 # zmm16 <- [cutforcesq, ...]
+ vbroadcastsd zmm15, xmm1 # zmm15 <- [param->sigma6, ...]
+ vbroadcastsd zmm14, xmm0 # zmm14 <- [48 * epsilon, ...]
+ movsxd r9, r9d # r9 <- atom->Nlocal
+ xor r10d, r10d # r10d <- 0 (i)
+ mov rcx, QWORD PTR [24+rdx] # rcx <- neighbor->numneigh
+ mov r11, QWORD PTR [8+rdx] # r11 <- neighbor->neighbors
+ movsxd r12, DWORD PTR [16+rdx] # r12 <- neighbor->maxneighs
+ mov rdx, QWORD PTR [16+rsi] # rdx <- atom->x
+ ### AOS
+ xor eax, eax
+ ### SOA
+ #mov rax, QWORD PTR [24+rsi] # rax <- atom->y
+ #mov rsi, QWORD PTR [32+rsi] # rsi <- atom->z
+ ###
+ shl r12, 2 # r12 <- neighbor->maxneighs * 4
+
+ # Register spilling
+ mov QWORD PTR [-32+rsp], r9 # [-32+rsp] <- atom->Nlocal
+ mov QWORD PTR [-24+rsp], rcx # [-24+rsp] <- neighbor->numneigh
+ mov QWORD PTR [-16+rsp], r14 # [-16+rsp] <- atom->fy
+ mov QWORD PTR [-8+rsp], r13 # [-8+rsp] <- atom->fx
+ mov QWORD PTR [-40+rsp], r15 # [-40+rsp] <- r15
+ mov QWORD PTR [-48+rsp], rbx # [-48+rsp] <- rbx
+ #sub rsp, 64
+ #call getTimeStamp # xmm0 <- getTimeStamp()
+ #vmovsd QWORD PTR [-56+rsp], xmm0 # [-56+rsp] <- xmm0 [spill]
+ #add rsp, 64
+
+..atom_loop_begin:
+ mov rcx, QWORD PTR [-24+rsp] # rcx <- neighbor->numneigh
+ vxorpd xmm25, xmm25, xmm25 # xmm25 <- 0 (fix)
+ vmovapd xmm20, xmm25 # xmm20 <- 0 (fiy)
+ mov r13d, DWORD PTR [rcx+r10*4] # r13d <- neighbor->numneigh[i] (numneighs)
+ vmovapd xmm4, xmm20 # xmm4 <- 0 (fiz)
+
+ ### AOS
+ vmovsd xmm8, QWORD PTR[rdx+rax] # xmm8 <- atom->x[i * 3]
+ vmovsd xmm9, QWORD PTR[8+rdx+rax] # xmm9 <- atom->x[i * 3 + 1]
+ vmovsd xmm10, QWORD PTR[16+rdx+rax] # xmm10 <- atom->x[i * 3 + 2]
+ ### SOA
+ #vmovsd xmm8, QWORD PTR [rdx+r10*8] # xmm8 <- atom->x[i]
+ #vmovsd xmm9, QWORD PTR [rax+r10*8] # xmm9 <- atom->y[i]
+ #vmovsd xmm10, QWORD PTR [rsi+r10*8] # xmm10 <- atom->z[i]
+ ###
+ vbroadcastsd zmm0, xmm8 # zmm0 <- atom_x(i)
+ vbroadcastsd zmm1, xmm9 # zmm1 <- atom_y(i)
+ vbroadcastsd zmm2, xmm10 # zmm2 <- atom_z(i)
+ test r13d, r13d # numneighs <= 0
+ jle ..atom_loop_exit
+
+ vpxord zmm13, zmm13, zmm13 # zmm13 <- 0 (fix)
+ vmovaps zmm12, zmm13 # zmm12 <- 0 (fiy)
+ vmovaps zmm11, zmm12 # zmm11 <- 0 (fiz)
+ mov rcx, r12 # rcx <- neighbor->maxneighs * 4
+ imul rcx, r10 # rcx <- neighbor->maxneighs * 4 * i
+ add rcx, r11 # rcx <- &neighbor->neighbors[neighbor->maxneighs * i]
+ xor r9d, r9d # r9d <- 0 (k)
+ mov r14d, r13d # r14d <- numneighs
+ cmp r14d, 8
+ jl ..compute_forces_remainder
+
+..compute_forces:
+ vpcmpeqb k1, xmm0, xmm0
+ vpcmpeqb k2, xmm0, xmm0
+ vpcmpeqb k3, xmm0, xmm0
+ vmovdqu ymm3, YMMWORD PTR [rcx+r9*4]
+ vpxord zmm5, zmm5, zmm5
+ vpxord zmm6, zmm6, zmm6
+
+ ### AOS
+ vpaddd ymm4, ymm3, ymm3
+ vpaddd ymm3, ymm3, ymm4
+ vpxord zmm4, zmm4, zmm4
+ vgatherdpd zmm4{k1}, [rdx+ymm3*8]
+ vgatherdpd zmm5{k2}, [8+rdx+ymm3*8]
+ vgatherdpd zmm6{k3}, [16+rdx+ymm3*8]
+ ### SOA
+ #vpxord zmm4, zmm4, zmm4
+ #vgatherdpd zmm5{k2}, [rax+ymm3*8]
+ #vgatherdpd zmm4{k1}, [rdx+ymm3*8]
+ #vgatherdpd zmm6{k3}, [rsi+ymm3*8]
+ ###
+
+ vsubpd zmm29, zmm1, zmm5 # zmm29 <- atom_y(i) - atom_y(j) -- dely
+ vsubpd zmm28, zmm0, zmm4 # zmm28 <- atom_x(i) - atom_x(j) -- delx
+ vsubpd zmm31, zmm2, zmm6 # zmm31 <- atom_z(i) - atom_z(j) -- delz
+ vmulpd zmm20, zmm29, zmm29 # zmm20 <- dely * dely
+ vfmadd231pd zmm20, zmm28, zmm28 # zmm20 <- dely * dely + delx * delx
+ vfmadd231pd zmm20, zmm31, zmm31 # zmm20 <- zmm20 + delz * delz -- rsq
+
+ # Cutoff radius condition
+ vrcp14pd zmm27, zmm20 # zmm27 <- 1.0 / rsq (sr2)
+ vcmppd k5, zmm20, zmm16, 1 # k5 <- rsq < cutforcesq
+ vmulpd zmm22, zmm27, zmm15 # zmm22 <- sr2 * sigma6
+ vmulpd zmm24, zmm27, zmm14 # zmm24 <- 48.0 * epsilon * sr2
+ vmulpd zmm25, zmm27, zmm22 # zmm25 <- sr2 * sigma6 * sr2
+ vmulpd zmm23, zmm27, zmm25 # zmm23 <- sr2 * sigma6 * sr2 * sr2
+ vfmsub213pd zmm27, zmm25, zmm7 # zmm27 <- sr2 * sigma * sr2 * sr2 - 0.5
+ vmulpd zmm26, zmm23, zmm24 # zmm26 <- 48.0 * epsilon * sr2 * sr2 * sigma6 * sr2
+ vmulpd zmm30, zmm26, zmm27 # zmm30 <- force
+ vfmadd231pd zmm13{k5}, zmm30, zmm28 # fix += force * delx
+ vfmadd231pd zmm12{k5}, zmm30, zmm29 # fiy += force * dely
+ vfmadd231pd zmm11{k5}, zmm30, zmm31 # fiz += force * delz
+ sub r14d, 8
+ add r9, 8
+ cmp r14d, 8
+ jge ..compute_forces
+
+# Check if there are remaining neighbors to be computed
+..compute_forces_remainder:
+ test r14d, r14d
+ jle ..sum_up_forces
+
+ vpbroadcastd ymm4, r14d
+ vpcmpgtd k1, ymm4, ymm17
+ kmovw r15d, k1
+ vmovdqu32 ymm3{k1}{z}, YMMWORD PTR [rcx+r9*4]
+ kmovw k2, k1
+ kmovw k3, k1
+ vpxord zmm5, zmm5, zmm5
+ vpxord zmm6, zmm6, zmm6
+
+ ### AOS
+ vpaddd ymm4, ymm3, ymm3
+ vpaddd ymm3, ymm3, ymm4
+ vpxord zmm4, zmm4, zmm4
+ vgatherdpd zmm4{k1}, [rdx+ymm3*8]
+ vgatherdpd zmm5{k2}, [8+rdx+ymm3*8]
+ vgatherdpd zmm6{k3}, [16+rdx+ymm3*8]
+ #### SOA
+ #vpxord zmm4, zmm4, zmm4
+ #vgatherdpd zmm5{k2}, [rax+ymm3*8]
+ #vgatherdpd zmm4{k1}, [rdx+ymm3*8]
+ #vgatherdpd zmm6{k3}, [rsi+ymm3*8]
+ ###
+
+ vsubpd zmm29, zmm1, zmm5 # zmm29 <- atom_y(i) - atom_y(j) -- dely
+ vsubpd zmm28, zmm0, zmm4 # zmm28 <- atom_x(i) - atom_x(j) -- delx
+ vsubpd zmm31, zmm2, zmm6 # zmm31 <- atom_z(i) - atom_z(j) -- delz
+ vmulpd zmm20, zmm29, zmm29 # zmm20 <- dely * dely
+ vfmadd231pd zmm20, zmm28, zmm28 # zmm20 <- dely * dely + delx * delx
+ vfmadd231pd zmm20, zmm31, zmm31 # zmm20 <- zmm20 + delz * delz -- rsq
+
+ # Cutoff radius condition
+ vrcp14pd zmm27, zmm20 # zmm27 <- 1.0 / rsq (sr2)
+ vcmppd k5, zmm20, zmm16, 1 # k5 <- rsq < cutforcesq
+ kmovw r9d, k5 # r9d <- rsq < cutforcesq
+ and r15d, r9d # r15d <- rsq < cutforcesq && k < numneighs
+ kmovw k3, r15d # k3 <- rsq < cutforcesq && k < numneighs
+ vmulpd zmm22, zmm27, zmm15 # zmm22 <- sr2 * sigma6
+ vmulpd zmm24, zmm27, zmm14 # zmm24 <- 48.0 * epsilon * sr2
+ vmulpd zmm25, zmm27, zmm22 # zmm25 <- sr2 * sigma6 * sr2
+ vmulpd zmm23, zmm27, zmm25 # zmm23 <- sr2 * sigma6 * sr2 * sr2
+ vfmsub213pd zmm27, zmm25, zmm7 # zmm27 <- sr2 * sigma * sr2 * sr2 - 0.5
+ vmulpd zmm26, zmm23, zmm24 # zmm26 <- 48.0 * epsilon * sr2 * sr2 * sigma6 * sr2
+ vmulpd zmm30, zmm26, zmm27 # zmm30 <- force
+ vfmadd231pd zmm13{k3}, zmm30, zmm28 # fix += force * delx
+ vfmadd231pd zmm12{k3}, zmm30, zmm29 # fiy += force * dely
+ vfmadd231pd zmm11{k3}, zmm30, zmm31 # fiz += force * delz
+
+# Forces are currently separated in different lanes of zmm registers, hence it is necessary to permutate
+# and add them (reduction) to obtain the final contribution for the current atom
+..sum_up_forces:
+ vmovups zmm10, ZMMWORD PTR .L_2il0floatpacket.6[rip]
+ vpermd zmm0, zmm10, zmm11
+ vpermd zmm5, zmm10, zmm12
+ vpermd zmm21, zmm10, zmm13
+ vaddpd zmm11, zmm0, zmm11
+ vaddpd zmm12, zmm5, zmm12
+ vaddpd zmm13, zmm21, zmm13
+ vpermpd zmm1, zmm11, 78
+ vpermpd zmm6, zmm12, 78
+ vpermpd zmm22, zmm13, 78
+ vaddpd zmm2, zmm11, zmm1
+ vaddpd zmm8, zmm12, zmm6
+ vaddpd zmm23, zmm13, zmm22
+ vpermpd zmm3, zmm2, 177
+ vpermpd zmm9, zmm8, 177
+ vpermpd zmm24, zmm23, 177
+ vaddpd zmm4, zmm2, zmm3
+ vaddpd zmm20, zmm8, zmm9
+ vaddpd zmm25, zmm23, zmm24
+
+..atom_loop_exit:
+ mov rcx, QWORD PTR [-8+rsp] #84.9[spill]
+ mov rbx, QWORD PTR [-16+rsp] #85.9[spill]
+
+ ### AOS
+ add rax, 24
+ ###
+
+ vaddsd xmm0, xmm25, QWORD PTR [rcx+r10*8] #84.9
+ vmovsd QWORD PTR [rcx+r10*8], xmm0 #84.9
+ vaddsd xmm1, xmm20, QWORD PTR [rbx+r10*8] #85.9
+ vmovsd QWORD PTR [rbx+r10*8], xmm1 #85.9
+ vaddsd xmm2, xmm4, QWORD PTR [rdi+r10*8] #86.9
+ vmovsd QWORD PTR [rdi+r10*8], xmm2 #86.9
+ inc r10 #55.5
+ cmp r10, QWORD PTR [-32+rsp] #55.5[spill]
+ jb ..atom_loop_begin
+ vzeroupper #93.12
+ vxorpd xmm0, xmm0, xmm0 #93.12
+ #call getTimeStamp # xmm0 <- getTimeStamp()
+ #vsubsd xmm0, xmm0, QWORD PTR [-56+rsp] # xmm0 <- E-S
+ pop rbx
+ pop r15
+ pop r14 #93.12
+ pop r13 #93.12
+ pop r12 #93.12
+ pop rbp #93.12
+ ret #93.12
+
+.type computeForceLJ,@function
+.size computeForceLJ,.-computeForceLJ
+
+
+..LNcomputeForce.0:
+ .data
+# -- End computeForceLJ
+ .section .rodata, "a"
+ .align 64
+ .align 64
+.L_2il0floatpacket.2:
+ .long 0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000,0x00000000,0x3ff00000
+ .type .L_2il0floatpacket.2,@object
+ .size .L_2il0floatpacket.2,64
+ .align 64
+.L_2il0floatpacket.4:
+ .long 0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000,0x00000000,0x3fe00000
+ .type .L_2il0floatpacket.4,@object
+ .size .L_2il0floatpacket.4,64
+ .align 64
+.L_2il0floatpacket.6:
+ .long 0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f,0x00000008,0x00000009,0x0000000a,0x0000000b,0x0000000c,0x0000000d,0x0000000e,0x0000000f
+ .type .L_2il0floatpacket.6,@object
+ .size .L_2il0floatpacket.6,64
+ .align 32
+.L_2il0floatpacket.0:
+ .long 0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008,0x00000008
+ .type .L_2il0floatpacket.0,@object
+ .size .L_2il0floatpacket.0,32
+ .align 32
+.L_2il0floatpacket.1:
+ .long 0x00000000,0x00000001,0x00000002,0x00000003,0x00000004,0x00000005,0x00000006,0x00000007
+ .type .L_2il0floatpacket.1,@object
+ .size .L_2il0floatpacket.1,32
+ .align 8
+.L_2il0floatpacket.3:
+ .long 0x00000000,0x40480000
+ .type .L_2il0floatpacket.3,@object
+ .size .L_2il0floatpacket.3,8
+ .align 8
+.L_2il0floatpacket.5:
+ .long 0x00000000,0x3ff00000
+ .type .L_2il0floatpacket.5,@object
+ .size .L_2il0floatpacket.5,8
+ .data
+ .section .note.GNU-stack, ""
+# End
diff --git a/src/allocate.c b/default/allocate.c
similarity index 100%
rename from src/allocate.c
rename to default/allocate.c
diff --git a/src/atom.c b/default/atom.c
similarity index 100%
rename from src/atom.c
rename to default/atom.c
diff --git a/src/eam_utils.c b/default/eam_utils.c
similarity index 100%
rename from src/eam_utils.c
rename to default/eam_utils.c
diff --git a/src/force_eam.c b/default/force_eam.c
similarity index 100%
rename from src/force_eam.c
rename to default/force_eam.c
diff --git a/src/force_lj.c b/default/force_lj.c
similarity index 100%
rename from src/force_lj.c
rename to default/force_lj.c
diff --git a/src/includes/allocate.h b/default/includes/allocate.h
similarity index 100%
rename from src/includes/allocate.h
rename to default/includes/allocate.h
diff --git a/src/includes/atom.h b/default/includes/atom.h
similarity index 100%
rename from src/includes/atom.h
rename to default/includes/atom.h
diff --git a/src/includes/eam.h b/default/includes/eam.h
similarity index 100%
rename from src/includes/eam.h
rename to default/includes/eam.h
diff --git a/src/includes/likwid-marker.h b/default/includes/likwid-marker.h
similarity index 100%
rename from src/includes/likwid-marker.h
rename to default/includes/likwid-marker.h
diff --git a/src/includes/neighbor.h b/default/includes/neighbor.h
similarity index 100%
rename from src/includes/neighbor.h
rename to default/includes/neighbor.h
diff --git a/src/includes/parameter.h b/default/includes/parameter.h
similarity index 100%
rename from src/includes/parameter.h
rename to default/includes/parameter.h
diff --git a/src/includes/pbc.h b/default/includes/pbc.h
similarity index 100%
rename from src/includes/pbc.h
rename to default/includes/pbc.h
diff --git a/src/includes/stats.h b/default/includes/stats.h
similarity index 100%
rename from src/includes/stats.h
rename to default/includes/stats.h
diff --git a/src/includes/thermo.h b/default/includes/thermo.h
similarity index 100%
rename from src/includes/thermo.h
rename to default/includes/thermo.h
diff --git a/src/includes/timers.h b/default/includes/timers.h
similarity index 100%
rename from src/includes/timers.h
rename to default/includes/timers.h
diff --git a/src/includes/timing.h b/default/includes/timing.h
similarity index 100%
rename from src/includes/timing.h
rename to default/includes/timing.h
diff --git a/src/includes/tracing.h b/default/includes/tracing.h
similarity index 100%
rename from src/includes/tracing.h
rename to default/includes/tracing.h
diff --git a/src/includes/util.h b/default/includes/util.h
similarity index 100%
rename from src/includes/util.h
rename to default/includes/util.h
diff --git a/src/includes/vtk.h b/default/includes/vtk.h
similarity index 100%
rename from src/includes/vtk.h
rename to default/includes/vtk.h
diff --git a/src/main-stub.c b/default/main-stub.c
similarity index 100%
rename from src/main-stub.c
rename to default/main-stub.c
diff --git a/src/main.c b/default/main.c
similarity index 100%
rename from src/main.c
rename to default/main.c
diff --git a/src/neighbor.c b/default/neighbor.c
similarity index 100%
rename from src/neighbor.c
rename to default/neighbor.c
diff --git a/src/pbc.c b/default/pbc.c
similarity index 100%
rename from src/pbc.c
rename to default/pbc.c
diff --git a/src/stats.c b/default/stats.c
similarity index 100%
rename from src/stats.c
rename to default/stats.c
diff --git a/src/thermo.c b/default/thermo.c
similarity index 100%
rename from src/thermo.c
rename to default/thermo.c
diff --git a/src/timing.c b/default/timing.c
similarity index 100%
rename from src/timing.c
rename to default/timing.c
diff --git a/src/tracing.c b/default/tracing.c
similarity index 100%
rename from src/tracing.c
rename to default/tracing.c
diff --git a/src/util.c b/default/util.c
similarity index 100%
rename from src/util.c
rename to default/util.c
diff --git a/src/vtk.c b/default/vtk.c
similarity index 100%
rename from src/vtk.c
rename to default/vtk.c
diff --git a/gromacs/allocate.c b/gromacs/allocate.c
new file mode 100644
index 0000000..3aa4e1a
--- /dev/null
+++ b/gromacs/allocate.c
@@ -0,0 +1,70 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+#include
+
+void* allocate (int alignment, size_t bytesize)
+{
+ int errorCode;
+ void* ptr;
+
+ errorCode = posix_memalign(&ptr, alignment, bytesize);
+
+ if (errorCode) {
+ if (errorCode == EINVAL) {
+ fprintf(stderr,
+ "Error: Alignment parameter is not a power of two\n");
+ exit(EXIT_FAILURE);
+ }
+ if (errorCode == ENOMEM) {
+ fprintf(stderr,
+ "Error: Insufficient memory to fulfill the request\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (ptr == NULL) {
+ fprintf(stderr, "Error: posix_memalign failed!\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return ptr;
+}
+
+void* reallocate (
+ void* ptr,
+ int alignment,
+ size_t newBytesize,
+ size_t oldBytesize)
+{
+ void* newarray = allocate(alignment, newBytesize);
+
+ if(ptr != NULL) {
+ memcpy(newarray, ptr, oldBytesize);
+ free(ptr);
+ }
+
+ return newarray;
+}
diff --git a/gromacs/atom.c b/gromacs/atom.c
new file mode 100644
index 0000000..5d41cd2
--- /dev/null
+++ b/gromacs/atom.c
@@ -0,0 +1,276 @@
+/*
+ * =======================================================================================
+ *
+ * Authors: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Rafael Ravedutti (rr), rafaelravedutti@gmail.com
+ *
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define DELTA 20000
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+void initAtom(Atom *atom)
+{
+ atom->x = NULL; atom->y = NULL; atom->z = NULL;
+ atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
+ atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
+ atom->Natoms = 0;
+ atom->Nlocal = 0;
+ atom->Nghost = 0;
+ atom->Nmax = 0;
+ atom->type = NULL;
+ atom->ntypes = 0;
+ atom->epsilon = NULL;
+ atom->sigma6 = NULL;
+ atom->cutforcesq = NULL;
+ atom->cutneighsq = NULL;
+}
+
+void createAtom(Atom *atom, Parameter *param)
+{
+ MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
+ MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
+ MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
+ atom->Natoms = 4 * param->nx * param->ny * param->nz;
+ atom->Nlocal = 0;
+ atom->ntypes = param->ntypes;
+ atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
+ atom->epsilon[i] = param->epsilon;
+ atom->sigma6[i] = param->sigma6;
+ atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
+ atom->cutforcesq[i] = param->cutforce * param->cutforce;
+ }
+
+ MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
+ int ilo = (int) (xlo / (0.5 * alat) - 1);
+ int ihi = (int) (xhi / (0.5 * alat) + 1);
+ int jlo = (int) (ylo / (0.5 * alat) - 1);
+ int jhi = (int) (yhi / (0.5 * alat) + 1);
+ int klo = (int) (zlo / (0.5 * alat) - 1);
+ int khi = (int) (zhi / (0.5 * alat) + 1);
+
+ ilo = MAX(ilo, 0);
+ ihi = MIN(ihi, 2 * param->nx - 1);
+ jlo = MAX(jlo, 0);
+ jhi = MIN(jhi, 2 * param->ny - 1);
+ klo = MAX(klo, 0);
+ khi = MIN(khi, 2 * param->nz - 1);
+
+ MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp;
+ int i, j, k, m, n;
+ int sx = 0; int sy = 0; int sz = 0;
+ int ox = 0; int oy = 0; int oz = 0;
+ int subboxdim = 8;
+
+ while(oz * subboxdim <= khi) {
+
+ k = oz * subboxdim + sz;
+ j = oy * subboxdim + sy;
+ i = ox * subboxdim + sx;
+
+ if(((i + j + k) % 2 == 0) &&
+ (i >= ilo) && (i <= ihi) &&
+ (j >= jlo) && (j <= jhi) &&
+ (k >= klo) && (k <= khi)) {
+
+ xtmp = 0.5 * alat * i;
+ ytmp = 0.5 * alat * j;
+ ztmp = 0.5 * alat * k;
+
+ if( xtmp >= xlo && xtmp < xhi &&
+ ytmp >= ylo && ytmp < yhi &&
+ ztmp >= zlo && ztmp < zhi ) {
+
+ n = k * (2 * param->ny) * (2 * param->nx) +
+ j * (2 * param->nx) +
+ i + 1;
+
+ for(m = 0; m < 5; m++) {
+ myrandom(&n);
+ }
+ vxtmp = myrandom(&n);
+
+ for(m = 0; m < 5; m++){
+ myrandom(&n);
+ }
+ vytmp = myrandom(&n);
+
+ for(m = 0; m < 5; m++) {
+ myrandom(&n);
+ }
+ vztmp = myrandom(&n);
+
+ if(atom->Nlocal == atom->Nmax) {
+ growAtom(atom);
+ }
+
+ atom_x(atom->Nlocal) = xtmp;
+ atom_y(atom->Nlocal) = ytmp;
+ atom_z(atom->Nlocal) = ztmp;
+ atom->vx[atom->Nlocal] = vxtmp;
+ atom->vy[atom->Nlocal] = vytmp;
+ atom->vz[atom->Nlocal] = vztmp;
+ atom->type[atom->Nlocal] = rand() % atom->ntypes;
+ atom->Nlocal++;
+ }
+ }
+
+ sx++;
+
+ if(sx == subboxdim) { sx = 0; sy++; }
+ if(sy == subboxdim) { sy = 0; sz++; }
+ if(sz == subboxdim) { sz = 0; ox++; }
+ if(ox * subboxdim > ihi) { ox = 0; oy++; }
+ if(oy * subboxdim > jhi) { oy = 0; oz++; }
+ }
+}
+
+int readAtom(Atom* atom, Parameter* param)
+{
+ FILE *fp = fopen(param->input_file, "r");
+ char line[MAXLINE];
+ int natoms = 0;
+ int read_atoms = 0;
+ int atom_id = -1;
+ int ts = -1;
+
+ if(!fp) {
+ fprintf(stderr, "Could not open input file: %s\n", param->input_file);
+ exit(-1);
+ return -1;
+ }
+
+ while(!feof(fp) && ts < 1 && !read_atoms) {
+ fgets(line, MAXLINE, fp);
+ if(strncmp(line, "ITEM: ", 6) == 0) {
+ char *item = &line[6];
+
+ if(strncmp(item, "TIMESTEP", 8) == 0) {
+ fgets(line, MAXLINE, fp);
+ ts = atoi(line);
+ } else if(strncmp(item, "NUMBER OF ATOMS", 15) == 0) {
+ fgets(line, MAXLINE, fp);
+ natoms = atoi(line);
+ atom->Natoms = natoms;
+ atom->Nlocal = natoms;
+ while(atom->Nlocal >= atom->Nmax) {
+ growAtom(atom);
+ }
+ } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
+ fgets(line, MAXLINE, fp);
+ param->xlo = atof(strtok(line, " "));
+ param->xhi = atof(strtok(NULL, " "));
+ param->xprd = param->xhi - param->xlo;
+
+ fgets(line, MAXLINE, fp);
+ param->ylo = atof(strtok(line, " "));
+ param->yhi = atof(strtok(NULL, " "));
+ param->yprd = param->yhi - param->ylo;
+
+ fgets(line, MAXLINE, fp);
+ param->zlo = atof(strtok(line, " "));
+ param->zhi = atof(strtok(NULL, " "));
+ param->zprd = param->zhi - param->zlo;
+ } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
+ for(int i = 0; i < natoms; i++) {
+ fgets(line, MAXLINE, fp);
+ atom_id = atoi(strtok(line, " ")) - 1;
+ atom->type[atom_id] = atoi(strtok(NULL, " "));
+ atom_x(atom_id) = atof(strtok(NULL, " "));
+ atom_y(atom_id) = atof(strtok(NULL, " "));
+ atom_z(atom_id) = atof(strtok(NULL, " "));
+ atom->vx[atom_id] = atof(strtok(NULL, " "));
+ atom->vy[atom_id] = atof(strtok(NULL, " "));
+ atom->vz[atom_id] = atof(strtok(NULL, " "));
+ atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
+ read_atoms++;
+ }
+ } else {
+ fprintf(stderr, "Invalid item: %s\n", item);
+ exit(-1);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
+ exit(-1);
+ return -1;
+ }
+ }
+
+ if(ts < 0 || !natoms || !read_atoms) {
+ fprintf(stderr, "Input error: atom data was not read!\n");
+ exit(-1);
+ return -1;
+ }
+
+ atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
+ atom->epsilon[i] = param->epsilon;
+ atom->sigma6[i] = param->sigma6;
+ atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
+ atom->cutforcesq[i] = param->cutforce * param->cutforce;
+ }
+
+ fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
+ return natoms;
+}
+
+void growAtom(Atom *atom)
+{
+ int nold = atom->Nmax;
+ atom->Nmax += DELTA;
+
+ #ifdef AOS
+ atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
+ #else
+ atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ #endif
+ atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+ atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
+}
diff --git a/gromacs/eam_utils.c b/gromacs/eam_utils.c
new file mode 100644
index 0000000..aaa5f40
--- /dev/null
+++ b/gromacs/eam_utils.c
@@ -0,0 +1,285 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+void initEam(Eam* eam, Parameter* param) {
+ int ntypes = param->ntypes;
+ eam->nmax = 0;
+ eam->fp = NULL;
+ coeff(eam, param);
+ init_style(eam, param);
+}
+
+void coeff(Eam* eam, Parameter* param) {
+ read_eam_file(&eam->file, param->eam_file);
+ param->mass = eam->file.mass;
+ param->cutforce = eam->file.cut;
+ param->cutneigh = param->cutforce + 1.0;
+ param->temp = 600.0;
+ param->dt = 0.001;
+ param->rho = 0.07041125;
+ param->dtforce = 0.5 * param->dt / param->mass;
+}
+
+void init_style(Eam* eam, Parameter* param) {
+ // convert read-in file(s) to arrays and spline them
+ file2array(eam);
+ array2spline(eam, param);
+}
+
+void read_eam_file(Funcfl* file, const char* filename) {
+ FILE* fptr;
+ char line[MAXLINE];
+
+ fptr = fopen(filename, "r");
+ if(fptr == NULL) {
+ printf("Can't open EAM Potential file: %s\n", filename);
+ exit(0);
+ }
+
+ int tmp;
+ fgets(line, MAXLINE, fptr);
+ fgets(line, MAXLINE, fptr);
+ sscanf(line, "%d %lg", &tmp, &(file->mass));
+ fgets(line, MAXLINE, fptr);
+ sscanf(line, "%d %lg %d %lg %lg", &file->nrho, &file->drho, &file->nr, &file->dr, &file->cut);
+
+ //printf("Read: %lf %i %lf %i %lf %lf\n",file->mass,file->nrho,file->drho,file->nr,file->dr,file->cut);
+ file->frho = (MD_FLOAT *) allocate(ALIGNMENT, (file->nrho + 1) * sizeof(MD_FLOAT));
+ file->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT));
+ file->zr = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT));
+ grab(fptr, file->nrho, file->frho);
+ grab(fptr, file->nr, file->zr);
+ grab(fptr, file->nr, file->rhor);
+ for(int i = file->nrho; i > 0; i--) file->frho[i] = file->frho[i - 1];
+ for(int i = file->nr; i > 0; i--) file->rhor[i] = file->rhor[i - 1];
+ for(int i = file->nr; i > 0; i--) file->zr[i] = file->zr[i - 1];
+ fclose(fptr);
+}
+
+void file2array(Eam* eam) {
+ int i, j, k, m, n;
+ double sixth = 1.0 / 6.0;
+
+ // determine max function params from all active funcfl files
+ // active means some element is pointing at it via map
+ int active;
+ double rmax, rhomax;
+ eam->dr = eam->drho = rmax = rhomax = 0.0;
+ active = 0;
+ Funcfl* file = &eam->file;
+ eam->dr = MAX(eam->dr, file->dr);
+ eam->drho = MAX(eam->drho, file->drho);
+ rmax = MAX(rmax, (file->nr - 1) * file->dr);
+ rhomax = MAX(rhomax, (file->nrho - 1) * file->drho);
+
+ // set nr,nrho from cutoff and spacings
+ // 0.5 is for round-off in divide
+ eam->nr = (int)(rmax / eam->dr + 0.5);
+ eam->nrho = (int)(rhomax / eam->drho + 0.5);
+
+ // ------------------------------------------------------------------
+ // setup frho arrays
+ // ------------------------------------------------------------------
+
+ // allocate frho arrays
+ // nfrho = # of funcfl files + 1 for zero array
+ eam->frho = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nrho + 1) * sizeof(MD_FLOAT));
+
+ // interpolate each file's frho to a single grid and cutoff
+ double r, p, cof1, cof2, cof3, cof4;
+ n = 0;
+
+ for(m = 1; m <= eam->nrho; m++) {
+ r = (m - 1) * eam->drho;
+ p = r / file->drho + 1.0;
+ k = (int)(p);
+ k = MIN(k, file->nrho - 2);
+ k = MAX(k, 2);
+ p -= k;
+ p = MIN(p, 2.0);
+ cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
+ cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
+ cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
+ cof4 = sixth * p * (p * p - 1.0);
+ eam->frho[m] = cof1 * file->frho[k - 1] + cof2 * file->frho[k] +
+ cof3 * file->frho[k + 1] + cof4 * file->frho[k + 2];
+ }
+
+
+ // ------------------------------------------------------------------
+ // setup rhor arrays
+ // ------------------------------------------------------------------
+
+ // allocate rhor arrays
+ // nrhor = # of funcfl files
+ eam->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT));
+
+ // interpolate each file's rhor to a single grid and cutoff
+ for(m = 1; m <= eam->nr; m++) {
+ r = (m - 1) * eam->dr;
+ p = r / file->dr + 1.0;
+ k = (int)(p);
+ k = MIN(k, file->nr - 2);
+ k = MAX(k, 2);
+ p -= k;
+ p = MIN(p, 2.0);
+ cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
+ cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
+ cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
+ cof4 = sixth * p * (p * p - 1.0);
+ eam->rhor[m] = cof1 * file->rhor[k - 1] + cof2 * file->rhor[k] +
+ cof3 * file->rhor[k + 1] + cof4 * file->rhor[k + 2];
+ //if(m==119)printf("BuildRho: %e %e %e %e %e %e\n",rhor[m],cof1,cof2,cof3,cof4,file->rhor[k]);
+ }
+
+ // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
+ // for funcfl files, I,J mapping only depends on I
+ // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
+
+ // ------------------------------------------------------------------
+ // setup z2r arrays
+ // ------------------------------------------------------------------
+
+ // allocate z2r arrays
+ // nz2r = N*(N+1)/2 where N = # of funcfl files
+ eam->z2r = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT));
+
+ // create a z2r array for each file against other files, only for I >= J
+ // interpolate zri and zrj to a single grid and cutoff
+ double zri, zrj;
+ Funcfl* ifile = &eam->file;
+ Funcfl* jfile = &eam->file;
+
+ for(m = 1; m <= eam->nr; m++) {
+ r = (m - 1) * eam->dr;
+ p = r / ifile->dr + 1.0;
+ k = (int)(p);
+ k = MIN(k, ifile->nr - 2);
+ k = MAX(k, 2);
+ p -= k;
+ p = MIN(p, 2.0);
+ cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
+ cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
+ cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
+ cof4 = sixth * p * (p * p - 1.0);
+ zri = cof1 * ifile->zr[k - 1] + cof2 * ifile->zr[k] +
+ cof3 * ifile->zr[k + 1] + cof4 * ifile->zr[k + 2];
+
+ p = r / jfile->dr + 1.0;
+ k = (int)(p);
+ k = MIN(k, jfile->nr - 2);
+ k = MAX(k, 2);
+ p -= k;
+ p = MIN(p, 2.0);
+ cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
+ cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
+ cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
+ cof4 = sixth * p * (p * p - 1.0);
+ zrj = cof1 * jfile->zr[k - 1] + cof2 * jfile->zr[k] +
+ cof3 * jfile->zr[k + 1] + cof4 * jfile->zr[k + 2];
+
+ eam->z2r[m] = 27.2 * 0.529 * zri * zrj;
+ }
+}
+
+void array2spline(Eam* eam, Parameter* param) {
+ eam->rdr = 1.0 / eam->dr;
+ eam->rdrho = 1.0 / eam->drho;
+ eam->nrho_tot = (eam->nrho + 1) * 7 + 64;
+ eam->nr_tot = (eam->nr + 1) * 7 + 64;
+ eam->nrho_tot -= eam->nrho_tot%64;
+ eam->nr_tot -= eam->nr_tot%64;
+
+ int ntypes = param->ntypes;
+ eam->frho_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nrho_tot * sizeof(MD_FLOAT));
+ eam->rhor_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT));
+ eam->z2r_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT));
+ interpolate(eam->nrho, eam->drho, eam->frho, eam->frho_spline);
+ interpolate(eam->nr, eam->dr, eam->rhor, eam->rhor_spline);
+ interpolate(eam->nr, eam->dr, eam->z2r, eam->z2r_spline);
+
+ // replicate data for multiple types;
+ for(int tt = 0; tt < ntypes * ntypes; tt++) {
+ for(int k = 0; k < eam->nrho_tot; k++)
+ eam->frho_spline[tt*eam->nrho_tot + k] = eam->frho_spline[k];
+ for(int k = 0; k < eam->nr_tot; k++)
+ eam->rhor_spline[tt*eam->nr_tot + k] = eam->rhor_spline[k];
+ for(int k = 0; k < eam->nr_tot; k++)
+ eam->z2r_spline[tt*eam->nr_tot + k] = eam->z2r_spline[k];
+ }
+}
+
+void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline) {
+ for(int m = 1; m <= n; m++) spline[m * 7 + 6] = f[m];
+
+ spline[1 * 7 + 5] = spline[2 * 7 + 6] - spline[1 * 7 + 6];
+ spline[2 * 7 + 5] = 0.5 * (spline[3 * 7 + 6] - spline[1 * 7 + 6]);
+ spline[(n - 1) * 7 + 5] = 0.5 * (spline[n * 7 + 6] - spline[(n - 2) * 7 + 6]);
+ spline[n * 7 + 5] = spline[n * 7 + 6] - spline[(n - 1) * 7 + 6];
+
+ for(int m = 3; m <= n - 2; m++)
+ spline[m * 7 + 5] = ((spline[(m - 2) * 7 + 6] - spline[(m + 2) * 7 + 6]) +
+ 8.0 * (spline[(m + 1) * 7 + 6] - spline[(m - 1) * 7 + 6])) / 12.0;
+
+ for(int m = 1; m <= n - 1; m++) {
+ spline[m * 7 + 4] = 3.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]) -
+ 2.0 * spline[m * 7 + 5] - spline[(m + 1) * 7 + 5];
+ spline[m * 7 + 3] = spline[m * 7 + 5] + spline[(m + 1) * 7 + 5] -
+ 2.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]);
+ }
+
+ spline[n * 7 + 4] = 0.0;
+ spline[n * 7 + 3] = 0.0;
+
+ for(int m = 1; m <= n; m++) {
+ spline[m * 7 + 2] = spline[m * 7 + 5] / delta;
+ spline[m * 7 + 1] = 2.0 * spline[m * 7 + 4] / delta;
+ spline[m * 7 + 0] = 3.0 * spline[m * 7 + 3] / delta;
+ }
+}
+
+void grab(FILE* fptr, int n, MD_FLOAT* list) {
+ char* ptr;
+ char line[MAXLINE];
+ int i = 0;
+
+ while(i < n) {
+ fgets(line, MAXLINE, fptr);
+ ptr = strtok(line, " \t\n\r\f");
+ list[i++] = atof(ptr);
+ while(ptr = strtok(NULL, " \t\n\r\f")) list[i++] = atof(ptr);
+ }
+}
diff --git a/gromacs/force_eam.c b/gromacs/force_eam.c
new file mode 100644
index 0000000..b686abf
--- /dev/null
+++ b/gromacs/force_eam.c
@@ -0,0 +1,213 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats) {
+ if(eam->nmax < atom->Nmax) {
+ eam->nmax = atom->Nmax;
+ if(eam->fp != NULL) { free(eam->fp); }
+ eam->fp = (MD_FLOAT *) allocate(ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT));
+ }
+
+ int Nlocal = atom->Nlocal;
+ int* neighs;
+ MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp;
+ MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline;
+ MD_FLOAT rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; MD_FLOAT rdrho = eam->rdrho;
+ int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
+ double S = getTimeStamp();
+
+ LIKWID_MARKER_START("force_eam_fp");
+ #pragma omp parallel for
+ for(int i = 0; i < Nlocal; i++) {
+ neighs = &neighbor->neighbors[i * neighbor->maxneighs];
+ int numneighs = neighbor->numneigh[i];
+ MD_FLOAT xtmp = atom_x(i);
+ MD_FLOAT ytmp = atom_y(i);
+ MD_FLOAT ztmp = atom_z(i);
+ MD_FLOAT rhoi = 0;
+#ifdef EXPLICIT_TYPES
+ const int type_i = atom->type[i];
+#endif
+ #pragma ivdep
+ for(int k = 0; k < numneighs; k++) {
+ int j = neighs[k];
+ MD_FLOAT delx = xtmp - atom_x(j);
+ MD_FLOAT dely = ytmp - atom_y(j);
+ MD_FLOAT delz = ztmp - atom_z(j);
+ MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+#ifdef EXPLICIT_TYPES
+ const int type_j = atom->type[j];
+ const int type_ij = type_i * ntypes + type_j;
+ const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
+#else
+ const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
+#endif
+ if(rsq < cutforcesq) {
+ MD_FLOAT p = sqrt(rsq) * rdr + 1.0;
+ int m = (int)(p);
+ m = m < nr - 1 ? m : nr - 1;
+ p -= m;
+ p = p < 1.0 ? p : 1.0;
+#ifdef EXPLICIT_TYPES
+ rhoi += ((rhor_spline[type_ij * nr_tot + m * 7 + 3] * p +
+ rhor_spline[type_ij * nr_tot + m * 7 + 4]) * p +
+ rhor_spline[type_ij * nr_tot + m * 7 + 5]) * p +
+ rhor_spline[type_ij * nr_tot + m * 7 + 6];
+#else
+ rhoi += ((rhor_spline[m * 7 + 3] * p +
+ rhor_spline[m * 7 + 4]) * p +
+ rhor_spline[m * 7 + 5]) * p +
+ rhor_spline[m * 7 + 6];
+#endif
+ }
+ }
+
+#ifdef EXPLICIT_TYPES
+ const int type_ii = type_i * type_i;
+#endif
+ MD_FLOAT p = 1.0 * rhoi * rdrho + 1.0;
+ int m = (int)(p);
+ m = MAX(1, MIN(m, nrho - 1));
+ p -= m;
+ p = MIN(p, 1.0);
+#ifdef EXPLICIT_TYPES
+ fp[i] = (frho_spline[type_ii * nrho_tot + m * 7 + 0] * p +
+ frho_spline[type_ii * nrho_tot + m * 7 + 1]) * p +
+ frho_spline[type_ii * nrho_tot + m * 7 + 2];
+#else
+ fp[i] = (frho_spline[m * 7 + 0] * p + frho_spline[m * 7 + 1]) * p + frho_spline[m * 7 + 2];
+#endif
+ }
+
+ LIKWID_MARKER_STOP("force_eam_fp");
+
+ // We still need to update fp for PBC atoms
+ for(int i = 0; i < atom->Nghost; i++) {
+ fp[Nlocal + i] = fp[atom->border_map[i]];
+ }
+
+ LIKWID_MARKER_START("force_eam");
+ for(int i = 0; i < Nlocal; i++) {
+ neighs = &neighbor->neighbors[i * neighbor->maxneighs];
+ int numneighs = neighbor->numneigh[i];
+ MD_FLOAT xtmp = atom_x(i);
+ MD_FLOAT ytmp = atom_y(i);
+ MD_FLOAT ztmp = atom_z(i);
+ MD_FLOAT fix = 0;
+ MD_FLOAT fiy = 0;
+ MD_FLOAT fiz = 0;
+#ifdef EXPLICIT_TYPES
+ const int type_i = atom->type[i];
+#endif
+
+ #pragma ivdep
+ for(int k = 0; k < numneighs; k++) {
+ int j = neighs[k];
+ MD_FLOAT delx = xtmp - atom_x(j);
+ MD_FLOAT dely = ytmp - atom_y(j);
+ MD_FLOAT delz = ztmp - atom_z(j);
+ MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+#ifdef EXPLICIT_TYPES
+ const int type_j = atom->type[j];
+ const int type_ij = type_i * ntypes + type_j;
+ const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
+#else
+ const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
+#endif
+
+ if(rsq < cutforcesq) {
+ MD_FLOAT r = sqrt(rsq);
+ MD_FLOAT p = r * rdr + 1.0;
+ int m = (int)(p);
+ m = m < nr - 1 ? m : nr - 1;
+ p -= m;
+ p = p < 1.0 ? p : 1.0;
+
+
+ // rhoip = derivative of (density at atom j due to atom i)
+ // rhojp = derivative of (density at atom i due to atom j)
+ // phi = pair potential energy
+ // phip = phi'
+ // z2 = phi * r
+ // z2p = (phi * r)' = (phi' r) + phi
+ // psip needs both fp[i] and fp[j] terms since r_ij appears in two
+ // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+ // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+
+#ifdef EXPLICIT_TYPES
+ MD_FLOAT rhoip = (rhor_spline[type_ij * nr_tot + m * 7 + 0] * p +
+ rhor_spline[type_ij * nr_tot + m * 7 + 1]) * p +
+ rhor_spline[type_ij * nr_tot + m * 7 + 2];
+
+ MD_FLOAT z2p = (z2r_spline[type_ij * nr_tot + m * 7 + 0] * p +
+ z2r_spline[type_ij * nr_tot + m * 7 + 1]) * p +
+ z2r_spline[type_ij * nr_tot + m * 7 + 2];
+
+ MD_FLOAT z2 = ((z2r_spline[type_ij * nr_tot + m * 7 + 3] * p +
+ z2r_spline[type_ij * nr_tot + m * 7 + 4]) * p +
+ z2r_spline[type_ij * nr_tot + m * 7 + 5]) * p +
+ z2r_spline[type_ij * nr_tot + m * 7 + 6];
+#else
+ MD_FLOAT rhoip = (rhor_spline[m * 7 + 0] * p + rhor_spline[m * 7 + 1]) * p + rhor_spline[m * 7 + 2];
+ MD_FLOAT z2p = (z2r_spline[m * 7 + 0] * p + z2r_spline[m * 7 + 1]) * p + z2r_spline[m * 7 + 2];
+ MD_FLOAT z2 = ((z2r_spline[m * 7 + 3] * p +
+ z2r_spline[m * 7 + 4]) * p +
+ z2r_spline[m * 7 + 5]) * p +
+ z2r_spline[m * 7 + 6];
+#endif
+
+ MD_FLOAT recip = 1.0 / r;
+ MD_FLOAT phi = z2 * recip;
+ MD_FLOAT phip = z2p * recip - phi * recip;
+ MD_FLOAT psip = fp[i] * rhoip + fp[j] * rhoip + phip;
+ MD_FLOAT fpair = -psip * recip;
+
+ fix += delx * fpair;
+ fiy += dely * fpair;
+ fiz += delz * fpair;
+ //fpair *= 0.5;
+ }
+ }
+
+ fx[i] = fix;
+ fy[i] = fiy;
+ fz[i] = fiz;
+ addStat(stats->total_force_neighs, numneighs);
+ addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
+ }
+
+ LIKWID_MARKER_STOP("force_eam");
+ double E = getTimeStamp();
+ return E-S;
+}
diff --git a/gromacs/force_lj.c b/gromacs/force_lj.c
new file mode 100644
index 0000000..73c0269
--- /dev/null
+++ b/gromacs/force_lj.c
@@ -0,0 +1,104 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+double computeForceLJ(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
+ int Nlocal = atom->Nlocal;
+ int* neighs;
+ MD_FLOAT* fx = atom->fx;
+ MD_FLOAT* fy = atom->fy;
+ MD_FLOAT* fz = atom->fz;
+#ifndef EXPLICIT_TYPES
+ MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
+ MD_FLOAT sigma6 = param->sigma6;
+ MD_FLOAT epsilon = param->epsilon;
+#endif
+
+ for(int i = 0; i < Nlocal; i++) {
+ fx[i] = 0.0;
+ fy[i] = 0.0;
+ fz[i] = 0.0;
+ }
+
+ double S = getTimeStamp();
+ LIKWID_MARKER_START("force");
+
+#pragma omp parallel for
+ for(int i = 0; i < Nlocal; i++) {
+ neighs = &neighbor->neighbors[i * neighbor->maxneighs];
+ int numneighs = neighbor->numneigh[i];
+ MD_FLOAT xtmp = atom_x(i);
+ MD_FLOAT ytmp = atom_y(i);
+ MD_FLOAT ztmp = atom_z(i);
+ MD_FLOAT fix = 0;
+ MD_FLOAT fiy = 0;
+ MD_FLOAT fiz = 0;
+
+#ifdef EXPLICIT_TYPES
+ const int type_i = atom->type[i];
+#endif
+
+
+ for(int k = 0; k < numneighs; k++) {
+ int j = neighs[k];
+ MD_FLOAT delx = xtmp - atom_x(j);
+ MD_FLOAT dely = ytmp - atom_y(j);
+ MD_FLOAT delz = ztmp - atom_z(j);
+ MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+
+#ifdef EXPLICIT_TYPES
+ const int type_j = atom->type[j];
+ const int type_ij = type_i * atom->ntypes + type_j;
+ const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
+ const MD_FLOAT sigma6 = atom->sigma6[type_ij];
+ const MD_FLOAT epsilon = atom->epsilon[type_ij];
+#endif
+
+ if(rsq < cutforcesq) {
+ MD_FLOAT sr2 = 1.0 / rsq;
+ MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
+ MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
+ fix += delx * force;
+ fiy += dely * force;
+ fiz += delz * force;
+ }
+ }
+
+ fx[i] += fix;
+ fy[i] += fiy;
+ fz[i] += fiz;
+
+ addStat(stats->total_force_neighs, numneighs);
+ addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
+ }
+
+ LIKWID_MARKER_STOP("force");
+ double E = getTimeStamp();
+ return E-S;
+}
diff --git a/gromacs/includes/allocate.h b/gromacs/includes/allocate.h
new file mode 100644
index 0000000..b7587e3
--- /dev/null
+++ b/gromacs/includes/allocate.h
@@ -0,0 +1,29 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+
+#ifndef __ALLOCATE_H_
+#define __ALLOCATE_H_
+extern void* allocate (int alignment, size_t bytesize);
+extern void* reallocate (void* ptr, int alignment, size_t newBytesize, size_t oldBytesize);
+#endif
diff --git a/gromacs/includes/atom.h b/gromacs/includes/atom.h
new file mode 100644
index 0000000..70cef10
--- /dev/null
+++ b/gromacs/includes/atom.h
@@ -0,0 +1,59 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+
+#ifndef __ATOM_H_
+#define __ATOM_H_
+
+typedef struct {
+ int Natoms, Nlocal, Nghost, Nmax;
+ MD_FLOAT *x, *y, *z;
+ MD_FLOAT *vx, *vy, *vz;
+ MD_FLOAT *fx, *fy, *fz;
+ int *border_map;
+ int *type;
+ int ntypes;
+ MD_FLOAT *epsilon;
+ MD_FLOAT *sigma6;
+ MD_FLOAT *cutforcesq;
+ MD_FLOAT *cutneighsq;
+} Atom;
+
+extern void initAtom(Atom*);
+extern void createAtom(Atom*, Parameter*);
+extern int readAtom(Atom*, Parameter*);
+extern void growAtom(Atom*);
+
+#ifdef AOS
+#define POS_DATA_LAYOUT "AoS"
+#define atom_x(i) atom->x[(i) * 3 + 0]
+#define atom_y(i) atom->x[(i) * 3 + 1]
+#define atom_z(i) atom->x[(i) * 3 + 2]
+#else
+#define POS_DATA_LAYOUT "SoA"
+#define atom_x(i) atom->x[i]
+#define atom_y(i) atom->y[i]
+#define atom_z(i) atom->z[i]
+#endif
+
+#endif
diff --git a/gromacs/includes/eam.h b/gromacs/includes/eam.h
new file mode 100644
index 0000000..03d7ecc
--- /dev/null
+++ b/gromacs/includes/eam.h
@@ -0,0 +1,55 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+
+#include
+#include
+
+#ifndef __EAM_H_
+#define __EAM_H_
+typedef struct {
+ int nrho, nr;
+ MD_FLOAT drho, dr, cut, mass;
+ MD_FLOAT *frho, *rhor, *zr;
+} Funcfl;
+
+typedef struct {
+ MD_FLOAT* fp;
+ int nmax;
+ int nrho, nr;
+ int nrho_tot, nr_tot;
+ MD_FLOAT dr, rdr, drho, rdrho;
+ MD_FLOAT *frho, *rhor, *z2r;
+ MD_FLOAT *rhor_spline, *frho_spline, *z2r_spline;
+ Funcfl file;
+} Eam;
+
+void initEam(Eam* eam, Parameter* param);
+void coeff(Eam* eam, Parameter* param);
+void init_style(Eam* eam, Parameter *param);
+void read_eam_file(Funcfl* file, const char* filename);
+void file2array(Eam* eam);
+void array2spline(Eam* eam, Parameter* param);
+void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline);
+void grab(FILE* fptr, int n, MD_FLOAT* list);
+#endif
diff --git a/gromacs/includes/likwid-marker.h b/gromacs/includes/likwid-marker.h
new file mode 100644
index 0000000..ebf8b89
--- /dev/null
+++ b/gromacs/includes/likwid-marker.h
@@ -0,0 +1,170 @@
+/*
+ * =======================================================================================
+ *
+ * Filename: likwid-marker.h
+ *
+ * Description: Header File of likwid Marker API
+ *
+ * Version:
+ * Released:
+ *
+ * Authors: Thomas Gruber (tg), thomas.roehl@googlemail.com
+ *
+ * Project: likwid
+ *
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see .
+ *
+ * =======================================================================================
+ */
+#ifndef LIKWID_MARKER_H
+#define LIKWID_MARKER_H
+
+
+/** \addtogroup MarkerAPI Marker API module
+* @{
+*/
+/*!
+\def LIKWID_MARKER_INIT
+Shortcut for likwid_markerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_THREADINIT
+Shortcut for likwid_markerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_REGISTER(regionTag)
+Shortcut for likwid_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_START(regionTag)
+Shortcut for likwid_markerStartRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_STOP(regionTag)
+Shortcut for likwid_markerStopRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
+Shortcut for likwid_markerGetResults() for \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_SWITCH
+Shortcut for likwid_markerNextGroup() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_RESET(regionTag)
+Shortcut for likwid_markerResetRegion() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_CLOSE
+Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/** @}*/
+
+#ifdef LIKWID_PERFMON
+#include
+#define LIKWID_MARKER_INIT likwid_markerInit()
+#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
+#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
+#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
+#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
+#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
+#define LIKWID_MARKER_CLOSE likwid_markerClose()
+#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
+#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)
+#else /* LIKWID_PERFMON */
+#define LIKWID_MARKER_INIT
+#define LIKWID_MARKER_THREADINIT
+#define LIKWID_MARKER_SWITCH
+#define LIKWID_MARKER_REGISTER(regionTag)
+#define LIKWID_MARKER_START(regionTag)
+#define LIKWID_MARKER_STOP(regionTag)
+#define LIKWID_MARKER_CLOSE
+#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
+#define LIKWID_MARKER_RESET(regionTag)
+#endif /* LIKWID_PERFMON */
+
+
+/** \addtogroup NvMarkerAPI NvMarker API module (MarkerAPI for Nvidia GPUs)
+* @{
+*/
+/*!
+\def LIKWID_NVMARKER_INIT
+Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_THREADINIT
+Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_REGISTER(regionTag)
+Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_START(regionTag)
+Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_STOP(regionTag)
+Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count)
+Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_SWITCH
+Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_RESET(regionTag)
+Shortcut for likwid_gpuMarkerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_CLOSE
+Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/** @}*/
+
+#ifdef LIKWID_NVMON
+#ifndef LIKWID_WITH_NVMON
+#define LIKWID_WITH_NVMON
+#endif
+#include
+#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit()
+#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit()
+#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup()
+#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag)
+#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag)
+#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag)
+#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose()
+#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag)
+#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \
+ likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count)
+#else /* LIKWID_NVMON */
+#define LIKWID_NVMARKER_INIT
+#define LIKWID_NVMARKER_THREADINIT
+#define LIKWID_NVMARKER_SWITCH
+#define LIKWID_NVMARKER_REGISTER(regionTag)
+#define LIKWID_NVMARKER_START(regionTag)
+#define LIKWID_NVMARKER_STOP(regionTag)
+#define LIKWID_NVMARKER_CLOSE
+#define LIKWID_NVMARKER_GET(regionTag, nevents, events, time, count)
+#define LIKWID_NVMARKER_RESET(regionTag)
+#endif /* LIKWID_NVMON */
+
+
+
+#endif /* LIKWID_MARKER_H */
diff --git a/gromacs/includes/neighbor.h b/gromacs/includes/neighbor.h
new file mode 100644
index 0000000..949d515
--- /dev/null
+++ b/gromacs/includes/neighbor.h
@@ -0,0 +1,41 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+#ifndef __NEIGHBOR_H_
+#define __NEIGHBOR_H_
+typedef struct {
+ int every;
+ int ncalls;
+ int* neighbors;
+ int maxneighs;
+ int* numneigh;
+} Neighbor;
+
+extern void initNeighbor(Neighbor*, Parameter*);
+extern void setupNeighbor(Parameter*);
+extern void binatoms(Atom*);
+extern void buildNeighbor(Atom*, Neighbor*);
+extern void sortAtom(Atom*);
+#endif
diff --git a/gromacs/includes/parameter.h b/gromacs/includes/parameter.h
new file mode 100644
index 0000000..bff46dd
--- /dev/null
+++ b/gromacs/includes/parameter.h
@@ -0,0 +1,56 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#ifndef __PARAMETER_H_
+#define __PARAMETER_H_
+
+#if PRECISION == 1
+#define MD_FLOAT float
+#else
+#define MD_FLOAT double
+#endif
+
+typedef struct {
+ int force_field;
+ char* input_file;
+ char* vtk_file;
+ MD_FLOAT epsilon;
+ MD_FLOAT sigma6;
+ MD_FLOAT temp;
+ MD_FLOAT rho;
+ MD_FLOAT mass;
+ int ntypes;
+ int ntimes;
+ int nstat;
+ int every;
+ MD_FLOAT dt;
+ MD_FLOAT dtforce;
+ MD_FLOAT cutforce;
+ MD_FLOAT cutneigh;
+ int nx, ny, nz;
+ MD_FLOAT lattice;
+ MD_FLOAT xlo, xhi, ylo, yhi, zlo, zhi;
+ MD_FLOAT xprd, yprd, zprd;
+ double proc_freq;
+ char* eam_file;
+} Parameter;
+#endif
diff --git a/gromacs/includes/pbc.h b/gromacs/includes/pbc.h
new file mode 100644
index 0000000..727ac80
--- /dev/null
+++ b/gromacs/includes/pbc.h
@@ -0,0 +1,32 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+#ifndef __PBC_H_
+#define __PBC_H_
+extern void initPbc();
+extern void updatePbc(Atom*, Parameter*);
+extern void updateAtomsPbc(Atom*, Parameter*);
+extern void setupPbc(Atom*, Parameter*);
+#endif
diff --git a/gromacs/includes/stats.h b/gromacs/includes/stats.h
new file mode 100644
index 0000000..231119b
--- /dev/null
+++ b/gromacs/includes/stats.h
@@ -0,0 +1,46 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+#ifndef __STATS_H_
+#define __STATS_H_
+typedef struct {
+ long long int total_force_neighs;
+ long long int total_force_iters;
+} Stats;
+
+void initStats(Stats *s);
+void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
+
+#ifdef COMPUTE_STATS
+# define addStat(stat, value) stat += value;
+# define beginStatTimer() double Si = getTimeStamp();
+# define endStatTimer(stat) stat += getTimeStamp() - Si;
+#else
+# define addStat(stat, value)
+# define beginStatTimer()
+# define endStatTimer(stat)
+#endif
+
+#endif
diff --git a/gromacs/includes/thermo.h b/gromacs/includes/thermo.h
new file mode 100644
index 0000000..3006bc4
--- /dev/null
+++ b/gromacs/includes/thermo.h
@@ -0,0 +1,31 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+#ifndef __THERMO_H_
+#define __THERMO_H_
+extern void setupThermo(Parameter*, int);
+extern void computeThermo(int, Parameter*, Atom*);
+extern void adjustThermo(Parameter*, Atom*);
+#endif
diff --git a/gromacs/includes/timers.h b/gromacs/includes/timers.h
new file mode 100644
index 0000000..2400c01
--- /dev/null
+++ b/gromacs/includes/timers.h
@@ -0,0 +1,11 @@
+#ifndef __TIMERS_H_
+#define __TIMERS_H_
+
+typedef enum {
+ TOTAL = 0,
+ NEIGH,
+ FORCE,
+ NUMTIMER
+} timertype;
+
+#endif
diff --git a/gromacs/includes/timing.h b/gromacs/includes/timing.h
new file mode 100644
index 0000000..4462b91
--- /dev/null
+++ b/gromacs/includes/timing.h
@@ -0,0 +1,30 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#ifndef __TIMING_H_
+#define __TIMING_H_
+
+extern double getTimeStamp();
+extern double getTimeResolution();
+extern double getTimeStamp_();
+
+#endif
diff --git a/gromacs/includes/tracing.h b/gromacs/includes/tracing.h
new file mode 100644
index 0000000..eab11ab
--- /dev/null
+++ b/gromacs/includes/tracing.h
@@ -0,0 +1,118 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+
+#if defined(MEM_TRACER) || defined(INDEX_TRACER)
+#include
+#include
+#endif
+
+#ifndef VECTOR_WIDTH
+# define VECTOR_WIDTH 8
+#endif
+
+#ifndef TRACER_CONDITION
+# define TRACER_CONDITION (!(timestep % param->every))
+#endif
+
+#ifdef MEM_TRACER
+# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
+ if(TRACER_CONDITION) { \
+ char mem_tracer_fn[128]; \
+ snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
+ mem_tracer_fp = fopen(mem_tracer_fn, "w");
+ }
+
+# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
+# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
+#else
+# define MEM_TRACER_INIT
+# define MEM_TRACER_END
+# define MEM_TRACE(addr, op)
+#endif
+
+#ifdef INDEX_TRACER
+# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
+ if(TRACER_CONDITION) { \
+ char index_tracer_fn[128]; \
+ snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
+ index_tracer_fp = fopen(index_tracer_fn, "w"); \
+ }
+
+# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
+# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
+# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
+# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
+ for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
+ int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
+ fprintf(index_tracer_fp, "I: "); \
+ for(int __j = 0; __j < __e; ++__j) { \
+ fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
+ } \
+ fprintf(index_tracer_fp, "\n"); \
+ } \
+ }
+
+# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
+ for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
+ int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
+ if(__e > 1) { \
+ for(int __j = __i; __j < __i + __e - 1; ++__j) { \
+ for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
+ if(l[__k] > l[__k + 1]) { \
+ int __t = l[__k]; \
+ l[__k] = l[__k + 1]; \
+ l[__k + 1] = __t; \
+ } \
+ } \
+ } \
+ } \
+ } \
+ }
+
+# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
+ for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
+ int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
+ if(__e > 1) { \
+ fprintf(index_tracer_fp, "D: "); \
+ for(int __j = 0; __j < __e - 1; ++__j) { \
+ int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
+ fprintf(index_tracer_fp, "%d ", __dist); \
+ } \
+ fprintf(index_tracer_fp, "\n"); \
+ } \
+ } \
+ }
+#else
+# define INDEX_TRACER_INIT
+# define INDEX_TRACER_END
+# define INDEX_TRACE_NATOMS(nl, ng, mn)
+# define INDEX_TRACE_ATOM(a)
+# define INDEX_TRACE(l, e)
+# define DIST_TRACE_SORT(l, e)
+# define DIST_TRACE(l, e)
+#endif
+
+extern void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep);
diff --git a/gromacs/includes/util.h b/gromacs/includes/util.h
new file mode 100644
index 0000000..20d8806
--- /dev/null
+++ b/gromacs/includes/util.h
@@ -0,0 +1,43 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#ifndef __UTIL_H_
+#define __UTIL_H_
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+#ifndef MAX
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#endif
+#ifndef ABS
+#define ABS(a) ((a) >= 0 ? (a) : -(a))
+#endif
+
+#define FF_LJ 0
+#define FF_EAM 1
+
+extern double myrandom(int*);
+extern void random_reset(int *seed, int ibase, double *coord);
+extern int str2ff(const char *string);
+extern const char* ff2str(int ff);
+#endif
diff --git a/gromacs/includes/vtk.h b/gromacs/includes/vtk.h
new file mode 100644
index 0000000..0f03a74
--- /dev/null
+++ b/gromacs/includes/vtk.h
@@ -0,0 +1,28 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+
+#ifndef __VTK_H_
+#define __VTK_H_
+extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
+#endif
diff --git a/gromacs/main-stub.c b/gromacs/main-stub.c
new file mode 100644
index 0000000..4da90db
--- /dev/null
+++ b/gromacs/main-stub.c
@@ -0,0 +1,292 @@
+#include
+#include
+//---
+#include
+//---
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define HLINE "----------------------------------------------------------------------------\n"
+
+#define LATTICE_DISTANCE 10.0
+#define NEIGH_DISTANCE 1.0
+
+extern double computeForceLJ(Parameter*, Atom*, Neighbor*, Stats*);
+extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
+
+void init(Parameter *param) {
+ param->input_file = NULL;
+ param->epsilon = 1.0;
+ param->sigma6 = 1.0;
+ param->rho = 0.8442;
+ param->ntypes = 4;
+ param->ntimes = 200;
+ param->nx = 4;
+ param->ny = 4;
+ param->nz = 2;
+ param->lattice = LATTICE_DISTANCE;
+ param->cutforce = 5.0;
+ param->cutneigh = param->cutforce;
+ param->mass = 1.0;
+ // Unused
+ param->dt = 0.005;
+ param->dtforce = 0.5 * param->dt;
+ param->nstat = 100;
+ param->temp = 1.44;
+ param->every = 20;
+ param->proc_freq = 2.4;
+ param->eam_file = NULL;
+}
+
+// Show debug messages
+#define DEBUG(msg) printf(msg)
+// Do not show debug messages
+//#define DEBUG(msg)
+
+#define ADD_ATOM(x, y, z, vx, vy, vz) atom_x(atom->Nlocal) = base_x + x * NEIGH_DISTANCE; \
+ atom_y(atom->Nlocal) = base_y + y * NEIGH_DISTANCE; \
+ atom_z(atom->Nlocal) = base_z + z * NEIGH_DISTANCE; \
+ atom->vx[atom->Nlocal] = vy; \
+ atom->vy[atom->Nlocal] = vy; \
+ atom->vz[atom->Nlocal] = vz; \
+ atom->Nlocal++
+
+int main(int argc, const char *argv[]) {
+ Eam eam;
+ Atom atom_data;
+ Atom *atom = (Atom *)(&atom_data);
+ Neighbor neighbor;
+ Stats stats;
+ Parameter param;
+ int atoms_per_unit_cell = 8;
+ int csv = 0;
+
+ LIKWID_MARKER_INIT;
+ LIKWID_MARKER_REGISTER("force");
+ DEBUG("Initializing parameters...\n");
+ init(¶m);
+
+ for(int i = 0; i < argc; i++)
+ {
+ if((strcmp(argv[i], "-f") == 0))
+ {
+ if((param.force_field = str2ff(argv[++i])) < 0) {
+ fprintf(stderr, "Invalid force field!\n");
+ exit(-1);
+ }
+ continue;
+ }
+ if((strcmp(argv[i], "-e") == 0))
+ {
+ param.eam_file = strdup(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
+ {
+ param.ntimes = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-nx") == 0))
+ {
+ param.nx = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-ny") == 0))
+ {
+ param.ny = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-nz") == 0))
+ {
+ param.nz = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-na") == 0))
+ {
+ atoms_per_unit_cell = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "--freq") == 0))
+ {
+ param.proc_freq = atof(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "--csv") == 0))
+ {
+ csv = 1;
+ continue;
+ }
+ if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
+ {
+ printf("MD Bench: A minimalistic re-implementation of miniMD\n");
+ printf(HLINE);
+ printf("-f : force field (lj or eam), default lj\n");
+ printf("-n / --nsteps : set number of timesteps for simulation\n");
+ printf("-nx/-ny/-nz : set linear dimension of systembox in x/y/z direction\n");
+ printf("-na : set number of atoms per unit cell\n");
+ printf("--freq : set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
+ printf("--csv: set output as CSV style\n");
+ printf(HLINE);
+ exit(EXIT_SUCCESS);
+ }
+ }
+
+
+ if(param.force_field == FF_EAM) {
+ DEBUG("Initializing EAM parameters...\n");
+ initEam(&eam, ¶m);
+ }
+
+ param.xprd = param.nx * LATTICE_DISTANCE;
+ param.yprd = param.ny * LATTICE_DISTANCE;
+ param.zprd = param.nz * LATTICE_DISTANCE;
+
+ DEBUG("Initializing atoms...\n");
+ initAtom(atom);
+ initStats(&stats);
+
+ atom->ntypes = param.ntypes;
+ atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
+ for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
+ atom->epsilon[i] = param.epsilon;
+ atom->sigma6[i] = param.sigma6;
+ atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
+ atom->cutforcesq[i] = param.cutforce * param.cutforce;
+ }
+
+ DEBUG("Creating atoms...\n");
+ for(int i = 0; i < param.nx; ++i) {
+ for(int j = 0; j < param.ny; ++j) {
+ for(int k = 0; k < param.nz; ++k) {
+ int added_atoms = 0;
+ int fac_x = 1;
+ int fac_y = 1;
+ int fac_z = 1;
+ int fmod = 0;
+ MD_FLOAT base_x = i * LATTICE_DISTANCE;
+ MD_FLOAT base_y = j * LATTICE_DISTANCE;
+ MD_FLOAT base_z = k * LATTICE_DISTANCE;
+ MD_FLOAT vx = 0.0;
+ MD_FLOAT vy = 0.0;
+ MD_FLOAT vz = 0.0;
+
+ while(atom->Nlocal > atom->Nmax - atoms_per_unit_cell) {
+ growAtom(atom);
+ }
+
+ while(fac_x * fac_y * fac_z < atoms_per_unit_cell) {
+ if(fmod == 0) { fac_x *= 2; }
+ if(fmod == 1) { fac_y *= 2; }
+ if(fmod == 2) { fac_z *= 2; }
+ fmod = (fmod + 1) % 3;
+ }
+
+ MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x;
+ MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y;
+ MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z;
+ for(int ii = 0; ii < fac_x; ++ii) {
+ for(int jj = 0; jj < fac_y; ++jj) {
+ for(int kk = 0; kk < fac_z; ++kk) {
+ if(added_atoms < atoms_per_unit_cell) {
+ atom->type[atom->Nlocal] = rand() % atom->ntypes;
+ ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz);
+ added_atoms++;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
+ const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int));
+ const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
+
+ if(!csv) {
+ printf("Number of timesteps: %d\n", param.ntimes);
+ printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS);
+ printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
+ printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
+ printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
+ printf("Total number of atoms: %d\n", atom->Nlocal);
+ printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
+ printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
+ printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
+ }
+
+ DEBUG("Initializing neighbor lists...\n");
+ initNeighbor(&neighbor, ¶m);
+ DEBUG("Setting up neighbor lists...\n");
+ setupNeighbor(¶m);
+ DEBUG("Building neighbor lists...\n");
+ buildNeighbor(atom, &neighbor);
+ DEBUG("Computing forces...\n");
+ if(param.force_field == FF_EAM) {
+ computeForceEam(&eam, ¶m, atom, &neighbor, &stats);
+ } else {
+ computeForceLJ(¶m, atom, &neighbor, &stats);
+ }
+
+ double S, E;
+ S = getTimeStamp();
+ for(int i = 0; i < param.ntimes; i++) {
+
+#if defined(MEM_TRACER) || defined(INDEX_TRACER)
+ traceAddresses(¶m, atom, &neighbor, i + 1);
+#endif
+
+ if(param.force_field == FF_EAM) {
+ computeForceEam(&eam, ¶m, atom, &neighbor, &stats);
+ } else {
+ computeForceLJ(¶m, atom, &neighbor, &stats);
+ }
+ }
+ E = getTimeStamp();
+ double T_accum = E-S;
+ double freq_hz = param.proc_freq * 1.e9;
+ const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS;
+ const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats);
+ const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz;
+ const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
+
+ if(!csv) {
+ printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6);
+ if(param.proc_freq > 0.0) {
+ printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
+ }
+ } else {
+ printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
+ if(param.proc_freq > 0.0) {
+ printf(",cy/atom,cy/neigh");
+ }
+ printf("\n");
+
+ printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
+ param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
+ estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6);
+
+ if(param.proc_freq > 0.0) {
+ printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
+ }
+ printf("\n");
+ }
+
+ double timer[NUMTIMER];
+ timer[FORCE] = T_accum;
+ displayStatistics(atom, ¶m, &stats, timer);
+ LIKWID_MARKER_CLOSE;
+ return EXIT_SUCCESS;
+}
diff --git a/gromacs/main.c b/gromacs/main.c
new file mode 100644
index 0000000..ebb2dbd
--- /dev/null
+++ b/gromacs/main.c
@@ -0,0 +1,324 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define HLINE "----------------------------------------------------------------------------\n"
+
+extern double computeForceLJ(Parameter*, Atom*, Neighbor*, Stats*);
+extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
+
+void init(Parameter *param)
+{
+ param->input_file = NULL;
+ param->vtk_file = NULL;
+ param->force_field = FF_LJ;
+ param->epsilon = 1.0;
+ param->sigma6 = 1.0;
+ param->rho = 0.8442;
+ param->ntypes = 4;
+ param->ntimes = 200;
+ param->dt = 0.005;
+ param->nx = 32;
+ param->ny = 32;
+ param->nz = 32;
+ param->cutforce = 2.5;
+ param->cutneigh = param->cutforce + 0.30;
+ param->temp = 1.44;
+ param->nstat = 100;
+ param->mass = 1.0;
+ param->dtforce = 0.5 * param->dt;
+ param->every = 20;
+ param->proc_freq = 2.4;
+}
+
+double setup(
+ Parameter *param,
+ Eam *eam,
+ Atom *atom,
+ Neighbor *neighbor,
+ Stats *stats)
+{
+ if(param->force_field == FF_EAM) { initEam(eam, param); }
+ double S, E;
+ param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
+ param->xprd = param->nx * param->lattice;
+ param->yprd = param->ny * param->lattice;
+ param->zprd = param->nz * param->lattice;
+
+ S = getTimeStamp();
+ initAtom(atom);
+ initPbc(atom);
+ initStats(stats);
+ initNeighbor(neighbor, param);
+ if(param->input_file == NULL) {
+ createAtom(atom, param);
+ } else {
+ readAtom(atom, param);
+ }
+ setupNeighbor(param);
+ setupThermo(param, atom->Natoms);
+ if(param->input_file == NULL) { adjustThermo(param, atom); }
+ setupPbc(atom, param);
+ updatePbc(atom, param);
+ buildNeighbor(atom, neighbor);
+ E = getTimeStamp();
+
+ return E-S;
+}
+
+double reneighbour(
+ Parameter *param,
+ Atom *atom,
+ Neighbor *neighbor)
+{
+ double S, E;
+
+ S = getTimeStamp();
+ LIKWID_MARKER_START("reneighbour");
+ updateAtomsPbc(atom, param);
+ setupPbc(atom, param);
+ updatePbc(atom, param);
+ //sortAtom(atom);
+ buildNeighbor(atom, neighbor);
+ LIKWID_MARKER_STOP("reneighbour");
+ E = getTimeStamp();
+
+ return E-S;
+}
+
+void initialIntegrate(Parameter *param, Atom *atom)
+{
+ MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
+ MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ vx[i] += param->dtforce * fx[i];
+ vy[i] += param->dtforce * fy[i];
+ vz[i] += param->dtforce * fz[i];
+ atom_x(i) = atom_x(i) + param->dt * vx[i];
+ atom_y(i) = atom_y(i) + param->dt * vy[i];
+ atom_z(i) = atom_z(i) + param->dt * vz[i];
+ }
+}
+
+void finalIntegrate(Parameter *param, Atom *atom)
+{
+ MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
+ MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ vx[i] += param->dtforce * fx[i];
+ vy[i] += param->dtforce * fy[i];
+ vz[i] += param->dtforce * fz[i];
+ }
+}
+
+void printAtomState(Atom *atom)
+{
+ printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
+ atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
+
+ /* int nall = atom->Nlocal + atom->Nghost; */
+
+ /* for (int i=0; ix[i], atom->y[i], atom->z[i]); */
+ /* } */
+}
+
+int main(int argc, char** argv)
+{
+ double timer[NUMTIMER];
+ Eam eam;
+ Atom atom;
+ Neighbor neighbor;
+ Stats stats;
+ Parameter param;
+
+ LIKWID_MARKER_INIT;
+#pragma omp parallel
+ {
+ LIKWID_MARKER_REGISTER("force");
+ //LIKWID_MARKER_REGISTER("reneighbour");
+ //LIKWID_MARKER_REGISTER("pbc");
+ }
+ init(¶m);
+
+ for(int i = 0; i < argc; i++)
+ {
+ if((strcmp(argv[i], "-f") == 0))
+ {
+ if((param.force_field = str2ff(argv[++i])) < 0) {
+ fprintf(stderr, "Invalid force field!\n");
+ exit(-1);
+ }
+ continue;
+ }
+ if((strcmp(argv[i], "-i") == 0))
+ {
+ param.input_file = strdup(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-e") == 0))
+ {
+ param.eam_file = strdup(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
+ {
+ param.ntimes = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-nx") == 0))
+ {
+ param.nx = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-ny") == 0))
+ {
+ param.ny = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-nz") == 0))
+ {
+ param.nz = atoi(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "--freq") == 0))
+ {
+ param.proc_freq = atof(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "--vtk") == 0))
+ {
+ param.vtk_file = strdup(argv[++i]);
+ continue;
+ }
+ if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
+ {
+ printf("MD Bench: A minimalistic re-implementation of miniMD\n");
+ printf(HLINE);
+ printf("-f : force field (lj or eam), default lj\n");
+ printf("-i : input file with atom positions (dump)\n");
+ printf("-e : input file for EAM\n");
+ printf("-n / --nsteps : set number of timesteps for simulation\n");
+ printf("-nx/-ny/-nz : set linear dimension of systembox in x/y/z direction\n");
+ printf("--freq : processor frequency (GHz)\n");
+ printf("--vtk : VTK file for visualization\n");
+ printf(HLINE);
+ exit(EXIT_SUCCESS);
+ }
+ }
+
+ setup(¶m, &eam, &atom, &neighbor, &stats);
+ computeThermo(0, ¶m, &atom);
+#if defined(MEM_TRACER) || defined(INDEX_TRACER)
+ traceAddresses(¶m, &atom, &neighbor, n + 1);
+#endif
+ if(param.force_field == FF_EAM) {
+ timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
+ } else {
+ timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats);
+ }
+
+ timer[NEIGH] = 0.0;
+ timer[TOTAL] = getTimeStamp();
+
+ if(param.vtk_file != NULL) {
+ write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
+ }
+
+ for(int n = 0; n < param.ntimes; n++) {
+ initialIntegrate(¶m, &atom);
+
+ if((n + 1) % param.every) {
+ updatePbc(&atom, ¶m);
+ } else {
+ timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
+ }
+
+#if defined(MEM_TRACER) || defined(INDEX_TRACER)
+ traceAddresses(¶m, &atom, &neighbor, n + 1);
+#endif
+
+ if(param.force_field == FF_EAM) {
+ timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
+ } else {
+ timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
+ }
+
+ finalIntegrate(¶m, &atom);
+
+ if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
+ computeThermo(n + 1, ¶m, &atom);
+ }
+
+ if(param.vtk_file != NULL) {
+ write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
+ }
+ }
+
+ timer[TOTAL] = getTimeStamp() - timer[TOTAL];
+ computeThermo(-1, ¶m, &atom);
+
+ printf(HLINE);
+ printf("Force field: %s\n", ff2str(param.force_field));
+ printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
+#if PRECISION == 1
+ printf("Using single precision floating point.\n");
+#else
+ printf("Using double precision floating point.\n");
+#endif
+ printf(HLINE);
+ printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
+ printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
+ timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
+ printf(HLINE);
+ printf("Performance: %.2f million atom updates per second\n",
+ 1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
+#ifdef COMPUTE_STATS
+ displayStatistics(&atom, ¶m, &stats, timer);
+#endif
+ LIKWID_MARKER_CLOSE;
+ return EXIT_SUCCESS;
+}
diff --git a/gromacs/neighbor.c b/gromacs/neighbor.c
new file mode 100644
index 0000000..6b512a0
--- /dev/null
+++ b/gromacs/neighbor.c
@@ -0,0 +1,420 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define SMALL 1.0e-6
+#define FACTOR 0.999
+
+static MD_FLOAT xprd, yprd, zprd;
+static MD_FLOAT bininvx, bininvy, bininvz;
+static int mbinxlo, mbinylo, mbinzlo;
+static int nbinx, nbiny, nbinz;
+static int mbinx, mbiny, mbinz; // n bins in x, y, z
+static int *bincount;
+static int *bins;
+static int mbins; //total number of bins
+static int atoms_per_bin; // max atoms per bin
+static MD_FLOAT cutneigh;
+static MD_FLOAT cutneighsq; // neighbor cutoff squared
+static int nmax;
+static int nstencil; // # of bins in stencil
+static int* stencil; // stencil list of bin offsets
+static MD_FLOAT binsizex, binsizey, binsizez;
+
+static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
+static MD_FLOAT bindist(int, int, int);
+
+/* exported subroutines */
+void initNeighbor(Neighbor *neighbor, Parameter *param)
+{
+ MD_FLOAT neighscale = 5.0 / 6.0;
+ xprd = param->nx * param->lattice;
+ yprd = param->ny * param->lattice;
+ zprd = param->nz * param->lattice;
+ cutneigh = param->cutneigh;
+ nbinx = neighscale * param->nx;
+ nbiny = neighscale * param->ny;
+ nbinz = neighscale * param->nz;
+ nmax = 0;
+ atoms_per_bin = 8;
+ stencil = NULL;
+ bins = NULL;
+ bincount = NULL;
+ neighbor->maxneighs = 100;
+ neighbor->numneigh = NULL;
+ neighbor->neighbors = NULL;
+}
+
+void setupNeighbor(Parameter* param)
+{
+ MD_FLOAT coord;
+ int mbinxhi, mbinyhi, mbinzhi;
+ int nextx, nexty, nextz;
+
+ if(param->input_file != NULL) {
+ xprd = param->xprd;
+ yprd = param->yprd;
+ zprd = param->zprd;
+ }
+
+ // TODO: update lo and hi for standard case and use them here instead
+ MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
+ MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
+ MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
+
+ cutneighsq = cutneigh * cutneigh;
+
+ if(param->input_file != NULL) {
+ binsizex = cutneigh * 0.5;
+ binsizey = cutneigh * 0.5;
+ binsizez = cutneigh * 0.5;
+ nbinx = (int)((param->xhi - param->xlo) / binsizex);
+ nbiny = (int)((param->yhi - param->ylo) / binsizey);
+ nbinz = (int)((param->zhi - param->zlo) / binsizez);
+ if(nbinx == 0) { nbinx = 1; }
+ if(nbiny == 0) { nbiny = 1; }
+ if(nbinz == 0) { nbinz = 1; }
+ bininvx = nbinx / (param->xhi - param->xlo);
+ bininvy = nbiny / (param->yhi - param->ylo);
+ bininvz = nbinz / (param->zhi - param->zlo);
+ } else {
+ binsizex = xprd / nbinx;
+ binsizey = yprd / nbiny;
+ binsizez = zprd / nbinz;
+ bininvx = 1.0 / binsizex;
+ bininvy = 1.0 / binsizey;
+ bininvz = 1.0 / binsizez;
+ }
+
+ coord = xlo - cutneigh - SMALL * xprd;
+ mbinxlo = (int) (coord * bininvx);
+ if (coord < 0.0) {
+ mbinxlo = mbinxlo - 1;
+ }
+ coord = xhi + cutneigh + SMALL * xprd;
+ mbinxhi = (int) (coord * bininvx);
+
+ coord = ylo - cutneigh - SMALL * yprd;
+ mbinylo = (int) (coord * bininvy);
+ if (coord < 0.0) {
+ mbinylo = mbinylo - 1;
+ }
+ coord = yhi + cutneigh + SMALL * yprd;
+ mbinyhi = (int) (coord * bininvy);
+
+ coord = zlo - cutneigh - SMALL * zprd;
+ mbinzlo = (int) (coord * bininvz);
+ if (coord < 0.0) {
+ mbinzlo = mbinzlo - 1;
+ }
+ coord = zhi + cutneigh + SMALL * zprd;
+ mbinzhi = (int) (coord * bininvz);
+
+ mbinxlo = mbinxlo - 1;
+ mbinxhi = mbinxhi + 1;
+ mbinx = mbinxhi - mbinxlo + 1;
+
+ mbinylo = mbinylo - 1;
+ mbinyhi = mbinyhi + 1;
+ mbiny = mbinyhi - mbinylo + 1;
+
+ mbinzlo = mbinzlo - 1;
+ mbinzhi = mbinzhi + 1;
+ mbinz = mbinzhi - mbinzlo + 1;
+
+ nextx = (int) (cutneigh * bininvx);
+ if(nextx * binsizex < FACTOR * cutneigh) nextx++;
+
+ nexty = (int) (cutneigh * bininvy);
+ if(nexty * binsizey < FACTOR * cutneigh) nexty++;
+
+ nextz = (int) (cutneigh * bininvz);
+ if(nextz * binsizez < FACTOR * cutneigh) nextz++;
+
+ if (stencil) {
+ free(stencil);
+ }
+
+ stencil = (int*) malloc(
+ (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
+
+ nstencil = 0;
+ int kstart = -nextz;
+
+ for(int k = kstart; k <= nextz; k++) {
+ for(int j = -nexty; j <= nexty; j++) {
+ for(int i = -nextx; i <= nextx; i++) {
+ if(bindist(i, j, k) < cutneighsq) {
+ stencil[nstencil++] =
+ k * mbiny * mbinx + j * mbinx + i;
+ }
+ }
+ }
+ }
+
+ mbins = mbinx * mbiny * mbinz;
+
+ if (bincount) {
+ free(bincount);
+ }
+ bincount = (int*) malloc(mbins * sizeof(int));
+
+ if (bins) {
+ free(bins);
+ }
+ bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
+}
+
+void buildNeighbor(Atom *atom, Neighbor *neighbor)
+{
+ int nall = atom->Nlocal + atom->Nghost;
+
+ /* extend atom arrays if necessary */
+ if(nall > nmax) {
+ nmax = nall;
+ if(neighbor->numneigh) free(neighbor->numneigh);
+ if(neighbor->neighbors) free(neighbor->neighbors);
+ neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
+ neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
+ }
+
+ /* bin local & ghost atoms */
+ binatoms(atom);
+ int resize = 1;
+
+ /* loop over each atom, storing neighbors */
+ while(resize) {
+ int new_maxneighs = neighbor->maxneighs;
+ resize = 0;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
+ int n = 0;
+ MD_FLOAT xtmp = atom_x(i);
+ MD_FLOAT ytmp = atom_y(i);
+ MD_FLOAT ztmp = atom_z(i);
+ int ibin = coord2bin(xtmp, ytmp, ztmp);
+ #ifdef EXPLICIT_TYPES
+ int type_i = atom->type[i];
+ #endif
+ for(int k = 0; k < nstencil; k++) {
+ int jbin = ibin + stencil[k];
+ int* loc_bin = &bins[jbin * atoms_per_bin];
+
+ for(int m = 0; m < bincount[jbin]; m++) {
+ int j = loc_bin[m];
+
+ if ( j == i ){
+ continue;
+ }
+
+ MD_FLOAT delx = xtmp - atom_x(j);
+ MD_FLOAT dely = ytmp - atom_y(j);
+ MD_FLOAT delz = ztmp - atom_z(j);
+ MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+
+ #ifdef EXPLICIT_TYPES
+ int type_j = atom->type[j];
+ const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
+ #else
+ const MD_FLOAT cutoff = cutneighsq;
+ #endif
+ if( rsq <= cutoff ) {
+ neighptr[n++] = j;
+ }
+ }
+ }
+
+ neighbor->numneigh[i] = n;
+
+ if(n >= neighbor->maxneighs) {
+ resize = 1;
+
+ if(n >= new_maxneighs) {
+ new_maxneighs = n;
+ }
+ }
+ }
+
+ if(resize) {
+ printf("RESIZE %d\n", neighbor->maxneighs);
+ neighbor->maxneighs = new_maxneighs * 1.2;
+ free(neighbor->neighbors);
+ neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
+ }
+ }
+}
+
+/* internal subroutines */
+MD_FLOAT bindist(int i, int j, int k)
+{
+ MD_FLOAT delx, dely, delz;
+
+ if(i > 0) {
+ delx = (i - 1) * binsizex;
+ } else if(i == 0) {
+ delx = 0.0;
+ } else {
+ delx = (i + 1) * binsizex;
+ }
+
+ if(j > 0) {
+ dely = (j - 1) * binsizey;
+ } else if(j == 0) {
+ dely = 0.0;
+ } else {
+ dely = (j + 1) * binsizey;
+ }
+
+ if(k > 0) {
+ delz = (k - 1) * binsizez;
+ } else if(k == 0) {
+ delz = 0.0;
+ } else {
+ delz = (k + 1) * binsizez;
+ }
+
+ return (delx * delx + dely * dely + delz * delz);
+}
+
+int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin)
+{
+ int ix, iy, iz;
+
+ if(xin >= xprd) {
+ ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
+ } else if(xin >= 0.0) {
+ ix = (int)(xin * bininvx) - mbinxlo;
+ } else {
+ ix = (int)(xin * bininvx) - mbinxlo - 1;
+ }
+
+ if(yin >= yprd) {
+ iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
+ } else if(yin >= 0.0) {
+ iy = (int)(yin * bininvy) - mbinylo;
+ } else {
+ iy = (int)(yin * bininvy) - mbinylo - 1;
+ }
+
+ if(zin >= zprd) {
+ iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
+ } else if(zin >= 0.0) {
+ iz = (int)(zin * bininvz) - mbinzlo;
+ } else {
+ iz = (int)(zin * bininvz) - mbinzlo - 1;
+ }
+
+ return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
+}
+
+void binatoms(Atom *atom)
+{
+ int nall = atom->Nlocal + atom->Nghost;
+ int resize = 1;
+
+ while(resize > 0) {
+ resize = 0;
+
+ for(int i = 0; i < mbins; i++) {
+ bincount[i] = 0;
+ }
+
+ for(int i = 0; i < nall; i++) {
+ int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
+
+ if(bincount[ibin] < atoms_per_bin) {
+ int ac = bincount[ibin]++;
+ bins[ibin * atoms_per_bin + ac] = i;
+ } else {
+ resize = 1;
+ }
+ }
+
+ if(resize) {
+ free(bins);
+ atoms_per_bin *= 2;
+ bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
+ }
+ }
+}
+
+void sortAtom(Atom* atom) {
+ binatoms(atom);
+ int Nmax = atom->Nmax;
+ int* binpos = bincount;
+
+ for(int i=1; ix; double* old_y = atom->y; double* old_z = atom->z;
+ double* old_vx = atom->vx; double* old_vy = atom->vy; double* old_vz = atom->vz;
+
+ for(int mybin = 0; mybin0?binpos[mybin-1]:0;
+ int count = binpos[mybin] - start;
+ for(int k=0; kx);
+ atom->x = new_x;
+#ifndef AOS
+ free(atom->y);
+ free(atom->z);
+ atom->y = new_y; atom->z = new_z;
+#endif
+ free(atom->vx); free(atom->vy); free(atom->vz);
+ atom->vx = new_vx; atom->vy = new_vy; atom->vz = new_vz;
+}
diff --git a/gromacs/pbc.c b/gromacs/pbc.c
new file mode 100644
index 0000000..3610701
--- /dev/null
+++ b/gromacs/pbc.c
@@ -0,0 +1,172 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+#include
+#include
+#include
+
+#define DELTA 20000
+
+static int NmaxGhost;
+static int *PBCx, *PBCy, *PBCz;
+
+static void growPbc(Atom*);
+
+/* exported subroutines */
+void initPbc(Atom* atom)
+{
+ NmaxGhost = 0;
+ atom->border_map = NULL;
+ PBCx = NULL; PBCy = NULL; PBCz = NULL;
+}
+
+/* update coordinates of ghost atoms */
+/* uses mapping created in setupPbc */
+void updatePbc(Atom *atom, Parameter *param)
+{
+ int *border_map = atom->border_map;
+ int nlocal = atom->Nlocal;
+ MD_FLOAT xprd = param->xprd;
+ MD_FLOAT yprd = param->yprd;
+ MD_FLOAT zprd = param->zprd;
+
+ for(int i = 0; i < atom->Nghost; i++) {
+ atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
+ atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
+ atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
+ }
+}
+
+/* relocate atoms that have left domain according
+ * to periodic boundary conditions */
+void updateAtomsPbc(Atom *atom, Parameter *param)
+{
+ MD_FLOAT xprd = param->xprd;
+ MD_FLOAT yprd = param->yprd;
+ MD_FLOAT zprd = param->zprd;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+
+ if(atom_x(i) < 0.0) {
+ atom_x(i) += xprd;
+ } else if(atom_x(i) >= xprd) {
+ atom_x(i) -= xprd;
+ }
+
+ if(atom_y(i) < 0.0) {
+ atom_y(i) += yprd;
+ } else if(atom_y(i) >= yprd) {
+ atom_y(i) -= yprd;
+ }
+
+ if(atom_z(i) < 0.0) {
+ atom_z(i) += zprd;
+ } else if(atom_z(i) >= zprd) {
+ atom_z(i) -= zprd;
+ }
+ }
+}
+
+/* setup periodic boundary conditions by
+ * defining ghost atoms around domain
+ * only creates mapping and coordinate corrections
+ * that are then enforced in updatePbc */
+#define ADDGHOST(dx,dy,dz) \
+ Nghost++; \
+ border_map[Nghost] = i; \
+ PBCx[Nghost] = dx; \
+ PBCy[Nghost] = dy; \
+ PBCz[Nghost] = dz; \
+ atom->type[atom->Nlocal + Nghost] = atom->type[i]
+
+void setupPbc(Atom *atom, Parameter *param)
+{
+ int *border_map = atom->border_map;
+ MD_FLOAT xprd = param->xprd;
+ MD_FLOAT yprd = param->yprd;
+ MD_FLOAT zprd = param->zprd;
+ MD_FLOAT Cutneigh = param->cutneigh;
+ int Nghost = -1;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+
+ if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
+ growAtom(atom);
+ }
+ if (Nghost + 7 >= NmaxGhost) {
+ growPbc(atom);
+ border_map = atom->border_map;
+ }
+
+ MD_FLOAT x = atom_x(i);
+ MD_FLOAT y = atom_y(i);
+ MD_FLOAT z = atom_z(i);
+
+ /* Setup ghost atoms */
+ /* 6 planes */
+ if (x < Cutneigh) { ADDGHOST(+1,0,0); }
+ if (x >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); }
+ if (y < Cutneigh) { ADDGHOST(0,+1,0); }
+ if (y >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); }
+ if (z < Cutneigh) { ADDGHOST(0,0,+1); }
+ if (z >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); }
+ /* 8 corners */
+ if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1,+1,+1); }
+ if (x < Cutneigh && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(+1,-1,+1); }
+ if (x < Cutneigh && y >= Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); }
+ if (x < Cutneigh && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); }
+ if (x >= (xprd-Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1,+1,+1); }
+ if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,-1,+1); }
+ if (x >= (xprd-Cutneigh) && y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); }
+ if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); }
+ /* 12 edges */
+ if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1,0,+1); }
+ if (x < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); }
+ if (x >= (xprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,0,+1); }
+ if (x >= (xprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); }
+ if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0,+1,+1); }
+ if (y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); }
+ if (y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(0,-1,+1); }
+ if (y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); }
+ if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1,+1,0); }
+ if (y < Cutneigh && x >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); }
+ if (y >= (yprd-Cutneigh) && x < Cutneigh) { ADDGHOST(+1,-1,0); }
+ if (y >= (yprd-Cutneigh) && x >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
+ }
+ // increase by one to make it the ghost atom count
+ atom->Nghost = Nghost + 1;
+}
+
+/* internal subroutines */
+void growPbc(Atom* atom)
+{
+ int nold = NmaxGhost;
+ NmaxGhost += DELTA;
+
+ atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
+ PBCx = (int*) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
+ PBCy = (int*) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
+ PBCz = (int*) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
+}
diff --git a/gromacs/stats.c b/gromacs/stats.c
new file mode 100644
index 0000000..defaeac
--- /dev/null
+++ b/gromacs/stats.c
@@ -0,0 +1,31 @@
+#include
+
+#include
+#include
+#include
+#include
+
+void initStats(Stats *s) {
+ s->total_force_neighs = 0;
+ s->total_force_iters = 0;
+}
+
+void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
+#ifdef COMPUTE_STATS
+ double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
+ (double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
+ double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
+ double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
+#ifdef EXPLICIT_TYPES
+ force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
+#endif
+ printf("Statistics:\n");
+ printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
+ printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
+ printf("\tAverage SIMD iterations per atom: %.4f\n", avg_simd);
+ printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs);
+ printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
+ printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
+ printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
+#endif
+}
diff --git a/gromacs/thermo.c b/gromacs/thermo.c
new file mode 100644
index 0000000..6f83663
--- /dev/null
+++ b/gromacs/thermo.c
@@ -0,0 +1,138 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+
+#include
+#include
+
+static int *steparr;
+static MD_FLOAT *tmparr;
+static MD_FLOAT *engarr;
+static MD_FLOAT *prsarr;
+static MD_FLOAT mvv2e;
+static MD_FLOAT dof_boltz;
+static MD_FLOAT t_scale;
+static MD_FLOAT p_scale;
+static MD_FLOAT e_scale;
+static MD_FLOAT t_act;
+static MD_FLOAT p_act;
+static MD_FLOAT e_act;
+static int mstat;
+
+/* exported subroutines */
+void setupThermo(Parameter *param, int natoms)
+{
+ int maxstat = param->ntimes / param->nstat + 2;
+
+ steparr = (int*) malloc(maxstat * sizeof(int));
+ tmparr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
+ engarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
+ prsarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
+
+ if(param->force_field == FF_LJ) {
+ mvv2e = 1.0;
+ dof_boltz = (natoms * 3 - 3);
+ t_scale = mvv2e / dof_boltz;
+ p_scale = 1.0 / 3 / param->xprd / param->yprd / param->zprd;
+ e_scale = 0.5;
+ } else {
+ mvv2e = 1.036427e-04;
+ dof_boltz = (natoms * 3 - 3) * 8.617343e-05;
+ t_scale = mvv2e / dof_boltz;
+ p_scale = 1.602176e+06 / 3 / param->xprd / param->yprd / param->zprd;
+ e_scale = 524287.985533;//16.0;
+ param->dtforce /= mvv2e;
+ }
+
+ printf("step\ttemp\t\tpressure\n");
+}
+
+void computeThermo(int iflag, Parameter *param, Atom *atom)
+{
+ MD_FLOAT t = 0.0, p;
+ MD_FLOAT* vx = atom->vx;
+ MD_FLOAT* vy = atom->vy;
+ MD_FLOAT* vz = atom->vz;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass;
+ }
+
+ t = t * t_scale;
+ p = (t * dof_boltz) * p_scale;
+ int istep = iflag;
+
+ if(iflag == -1){
+ istep = param->ntimes;
+ }
+ if(iflag == 0){
+ mstat = 0;
+ }
+
+ steparr[mstat] = istep;
+ tmparr[mstat] = t;
+ prsarr[mstat] = p;
+ mstat++;
+ fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
+}
+
+void adjustThermo(Parameter *param, Atom *atom)
+{
+ /* zero center-of-mass motion */
+ MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
+ MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ vxtot += vx[i];
+ vytot += vy[i];
+ vztot += vz[i];
+ }
+
+ vxtot = vxtot / atom->Natoms;
+ vytot = vytot / atom->Natoms;
+ vztot = vztot / atom->Natoms;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ vx[i] -= vxtot;
+ vy[i] -= vytot;
+ vz[i] -= vztot;
+ }
+
+ t_act = 0;
+ MD_FLOAT t = 0.0;
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass;
+ }
+
+ t *= t_scale;
+ MD_FLOAT factor = sqrt(param->temp / t);
+
+ for(int i = 0; i < atom->Nlocal; i++) {
+ vx[i] *= factor;
+ vy[i] *= factor;
+ vz[i] *= factor;
+ }
+}
diff --git a/gromacs/timing.c b/gromacs/timing.c
new file mode 100644
index 0000000..2c1f12a
--- /dev/null
+++ b/gromacs/timing.c
@@ -0,0 +1,43 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+
+double getTimeStamp()
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
+}
+
+double getTimeResolution()
+{
+ struct timespec ts;
+ clock_getres(CLOCK_MONOTONIC, &ts);
+ return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
+}
+
+double getTimeStamp_()
+{
+ return getTimeStamp();
+}
diff --git a/gromacs/tracing.c b/gromacs/tracing.c
new file mode 100644
index 0000000..d91a97a
--- /dev/null
+++ b/gromacs/tracing.c
@@ -0,0 +1,73 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+#include
+#include
+#include
+
+void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep) {
+ MEM_TRACER_INIT;
+ INDEX_TRACER_INIT;
+ int Nlocal = atom->Nlocal;
+ int* neighs;
+ MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
+
+ INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
+ for(int i = 0; i < Nlocal; i++) {
+ neighs = &neighbor->neighbors[i * neighbor->maxneighs];
+ int numneighs = neighbor->numneigh[i];
+ MEM_TRACE(atom_x(i), 'R');
+ MEM_TRACE(atom_y(i), 'R');
+ MEM_TRACE(atom_z(i), 'R');
+ INDEX_TRACE_ATOM(i);
+
+ #ifdef EXPLICIT_TYPES
+ MEM_TRACE(atom->type[i], 'R');
+ #endif
+
+ DIST_TRACE_SORT(neighs, numneighs);
+ INDEX_TRACE(neighs, numneighs);
+ DIST_TRACE(neighs, numneighs);
+
+ for(int k = 0; k < numneighs; k++) {
+ MEM_TRACE(neighs[k], 'R');
+ MEM_TRACE(atom_x(j), 'R');
+ MEM_TRACE(atom_y(j), 'R');
+ MEM_TRACE(atom_z(j), 'R');
+
+ #ifdef EXPLICIT_TYPES
+ MEM_TRACE(atom->type[j], 'R');
+ #endif
+ }
+
+ MEM_TRACE(fx[i], 'R');
+ MEM_TRACE(fx[i], 'W');
+ MEM_TRACE(fy[i], 'R');
+ MEM_TRACE(fy[i], 'W');
+ MEM_TRACE(fz[i], 'R');
+ MEM_TRACE(fz[i], 'W');
+ }
+
+ INDEX_TRACER_END;
+ MEM_TRACER_END;
+}
diff --git a/gromacs/util.c b/gromacs/util.c
new file mode 100644
index 0000000..0347ab6
--- /dev/null
+++ b/gromacs/util.c
@@ -0,0 +1,95 @@
+/*
+ * =======================================================================================
+ *
+ * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
+ * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
+ *
+ * This file is part of MD-Bench.
+ *
+ * MD-Bench is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with MD-Bench. If not, see .
+ * =======================================================================================
+ */
+#include
+
+#include
+
+/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
+#define IA 16807
+#define IM 2147483647
+#define AM (1.0/IM)
+#define IQ 127773
+#define IR 2836
+#define MASK 123459876
+
+double myrandom(int* seed)
+{
+ int k= (*seed) / IQ;
+ double ans;
+
+ *seed = IA * (*seed - k * IQ) - IR * k;
+ if(*seed < 0) *seed += IM;
+ ans = AM * (*seed);
+ return ans;
+}
+
+void random_reset(int *seed, int ibase, double *coord)
+{
+ int i;
+ char *str = (char *) &ibase;
+ int n = sizeof(int);
+ unsigned int hash = 0;
+
+ for (i = 0; i < n; i++) {
+ hash += str[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ str = (char *) coord;
+ n = 3 * sizeof(double);
+ for (i = 0; i < n; i++) {
+ hash += str[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ // keep 31 bits of unsigned int as new seed
+ // do not allow seed = 0, since will cause hang in gaussian()
+
+ *seed = hash & 0x7ffffff;
+ if (!(*seed)) *seed = 1;
+
+ // warm up the RNG
+
+ for (i = 0; i < 5; i++) myrandom(seed);
+ //save = 0;
+}
+
+int str2ff(const char *string)
+{
+ if(strncmp(string, "lj", 2) == 0) return FF_LJ;
+ if(strncmp(string, "eam", 3) == 0) return FF_EAM;
+ return -1;
+}
+
+const char* ff2str(int ff)
+{
+ if(ff == FF_LJ) { return "lj"; }
+ if(ff == FF_EAM) { return "eam"; }
+ return "invalid";
+}
diff --git a/gromacs/vtk.c b/gromacs/vtk.c
new file mode 100644
index 0000000..b3ff0e6
--- /dev/null
+++ b/gromacs/vtk.c
@@ -0,0 +1,44 @@
+#include
+#include
+
+#include
+
+int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
+ char timestep_filename[128];
+ snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
+ FILE* fp = fopen(timestep_filename, "wb");
+
+ if(fp == NULL) {
+ fprintf(stderr, "Could not open VTK file for writing!\n");
+ return -1;
+ }
+
+ fprintf(fp, "# vtk DataFile Version 2.0\n");
+ fprintf(fp, "Particle data\n");
+ fprintf(fp, "ASCII\n");
+ fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
+ fprintf(fp, "POINTS %d double\n", atom->Nlocal);
+ for(int i = 0; i < atom->Nlocal; ++i) {
+ fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
+ }
+ fprintf(fp, "\n\n");
+ fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
+ for(int i = 0; i < atom->Nlocal; ++i) {
+ fprintf(fp, "1 %d\n", i);
+ }
+ fprintf(fp, "\n\n");
+ fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
+ for(int i = 0; i < atom->Nlocal; ++i) {
+ fprintf(fp, "1\n");
+ }
+ fprintf(fp, "\n\n");
+ fprintf(fp, "POINT_DATA %d\n", atom->Nlocal);
+ fprintf(fp, "SCALARS mass double\n");
+ fprintf(fp, "LOOKUP_TABLE default\n");
+ for(int i = 0; i < atom->Nlocal; i++) {
+ fprintf(fp, "1.0\n");
+ }
+ fprintf(fp, "\n\n");
+ fclose(fp);
+ return 0;
+}