Move src directory to lammps

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti
2022-08-04 17:25:31 +02:00
parent eeba125a52
commit bc7b523979
28 changed files with 0 additions and 0 deletions

89
lammps/allocate.c Normal file
View File

@@ -0,0 +1,89 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <cuda_runtime.h>
void checkCUDAError(const char *msg, cudaError_t err)
{
if (err != cudaSuccess)
{
//print a human readable error message
printf("[CUDA ERROR %s]: %s\r\n", msg, cudaGetErrorString(err));
exit(-1);
}
}
void* allocate (int alignment, size_t bytesize)
{
int errorCode;
void* ptr;
checkCUDAError( "allocate", cudaMallocHost((void**)&ptr, bytesize) );
return ptr;
/*
errorCode = posix_memalign(&ptr, alignment, bytesize);
if (errorCode) {
if (errorCode == EINVAL) {
fprintf(stderr,
"Error: Alignment parameter is not a power of two\n");
exit(EXIT_FAILURE);
}
if (errorCode == ENOMEM) {
fprintf(stderr,
"Error: Insufficient memory to fulfill the request\n");
exit(EXIT_FAILURE);
}
}
if (ptr == NULL) {
fprintf(stderr, "Error: posix_memalign failed!\n");
exit(EXIT_FAILURE);
}
return ptr;
*/
}
void* reallocate (
void* ptr,
int alignment,
size_t newBytesize,
size_t oldBytesize)
{
void* newarray = allocate(alignment, newBytesize);
if(ptr != NULL) {
memcpy(newarray, ptr, oldBytesize);
cudaFreeHost(ptr);
}
return newarray;
}

185
lammps/atom.c Normal file
View File

@@ -0,0 +1,185 @@
/*
* =======================================================================================
*
* Authors: Jan Eitzinger (je), jan.eitzinger@fau.de
* Rafael Ravedutti (rr), rafaelravedutti@gmail.com
*
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <atom.h>
#include <allocate.h>
#include <util.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#define DELTA 20000
void initAtom(Atom *atom)
{
atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
atom->Natoms = 0;
atom->Nlocal = 0;
atom->Nghost = 0;
atom->Nmax = 0;
atom->type = NULL;
atom->ntypes = 0;
atom->epsilon = NULL;
atom->sigma6 = NULL;
atom->cutforcesq = NULL;
atom->cutneighsq = NULL;
}
void createAtom(Atom *atom, Parameter *param)
{
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
atom->Natoms = 4 * param->nx * param->ny * param->nz;
atom->Nlocal = 0;
atom->ntypes = param->ntypes;
checkCUDAError( "atom->epsilon cudaMallocHost", cudaMallocHost((void**)&(atom->epsilon), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
checkCUDAError( "atom->sigma6 cudaMallocHost", cudaMallocHost((void**)&(atom->sigma6), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
checkCUDAError( "atom->cutforcesq cudaMallocHost", cudaMallocHost((void**)&(atom->cutforcesq), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
checkCUDAError( "atom->cutneighsq cudaMallocHost", cudaMallocHost((void**)&(atom->cutneighsq), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
int ilo = (int) (xlo / (0.5 * alat) - 1);
int ihi = (int) (xhi / (0.5 * alat) + 1);
int jlo = (int) (ylo / (0.5 * alat) - 1);
int jhi = (int) (yhi / (0.5 * alat) + 1);
int klo = (int) (zlo / (0.5 * alat) - 1);
int khi = (int) (zhi / (0.5 * alat) + 1);
ilo = MAX(ilo, 0);
ihi = MIN(ihi, 2 * param->nx - 1);
jlo = MAX(jlo, 0);
jhi = MIN(jhi, 2 * param->ny - 1);
klo = MAX(klo, 0);
khi = MIN(khi, 2 * param->nz - 1);
MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp;
int i, j, k, m, n;
int sx = 0; int sy = 0; int sz = 0;
int ox = 0; int oy = 0; int oz = 0;
int subboxdim = 8;
while(oz * subboxdim <= khi) {
k = oz * subboxdim + sz;
j = oy * subboxdim + sy;
i = ox * subboxdim + sx;
if(((i + j + k) % 2 == 0) &&
(i >= ilo) && (i <= ihi) &&
(j >= jlo) && (j <= jhi) &&
(k >= klo) && (k <= khi)) {
xtmp = 0.5 * alat * i;
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
if( xtmp >= xlo && xtmp < xhi &&
ytmp >= ylo && ytmp < yhi &&
ztmp >= zlo && ztmp < zhi ) {
n = k * (2 * param->ny) * (2 * param->nx) +
j * (2 * param->nx) +
i + 1;
for(m = 0; m < 5; m++) {
myrandom(&n);
}
vxtmp = myrandom(&n);
for(m = 0; m < 5; m++){
myrandom(&n);
}
vytmp = myrandom(&n);
for(m = 0; m < 5; m++) {
myrandom(&n);
}
vztmp = myrandom(&n);
if(atom->Nlocal == atom->Nmax) {
growAtom(atom);
}
atom_x(atom->Nlocal) = xtmp;
atom_y(atom->Nlocal) = ytmp;
atom_z(atom->Nlocal) = ztmp;
atom_vx(atom->Nlocal) = vxtmp;
atom_vy(atom->Nlocal) = vytmp;
atom_vz(atom->Nlocal) = vztmp;
atom->type[atom->Nlocal] = rand() % atom->ntypes;
atom->Nlocal++;
}
}
sx++;
if(sx == subboxdim) { sx = 0; sy++; }
if(sy == subboxdim) { sy = 0; sz++; }
if(sz == subboxdim) { sz = 0; ox++; }
if(ox * subboxdim > ihi) { ox = 0; oy++; }
if(oy * subboxdim > jhi) { oy = 0; oz++; }
}
}
void growAtom(Atom *atom)
{
int nold = atom->Nmax;
atom->Nmax += DELTA;
#ifdef AOS
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
#else
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
#endif
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
}

285
lammps/eam_utils.c Normal file
View File

@@ -0,0 +1,285 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <allocate.h>
#include <atom.h>
#include <eam.h>
#include <parameter.h>
#include <util.h>
#ifndef MAXLINE
#define MAXLINE 4096
#endif
void initEam(Eam* eam, Parameter* param) {
int ntypes = param->ntypes;
eam->nmax = 0;
eam->fp = NULL;
coeff(eam, param);
init_style(eam, param);
}
void coeff(Eam* eam, Parameter* param) {
read_file(&eam->file, param->input_file);
param->mass = eam->file.mass;
param->cutforce = eam->file.cut;
param->cutneigh = param->cutforce + 1.0;
param->temp = 600.0;
param->dt = 0.001;
param->rho = 0.07041125;
param->dtforce = 0.5 * param->dt / param->mass;
}
void init_style(Eam* eam, Parameter* param) {
// convert read-in file(s) to arrays and spline them
file2array(eam);
array2spline(eam, param);
}
void read_file(Funcfl* file, const char* filename) {
FILE* fptr;
char line[MAXLINE];
fptr = fopen(filename, "r");
if(fptr == NULL) {
printf("Can't open EAM Potential file: %s\n", filename);
exit(0);
}
int tmp;
fgets(line, MAXLINE, fptr);
fgets(line, MAXLINE, fptr);
sscanf(line, "%d %lg", &tmp, &(file->mass));
fgets(line, MAXLINE, fptr);
sscanf(line, "%d %lg %d %lg %lg", &file->nrho, &file->drho, &file->nr, &file->dr, &file->cut);
//printf("Read: %lf %i %lf %i %lf %lf\n",file->mass,file->nrho,file->drho,file->nr,file->dr,file->cut);
file->frho = (MD_FLOAT *) allocate(ALIGNMENT, (file->nrho + 1) * sizeof(MD_FLOAT));
file->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT));
file->zr = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT));
grab(fptr, file->nrho, file->frho);
grab(fptr, file->nr, file->zr);
grab(fptr, file->nr, file->rhor);
for(int i = file->nrho; i > 0; i--) file->frho[i] = file->frho[i - 1];
for(int i = file->nr; i > 0; i--) file->rhor[i] = file->rhor[i - 1];
for(int i = file->nr; i > 0; i--) file->zr[i] = file->zr[i - 1];
fclose(fptr);
}
void file2array(Eam* eam) {
int i, j, k, m, n;
double sixth = 1.0 / 6.0;
// determine max function params from all active funcfl files
// active means some element is pointing at it via map
int active;
double rmax, rhomax;
eam->dr = eam->drho = rmax = rhomax = 0.0;
active = 0;
Funcfl* file = &eam->file;
eam->dr = MAX(eam->dr, file->dr);
eam->drho = MAX(eam->drho, file->drho);
rmax = MAX(rmax, (file->nr - 1) * file->dr);
rhomax = MAX(rhomax, (file->nrho - 1) * file->drho);
// set nr,nrho from cutoff and spacings
// 0.5 is for round-off in divide
eam->nr = (int)(rmax / eam->dr + 0.5);
eam->nrho = (int)(rhomax / eam->drho + 0.5);
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of funcfl files + 1 for zero array
eam->frho = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nrho + 1) * sizeof(MD_FLOAT));
// interpolate each file's frho to a single grid and cutoff
double r, p, cof1, cof2, cof3, cof4;
n = 0;
for(m = 1; m <= eam->nrho; m++) {
r = (m - 1) * eam->drho;
p = r / file->drho + 1.0;
k = (int)(p);
k = MIN(k, file->nrho - 2);
k = MAX(k, 2);
p -= k;
p = MIN(p, 2.0);
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
cof4 = sixth * p * (p * p - 1.0);
eam->frho[m] = cof1 * file->frho[k - 1] + cof2 * file->frho[k] +
cof3 * file->frho[k + 1] + cof4 * file->frho[k + 2];
}
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = # of funcfl files
eam->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT));
// interpolate each file's rhor to a single grid and cutoff
for(m = 1; m <= eam->nr; m++) {
r = (m - 1) * eam->dr;
p = r / file->dr + 1.0;
k = (int)(p);
k = MIN(k, file->nr - 2);
k = MAX(k, 2);
p -= k;
p = MIN(p, 2.0);
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
cof4 = sixth * p * (p * p - 1.0);
eam->rhor[m] = cof1 * file->rhor[k - 1] + cof2 * file->rhor[k] +
cof3 * file->rhor[k + 1] + cof4 * file->rhor[k + 2];
//if(m==119)printf("BuildRho: %e %e %e %e %e %e\n",rhor[m],cof1,cof2,cof3,cof4,file->rhor[k]);
}
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for funcfl files, I,J mapping only depends on I
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of funcfl files
eam->z2r = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT));
// create a z2r array for each file against other files, only for I >= J
// interpolate zri and zrj to a single grid and cutoff
double zri, zrj;
Funcfl* ifile = &eam->file;
Funcfl* jfile = &eam->file;
for(m = 1; m <= eam->nr; m++) {
r = (m - 1) * eam->dr;
p = r / ifile->dr + 1.0;
k = (int)(p);
k = MIN(k, ifile->nr - 2);
k = MAX(k, 2);
p -= k;
p = MIN(p, 2.0);
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
cof4 = sixth * p * (p * p - 1.0);
zri = cof1 * ifile->zr[k - 1] + cof2 * ifile->zr[k] +
cof3 * ifile->zr[k + 1] + cof4 * ifile->zr[k + 2];
p = r / jfile->dr + 1.0;
k = (int)(p);
k = MIN(k, jfile->nr - 2);
k = MAX(k, 2);
p -= k;
p = MIN(p, 2.0);
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
cof4 = sixth * p * (p * p - 1.0);
zrj = cof1 * jfile->zr[k - 1] + cof2 * jfile->zr[k] +
cof3 * jfile->zr[k + 1] + cof4 * jfile->zr[k + 2];
eam->z2r[m] = 27.2 * 0.529 * zri * zrj;
}
}
void array2spline(Eam* eam, Parameter* param) {
eam->rdr = 1.0 / eam->dr;
eam->rdrho = 1.0 / eam->drho;
eam->nrho_tot = (eam->nrho + 1) * 7 + 64;
eam->nr_tot = (eam->nr + 1) * 7 + 64;
eam->nrho_tot -= eam->nrho_tot%64;
eam->nr_tot -= eam->nr_tot%64;
int ntypes = param->ntypes;
eam->frho_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nrho_tot * sizeof(MD_FLOAT));
eam->rhor_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT));
eam->z2r_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT));
interpolate(eam->nrho, eam->drho, eam->frho, eam->frho_spline);
interpolate(eam->nr, eam->dr, eam->rhor, eam->rhor_spline);
interpolate(eam->nr, eam->dr, eam->z2r, eam->z2r_spline);
// replicate data for multiple types;
for(int tt = 0; tt < ntypes * ntypes; tt++) {
for(int k = 0; k < eam->nrho_tot; k++)
eam->frho_spline[tt*eam->nrho_tot + k] = eam->frho_spline[k];
for(int k = 0; k < eam->nr_tot; k++)
eam->rhor_spline[tt*eam->nr_tot + k] = eam->rhor_spline[k];
for(int k = 0; k < eam->nr_tot; k++)
eam->z2r_spline[tt*eam->nr_tot + k] = eam->z2r_spline[k];
}
}
void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline) {
for(int m = 1; m <= n; m++) spline[m * 7 + 6] = f[m];
spline[1 * 7 + 5] = spline[2 * 7 + 6] - spline[1 * 7 + 6];
spline[2 * 7 + 5] = 0.5 * (spline[3 * 7 + 6] - spline[1 * 7 + 6]);
spline[(n - 1) * 7 + 5] = 0.5 * (spline[n * 7 + 6] - spline[(n - 2) * 7 + 6]);
spline[n * 7 + 5] = spline[n * 7 + 6] - spline[(n - 1) * 7 + 6];
for(int m = 3; m <= n - 2; m++)
spline[m * 7 + 5] = ((spline[(m - 2) * 7 + 6] - spline[(m + 2) * 7 + 6]) +
8.0 * (spline[(m + 1) * 7 + 6] - spline[(m - 1) * 7 + 6])) / 12.0;
for(int m = 1; m <= n - 1; m++) {
spline[m * 7 + 4] = 3.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]) -
2.0 * spline[m * 7 + 5] - spline[(m + 1) * 7 + 5];
spline[m * 7 + 3] = spline[m * 7 + 5] + spline[(m + 1) * 7 + 5] -
2.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]);
}
spline[n * 7 + 4] = 0.0;
spline[n * 7 + 3] = 0.0;
for(int m = 1; m <= n; m++) {
spline[m * 7 + 2] = spline[m * 7 + 5] / delta;
spline[m * 7 + 1] = 2.0 * spline[m * 7 + 4] / delta;
spline[m * 7 + 0] = 3.0 * spline[m * 7 + 3] / delta;
}
}
void grab(FILE* fptr, int n, MD_FLOAT* list) {
char* ptr;
char line[MAXLINE];
int i = 0;
while(i < n) {
fgets(line, MAXLINE, fptr);
ptr = strtok(line, " \t\n\r\f");
list[i++] = atof(ptr);
while(ptr = strtok(NULL, " \t\n\r\f")) list[i++] = atof(ptr);
}
}

232
lammps/force-tracing.c Normal file
View File

@@ -0,0 +1,232 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <likwid-marker.h>
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
#include <stdio.h>
#include <stdlib.h>
#endif
#ifndef VECTOR_WIDTH
# define VECTOR_WIDTH 8
#endif
#ifndef TRACER_CONDITION
# define TRACER_CONDITION (!(timestep % param->every))
#endif
#ifdef MEM_TRACER
# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
if(TRACER_CONDITION) { \
char mem_tracer_fn[128]; \
snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
mem_tracer_fp = fopen(mem_tracer_fn, "w");
}
# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
#else
# define MEM_TRACER_INIT
# define MEM_TRACER_END
# define MEM_TRACE(addr, op)
#endif
#ifdef INDEX_TRACER
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
if(TRACER_CONDITION) { \
char index_tracer_fn[128]; \
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
index_tracer_fp = fopen(index_tracer_fn, "w"); \
}
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
fprintf(index_tracer_fp, "I: "); \
for(int __j = 0; __j < __e; ++__j) { \
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
}
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
if(l[__k] > l[__k + 1]) { \
int __t = l[__k]; \
l[__k] = l[__k + 1]; \
l[__k + 1] = __t; \
} \
} \
} \
} \
} \
}
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
fprintf(index_tracer_fp, "D: "); \
for(int __j = 0; __j < __e - 1; ++__j) { \
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
fprintf(index_tracer_fp, "%d ", __dist); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
} \
}
#else
# define INDEX_TRACER_INIT
# define INDEX_TRACER_END
# define INDEX_TRACE_NATOMS(nl, ng, mn)
# define INDEX_TRACE_ATOM(a)
# define INDEX_TRACE(l, e)
# define DIST_TRACE_SORT(l, e)
# define DIST_TRACE(l, e)
#endif
double computeForceTracing(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
MEM_TRACER_INIT;
INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
for(int i = 0; i < Nlocal; i++) {
fx[i] = 0.0;
fy[i] = 0.0;
fz[i] = 0.0;
}
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) {
#pragma omp parallel for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
MEM_TRACE(atom_x(i), 'R');
MEM_TRACE(atom_y(i), 'R');
MEM_TRACE(atom_z(i), 'R');
INDEX_TRACE_ATOM(i);
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
MEM_TRACE(atom->type(i), 'R');
#endif
#if defined(VARIANT) && VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
#define REPEAT_NEIGHBORS_LOOP
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) {
#endif
//DIST_TRACE_SORT(neighs, numneighs);
INDEX_TRACE(neighs, numneighs);
//DIST_TRACE(neighs, numneighs);
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
MEM_TRACE(neighs[k], 'R');
MEM_TRACE(atom_x(j), 'R');
MEM_TRACE(atom_y(j), 'R');
MEM_TRACE(atom_z(j), 'R');
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
MEM_TRACE(atom->type(j), 'R');
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
}
}
#ifdef REPEAT_NEIGHBORS_LOOP
}
#endif
fx[i] += fix;
fy[i] += fiy;
fz[i] += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
MEM_TRACE(fx[i], 'R');
MEM_TRACE(fx[i], 'W');
MEM_TRACE(fy[i], 'R');
MEM_TRACE(fy[i], 'W');
MEM_TRACE(fz[i], 'R');
MEM_TRACE(fz[i], 'W');
}
}
LIKWID_MARKER_STOP("force");
double E = getTimeStamp();
INDEX_TRACER_END;
MEM_TRACER_END;
return E-S;
}

212
lammps/force.cu Normal file
View File

@@ -0,0 +1,212 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <cuda_profiler_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
extern "C" {
#include <likwid-marker.h>
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <allocate.h>
}
// cuda kernel
__global__ void calc_force(
Atom a,
MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon,
int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
Atom *atom = &a;
const int numneighs = neigh_numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
for(int k = 0; k < numneighs; k++) {
int j = neigh_neighbors[atom->Nlocal * k + i];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
}
}
atom_fx(i) = fix;
atom_fy(i) = fiy;
atom_fz(i) = fiz;
}
__global__ void kernel_initial_integrate(MD_FLOAT dtforce, MD_FLOAT dt, int Nlocal, Atom a) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
Atom *atom = &a;
atom_vx(i) += dtforce * atom_fx(i);
atom_vy(i) += dtforce * atom_fy(i);
atom_vz(i) += dtforce * atom_fz(i);
atom_x(i) = atom_x(i) + dt * atom_vx(i);
atom_y(i) = atom_y(i) + dt * atom_vy(i);
atom_z(i) = atom_z(i) + dt * atom_vz(i);
}
__global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal, Atom a) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
Atom *atom = &a;
atom_vx(i) += dtforce * atom_fx(i);
atom_vy(i) += dtforce * atom_fy(i);
atom_vz(i) += dtforce * atom_fz(i);
}
extern "C" {
void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom, Atom *c_atom, const int num_threads_per_block) {
const int Nlocal = atom->Nlocal;
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, *c_atom);
checkCUDAError( "PeekAtLastError FinalIntegrate", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync FinalIntegrate", cudaDeviceSynchronize() );
if(doReneighbour) {
checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
}
}
void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom, Atom *c_atom, const int num_threads_per_block) {
const int Nlocal = atom->Nlocal;
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, *c_atom);
checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
if(doReneighbour) {
checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
}
}
double computeForce(
bool reneighbourHappenend,
Parameter *param,
Atom *atom,
Neighbor *neighbor,
Atom *c_atom,
Neighbor *c_neighbor,
int num_threads_per_block
)
{
int Nlocal = atom->Nlocal;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
/*
int nDevices;
cudaGetDeviceCount(&nDevices);
size_t free, total;
for(int i = 0; i < nDevices; ++i) {
cudaMemGetInfo( &free, &total );
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, i);
printf("DEVICE %d/%d NAME: %s\r\n with %ld MB/%ld MB memory used", i + 1, nDevices, prop.name, free / 1024 / 1024, total / 1024 / 1024);
}
*/
// HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
// checkCUDAError( "c_atom->fx memset", cudaMemset(c_atom->fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
cudaProfilerStart();
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
calc_force <<< num_blocks, num_threads_per_block >>> (*c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighbor->neighbors, c_neighbor->numneigh);
checkCUDAError( "PeekAtLastError ComputeForce", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync ComputeForce", cudaDeviceSynchronize() );
cudaProfilerStop();
LIKWID_MARKER_STOP("force");
double E = getTimeStamp();
return E-S;
}
}

213
lammps/force_eam.c Normal file
View File

@@ -0,0 +1,213 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <likwid-marker.h>
#include <math.h>
#include <allocate.h>
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <eam.h>
#include <util.h>
double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
if(eam->nmax < atom->Nmax) {
eam->nmax = atom->Nmax;
if(eam->fp != NULL) { free(eam->fp); }
eam->fp = (MD_FLOAT *) allocate(ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT));
}
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp;
MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline;
int rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; int rdrho = eam->rdrho;
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
double S = getTimeStamp();
LIKWID_MARKER_START("force_eam_fp");
#pragma omp parallel for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT rhoi = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
#pragma ivdep
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
#else
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
#endif
if(rsq < cutforcesq) {
MD_FLOAT p = sqrt(rsq) * rdr + 1.0;
int m = (int)(p);
m = m < nr - 1 ? m : nr - 1;
p -= m;
p = p < 1.0 ? p : 1.0;
#ifdef EXPLICIT_TYPES
rhoi += ((rhor_spline[type_ij * nr_tot + m * 7 + 3] * p +
rhor_spline[type_ij * nr_tot + m * 7 + 4]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 5]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 6];
#else
rhoi += ((rhor_spline[m * 7 + 3] * p +
rhor_spline[m * 7 + 4]) * p +
rhor_spline[m * 7 + 5]) * p +
rhor_spline[m * 7 + 6];
#endif
}
}
#ifdef EXPLICIT_TYPES
const int type_ii = type_i * type_i;
#endif
MD_FLOAT p = 1.0 * rhoi * rdrho + 1.0;
int m = (int)(p);
m = MAX(1, MIN(m, nrho - 1));
p -= m;
p = MIN(p, 1.0);
#ifdef EXPLICIT_TYPES
fp[i] = (frho_spline[type_ii * nrho_tot + m * 7 + 0] * p +
frho_spline[type_ii * nrho_tot + m * 7 + 1]) * p +
frho_spline[type_ii * nrho_tot + m * 7 + 2];
#else
fp[i] = (frho_spline[m * 7 + 0] * p + frho_spline[m * 7 + 1]) * p + frho_spline[m * 7 + 2];
#endif
}
LIKWID_MARKER_STOP("force_eam_fp");
// We still need to update fp for PBC atoms
for(int i = 0; i < atom->Nghost; i++) {
fp[Nlocal + i] = fp[atom->border_map[i]];
}
LIKWID_MARKER_START("force_eam");
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
#pragma ivdep
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
#else
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
#endif
if(rsq < cutforcesq) {
MD_FLOAT r = sqrt(rsq);
MD_FLOAT p = r * rdr + 1.0;
int m = (int)(p);
m = m < nr - 1 ? m : nr - 1;
p -= m;
p = p < 1.0 ? p : 1.0;
// rhoip = derivative of (density at atom j due to atom i)
// rhojp = derivative of (density at atom i due to atom j)
// phi = pair potential energy
// phip = phi'
// z2 = phi * r
// z2p = (phi * r)' = (phi' r) + phi
// psip needs both fp[i] and fp[j] terms since r_ij appears in two
// terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
// hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
#ifdef EXPLICIT_TYPES
MD_FLOAT rhoip = (rhor_spline[type_ij * nr_tot + m * 7 + 0] * p +
rhor_spline[type_ij * nr_tot + m * 7 + 1]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 2];
MD_FLOAT z2p = (z2r_spline[type_ij * nr_tot + m * 7 + 0] * p +
z2r_spline[type_ij * nr_tot + m * 7 + 1]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 2];
MD_FLOAT z2 = ((z2r_spline[type_ij * nr_tot + m * 7 + 3] * p +
z2r_spline[type_ij * nr_tot + m * 7 + 4]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 5]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 6];
#else
MD_FLOAT rhoip = (rhor_spline[m * 7 + 0] * p + rhor_spline[m * 7 + 1]) * p + rhor_spline[m * 7 + 2];
MD_FLOAT z2p = (z2r_spline[m * 7 + 0] * p + z2r_spline[m * 7 + 1]) * p + z2r_spline[m * 7 + 2];
MD_FLOAT z2 = ((z2r_spline[m * 7 + 3] * p +
z2r_spline[m * 7 + 4]) * p +
z2r_spline[m * 7 + 5]) * p +
z2r_spline[m * 7 + 6];
#endif
MD_FLOAT recip = 1.0 / r;
MD_FLOAT phi = z2 * recip;
MD_FLOAT phip = z2p * recip - phi * recip;
MD_FLOAT psip = fp[i] * rhoip + fp[j] * rhoip + phip;
MD_FLOAT fpair = -psip * recip;
fix += delx * fpair;
fiy += dely * fpair;
fiz += delz * fpair;
//fpair *= 0.5;
}
}
fx[i] = fix;
fy[i] = fiy;
fz[i] = fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("force_eam");
double E = getTimeStamp();
return E-S;
}

View File

@@ -0,0 +1,33 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <cuda_runtime.h>
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_
extern void* allocate (int alignment, size_t bytesize);
extern void* reallocate (void* ptr, int alignment, size_t newBytesize, size_t oldBytesize);
extern void checkCUDAError(const char *msg, cudaError_t err);
#endif

78
lammps/includes/atom.h Normal file
View File

@@ -0,0 +1,78 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int *border_map;
int *type;
int ntypes;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
} Atom;
extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*);
extern void growAtom(Atom*);
#ifdef AOS
#define POS_DATA_LAYOUT "AoS"
#define atom_x(i) atom->x[(i) * 3 + 0]
#define atom_y(i) atom->x[(i) * 3 + 1]
#define atom_z(i) atom->x[(i) * 3 + 2]
#define atom_fx(i) atom->fx[(i) * 3 + 0]
#define atom_fy(i) atom->fx[(i) * 3 + 1]
#define atom_fz(i) atom->fx[(i) * 3 + 2]
#define atom_vx(i) atom->vx[(i) * 3 + 0]
#define atom_vy(i) atom->vx[(i) * 3 + 1]
#define atom_vz(i) atom->vx[(i) * 3 + 2]
#else
#define POS_DATA_LAYOUT "SoA"
#define atom_x(i) atom->x[i]
#define atom_y(i) atom->y[i]
#define atom_z(i) atom->z[i]
#define atom_fx(i) atom->fx[i]
#define atom_fy(i) atom->fy[i]
#define atom_fz(i) atom->fz[i]
#define atom_vx(i) atom->vx[i]
#define atom_vy(i) atom->vy[i]
#define atom_vz(i) atom->vz[i]
#endif
#endif

55
lammps/includes/eam.h Normal file
View File

@@ -0,0 +1,55 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdio.h>
#include <atom.h>
#include <parameter.h>
#ifndef __EAM_H_
#define __EAM_H_
typedef struct {
int nrho, nr;
MD_FLOAT drho, dr, cut, mass;
MD_FLOAT *frho, *rhor, *zr;
} Funcfl;
typedef struct {
MD_FLOAT* fp;
int nmax;
int nrho, nr;
int nrho_tot, nr_tot;
MD_FLOAT dr, rdr, drho, rdrho;
MD_FLOAT *frho, *rhor, *z2r;
MD_FLOAT *rhor_spline, *frho_spline, *z2r_spline;
Funcfl file;
} Eam;
void initEam(Eam* eam, Parameter* param);
void coeff(Eam* eam, Parameter* param);
void init_style(Eam* eam, Parameter *param);
void read_file(Funcfl* file, const char* filename);
void file2array(Eam* eam);
void array2spline(Eam* eam, Parameter* param);
void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline);
void grab(FILE* fptr, int n, MD_FLOAT* list);
#endif

View File

@@ -0,0 +1,170 @@
/*
* =======================================================================================
*
* Filename: likwid-marker.h
*
* Description: Header File of likwid Marker API
*
* Version: <VERSION>
* Released: <DATE>
*
* Authors: Thomas Gruber (tg), thomas.roehl@googlemail.com
*
* Project: likwid
*
* Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* =======================================================================================
*/
#ifndef LIKWID_MARKER_H
#define LIKWID_MARKER_H
/** \addtogroup MarkerAPI Marker API module
* @{
*/
/*!
\def LIKWID_MARKER_INIT
Shortcut for likwid_markerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_THREADINIT
Shortcut for likwid_markerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_REGISTER(regionTag)
Shortcut for likwid_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_START(regionTag)
Shortcut for likwid_markerStartRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_STOP(regionTag)
Shortcut for likwid_markerStopRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
Shortcut for likwid_markerGetResults() for \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_SWITCH
Shortcut for likwid_markerNextGroup() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_RESET(regionTag)
Shortcut for likwid_markerResetRegion() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_MARKER_CLOSE
Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/** @}*/
#ifdef LIKWID_PERFMON
#include <likwid.h>
#define LIKWID_MARKER_INIT likwid_markerInit()
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
#define LIKWID_MARKER_CLOSE likwid_markerClose()
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)
#else /* LIKWID_PERFMON */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_MARKER_RESET(regionTag)
#endif /* LIKWID_PERFMON */
/** \addtogroup NvMarkerAPI NvMarker API module (MarkerAPI for Nvidia GPUs)
* @{
*/
/*!
\def LIKWID_NVMARKER_INIT
Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_THREADINIT
Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_REGISTER(regionTag)
Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_START(regionTag)
Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_STOP(regionTag)
Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count)
Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_SWITCH
Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_RESET(regionTag)
Shortcut for likwid_gpuMarkerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_CLOSE
Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/** @}*/
#ifdef LIKWID_NVMON
#ifndef LIKWID_WITH_NVMON
#define LIKWID_WITH_NVMON
#endif
#include <likwid.h>
#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit()
#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit()
#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup()
#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag)
#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag)
#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag)
#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose()
#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag)
#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \
likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count)
#else /* LIKWID_NVMON */
#define LIKWID_NVMARKER_INIT
#define LIKWID_NVMARKER_THREADINIT
#define LIKWID_NVMARKER_SWITCH
#define LIKWID_NVMARKER_REGISTER(regionTag)
#define LIKWID_NVMARKER_START(regionTag)
#define LIKWID_NVMARKER_STOP(regionTag)
#define LIKWID_NVMARKER_CLOSE
#define LIKWID_NVMARKER_GET(regionTag, nevents, events, time, count)
#define LIKWID_NVMARKER_RESET(regionTag)
#endif /* LIKWID_NVMON */
#endif /* LIKWID_MARKER_H */

View File

@@ -0,0 +1,58 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <atom.h>
#include <parameter.h>
#ifndef __NEIGHBOR_H_
#define __NEIGHBOR_H_
typedef struct {
int every;
int ncalls;
int* neighbors;
int maxneighs;
int* numneigh;
} Neighbor;
typedef struct {
MD_FLOAT xprd; MD_FLOAT yprd; MD_FLOAT zprd;
MD_FLOAT bininvx; MD_FLOAT bininvy; MD_FLOAT bininvz;
int mbinxlo; int mbinylo; int mbinzlo;
int nbinx; int nbiny; int nbinz;
int mbinx; int mbiny; int mbinz;
} Neighbor_params;
typedef struct {
int* bincount;
int* bins;
int mbins;
int atoms_per_bin;
} Binning;
extern void initNeighbor(Neighbor*, Parameter*);
extern void setupNeighbor();
extern void binatoms(Atom*);
extern void buildNeighbor(Atom*, Neighbor*);
extern void sortAtom(Atom*);
extern void binatoms_cuda(Atom*, Binning*, int*, Neighbor_params*, const int);
extern void buildNeighbor_cuda(Atom*, Neighbor*, Atom*, Neighbor*, const int, double*);
#endif

View File

@@ -0,0 +1,57 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#ifndef __PARAMETER_H_
#define __PARAMETER_H_
#define FF_LJ 0
#define FF_EAM 1
#if PRECISION == 1
#define MD_FLOAT float
#else
#define MD_FLOAT double
#endif
typedef struct {
int force_field;
char* input_file;
char* vtk_file;
MD_FLOAT epsilon;
MD_FLOAT sigma6;
MD_FLOAT temp;
MD_FLOAT rho;
MD_FLOAT mass;
int ntypes;
int ntimes;
int nstat;
int every;
MD_FLOAT dt;
MD_FLOAT dtforce;
MD_FLOAT cutforce;
MD_FLOAT cutneigh;
int nx, ny, nz;
MD_FLOAT lattice;
MD_FLOAT xprd, yprd, zprd;
double proc_freq;
} Parameter;
#endif

34
lammps/includes/pbc.h Normal file
View File

@@ -0,0 +1,34 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <atom.h>
#include <parameter.h>
#ifndef __PBC_H_
#define __PBC_H_
extern void initPbc(Atom*);
extern void updatePbc(Atom*, Parameter*);
extern void updatePbc_cuda(Atom*, Parameter*, Atom*, bool, const int);
extern void updateAtomsPbc(Atom*, Parameter*);
extern void updateAtomsPbc_cuda(Atom*, Parameter*, Atom*, const int);
extern void setupPbc(Atom*, Parameter*);
#endif

46
lammps/includes/stats.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <atom.h>
#include <parameter.h>
#ifndef __STATS_H_
#define __STATS_H_
typedef struct {
long long int total_force_neighs;
long long int total_force_iters;
} Stats;
void initStats(Stats *s);
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
#ifdef COMPUTE_STATS
# define addStat(stat, value) stat += value;
# define beginStatTimer() double Si = getTimeStamp();
# define endStatTimer(stat) stat += getTimeStamp() - Si;
#else
# define addStat(stat, value)
# define beginStatTimer()
# define endStatTimer(stat)
#endif
#endif

31
lammps/includes/thermo.h Normal file
View File

@@ -0,0 +1,31 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <parameter.h>
#include <atom.h>
#ifndef __THERMO_H_
#define __THERMO_H_
extern void setupThermo(Parameter*, int);
extern void computeThermo(int, Parameter*, Atom*);
extern void adjustThermo(Parameter*, Atom*);
#endif

16
lammps/includes/timers.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef __TIMERS_H_
#define __TIMERS_H_
typedef enum {
TOTAL = 0,
NEIGH,
FORCE,
NEIGH_UPDATE_ATOMS_PBC,
NEIGH_SETUP_PBC,
NEIGH_UPDATE_PBC,
NEIGH_BINATOMS,
NEIGH_BUILD_LISTS,
NUMTIMER
} timertype;
#endif

30
lammps/includes/timing.h Normal file
View File

@@ -0,0 +1,30 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#ifndef __TIMING_H_
#define __TIMING_H_
extern double getTimeStamp();
extern double getTimeResolution();
extern double getTimeStamp_();
#endif

37
lammps/includes/util.h Normal file
View File

@@ -0,0 +1,37 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#ifndef __UTIL_H_
#define __UTIL_H_
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
extern double myrandom(int*);
#endif

28
lammps/includes/vtk.h Normal file
View File

@@ -0,0 +1,28 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <atom.h>
#ifndef __VTK_H_
#define __VTK_H_
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
#endif

253
lammps/main-stub.c Normal file
View File

@@ -0,0 +1,253 @@
#include <stdio.h>
#include <string.h>
//---
#include <likwid-marker.h>
//---
#include <timing.h>
#include <allocate.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <thermo.h>
#include <pbc.h>
#include <timers.h>
#define HLINE "----------------------------------------------------------------------------\n"
#define LATTICE_DISTANCE 10.0
#define NEIGH_DISTANCE 1.0
extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
void init(Parameter *param) {
param->epsilon = 1.0;
param->sigma6 = 1.0;
param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200;
param->nx = 4;
param->ny = 4;
param->nz = 2;
param->lattice = LATTICE_DISTANCE;
param->cutforce = 5.0;
param->cutneigh = param->cutforce;
param->mass = 1.0;
// Unused
param->dt = 0.005;
param->dtforce = 0.5 * param->dt;
param->nstat = 100;
param->temp = 1.44;
param->every = 20;
param->proc_freq = 0.0;
}
// Show debug messages
//#define DEBUG(msg) printf(msg)
// Do not show debug messages
#define DEBUG(msg)
#define ADD_ATOM(x, y, z, vx, vy, vz) atom_x(atom->Nlocal) = base_x + x * NEIGH_DISTANCE; \
atom_y(atom->Nlocal) = base_y + y * NEIGH_DISTANCE; \
atom_z(atom->Nlocal) = base_z + z * NEIGH_DISTANCE; \
atom->vx[atom->Nlocal] = vy; \
atom->vy[atom->Nlocal] = vy; \
atom->vz[atom->Nlocal] = vz; \
atom->Nlocal++
int main(int argc, const char *argv[]) {
Atom atom_data;
Atom *atom = (Atom *)(&atom_data);
Neighbor neighbor;
Stats stats;
Parameter param;
int atoms_per_unit_cell = 8;
int csv = 0;
LIKWID_MARKER_INIT;
LIKWID_MARKER_REGISTER("force");
DEBUG("Initializing parameters...\n");
init(&param);
for(int i = 0; i < argc; i++)
{
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
{
param.ntimes = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nx") == 0))
{
param.nx = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-ny") == 0))
{
param.ny = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nz") == 0))
{
param.nz = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-na") == 0))
{
atoms_per_unit_cell = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-f") == 0))
{
param.proc_freq = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-csv") == 0))
{
csv = 1;
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
{
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-na <int>: set number of atoms per unit cell\n");
printf("-f <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
printf("-csv: set output as CSV style\n");
printf(HLINE);
exit(EXIT_SUCCESS);
}
}
param.xprd = param.nx * LATTICE_DISTANCE;
param.yprd = param.ny * LATTICE_DISTANCE;
param.zprd = param.nz * LATTICE_DISTANCE;
DEBUG("Initializing atoms...\n");
initAtom(atom);
initStats(&stats);
atom->ntypes = param.ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param.epsilon;
atom->sigma6[i] = param.sigma6;
atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
atom->cutforcesq[i] = param.cutforce * param.cutforce;
}
DEBUG("Creating atoms...\n");
for(int i = 0; i < param.nx; ++i) {
for(int j = 0; j < param.ny; ++j) {
for(int k = 0; k < param.nz; ++k) {
int added_atoms = 0;
int fac_x = 1;
int fac_y = 1;
int fac_z = 1;
int fmod = 0;
MD_FLOAT base_x = i * LATTICE_DISTANCE;
MD_FLOAT base_y = j * LATTICE_DISTANCE;
MD_FLOAT base_z = k * LATTICE_DISTANCE;
MD_FLOAT vx = 0.0;
MD_FLOAT vy = 0.0;
MD_FLOAT vz = 0.0;
while(atom->Nlocal > atom->Nmax - atoms_per_unit_cell) {
growAtom(atom);
}
while(fac_x * fac_y * fac_z < atoms_per_unit_cell) {
if(fmod == 0) { fac_x *= 2; }
if(fmod == 1) { fac_y *= 2; }
if(fmod == 2) { fac_z *= 2; }
fmod = (fmod + 1) % 3;
}
MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x;
MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y;
MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z;
for(int ii = 0; ii < fac_x; ++ii) {
for(int jj = 0; jj < fac_y; ++jj) {
for(int kk = 0; kk < fac_z; ++kk) {
if(added_atoms < atoms_per_unit_cell) {
atom->type[atom->Nlocal] = rand() % atom->ntypes;
ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz);
added_atoms++;
}
}
}
}
}
}
}
const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int));
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
if(!csv) {
printf("Number of timesteps: %d\n", param.ntimes);
printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS);
printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
printf("Total number of atoms: %d\n", atom->Nlocal);
printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
}
DEBUG("Initializing neighbor lists...\n");
initNeighbor(&neighbor, &param);
DEBUG("Setting up neighbor lists...\n");
setupNeighbor();
DEBUG("Building neighbor lists...\n");
buildNeighbor(atom, &neighbor);
DEBUG("Computing forces...\n");
computeForce(&param, atom, &neighbor, &stats, 1, 1);
double S, E;
S = getTimeStamp();
for(int i = 0; i < param.ntimes; i++) {
computeForce(&param, atom, &neighbor, &stats, 0, i + 1);
}
E = getTimeStamp();
double T_accum = E-S;
double freq_hz = param.proc_freq * 1.e9;
const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS;
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats);
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz;
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
if(!csv) {
printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6);
if(param.proc_freq > 0.0) {
printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
}
} else {
printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
if(param.proc_freq > 0.0) {
printf(",cy/atom,cy/neigh");
}
printf("\n");
printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6);
if(param.proc_freq > 0.0) {
printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
}
printf("\n");
}
double timer[NUMTIMER];
timer[FORCE] = T_accum;
displayStatistics(atom, &param, &stats, timer);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

425
lammps/main.c Normal file
View File

@@ -0,0 +1,425 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#include <limits.h>
#include <math.h>
#include <float.h>
#include <likwid-marker.h>
#include <timing.h>
#include <allocate.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <thermo.h>
#include <pbc.h>
#include <timers.h>
#include <eam.h>
#include <vtk.h>
#define HLINE "----------------------------------------------------------------------------\n"
extern void cuda_final_integrate(bool doReneighbour, Parameter *param,
Atom *atom, Atom *c_atom,
const int num_threads_per_block);
extern void cuda_initial_integrate(bool doReneighbour, Parameter *param,
Atom *atom, Atom *c_atom,
const int num_threads_per_block);
extern double computeForce(bool, Parameter*, Atom*, Neighbor*, Atom*, Neighbor*, const int);
extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
void init(Parameter *param)
{
param->input_file = NULL;
param->vtk_file = NULL;
param->force_field = FF_LJ;
param->epsilon = 1.0;
param->sigma6 = 1.0;
param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200;
param->dt = 0.005;
param->nx = 32;
param->ny = 32;
param->nz = 32;
param->cutforce = 2.5;
param->cutneigh = param->cutforce + 0.30;
param->temp = 1.44;
param->nstat = 100;
param->mass = 1.0;
param->dtforce = 0.5 * param->dt;
param->every = 20;
param->proc_freq = 2.4;
}
void initCudaAtom(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor) {
c_atom->Natoms = atom->Natoms;
c_atom->Nlocal = atom->Nlocal;
c_atom->Nghost = atom->Nghost;
c_atom->Nmax = atom->Nmax;
c_atom->ntypes = atom->ntypes;
c_atom->border_map = NULL;
const int Nlocal = atom->Nlocal;
checkCUDAError( "c_atom->x malloc", cudaMalloc((void**)&(c_atom->x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
checkCUDAError( "c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom->fx malloc", cudaMalloc((void**)&(c_atom->fx), sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom->vx malloc", cudaMalloc((void**)&(c_atom->vx), sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom->vx memcpy", cudaMemcpy(c_atom->vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom->type malloc", cudaMalloc((void**)&(c_atom->type), sizeof(int) * atom->Nmax) );
checkCUDAError( "c_atom->epsilon malloc", cudaMalloc((void**)&(c_atom->epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_atom->sigma6 malloc", cudaMalloc((void**)&(c_atom->sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_atom->cutforcesq malloc", cudaMalloc((void**)&(c_atom->cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_neighbor->neighbors malloc", cudaMalloc((void**)&c_neighbor->neighbors, sizeof(int) * Nlocal * neighbor->maxneighs) );
checkCUDAError( "c_neighbor->numneigh malloc", cudaMalloc((void**)&c_neighbor->numneigh, sizeof(int) * Nlocal) );
checkCUDAError( "c_atom->type memcpy", cudaMemcpy(c_atom->type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom->sigma6 memcpy", cudaMemcpy(c_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom->epsilon memcpy", cudaMemcpy(c_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom->cutforcesq memcpy", cudaMemcpy(c_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
}
double setup(
Parameter *param,
Eam *eam,
Atom *atom,
Neighbor *neighbor,
Atom *c_atom,
Neighbor *c_neighbor,
Stats *stats,
const int num_threads_per_block,
double* timers)
{
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
initAtom(atom);
initNeighbor(neighbor, param);
initPbc(atom);
initStats(stats);
setupNeighbor();
createAtom(atom, param);
setupThermo(param, atom->Natoms);
adjustThermo(param, atom);
setupPbc(atom, param);
initCudaAtom(atom, neighbor, c_atom, c_neighbor);
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block, timers);
E = getTimeStamp();
return E-S;
}
double reneighbour(
Parameter *param,
Atom *atom,
Neighbor *neighbor,
Atom *c_atom,
Neighbor *c_neighbor,
const int num_threads_per_block,
double* timers)
{
double S, E, beforeEvent, afterEvent;
S = getTimeStamp();
beforeEvent = S;
LIKWID_MARKER_START("reneighbour");
updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block);
afterEvent = getTimeStamp();
timers[NEIGH_UPDATE_ATOMS_PBC] += afterEvent - beforeEvent;
beforeEvent = afterEvent;
setupPbc(atom, param);
afterEvent = getTimeStamp();
timers[NEIGH_SETUP_PBC] += afterEvent - beforeEvent;
beforeEvent = afterEvent;
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
afterEvent = getTimeStamp();
timers[NEIGH_UPDATE_PBC] += afterEvent - beforeEvent;
beforeEvent = afterEvent;
//sortAtom(atom);
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block, timers);
LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp();
afterEvent = E;
timers[NEIGH_BUILD_LISTS] += afterEvent - beforeEvent;
return E-S;
}
void initialIntegrate(Parameter *param, Atom *atom)
{
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) += param->dtforce * atom_fx(i);
atom_vy(i) += param->dtforce * atom_fy(i);
atom_vz(i) += param->dtforce * atom_fz(i);
atom_x(i) = atom_x(i) + param->dt * atom_vx(i);
atom_y(i) = atom_y(i) + param->dt * atom_vy(i);
atom_z(i) = atom_z(i) + param->dt * atom_vz(i);
}
}
void finalIntegrate(Parameter *param, Atom *atom)
{
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) += param->dtforce * atom_fx(i);
atom_vy(i) += param->dtforce * atom_fy(i);
atom_vz(i) += param->dtforce * atom_fz(i);
}
}
void printAtomState(Atom *atom)
{
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
/* int nall = atom->Nlocal + atom->Nghost; */
/* for (int i=0; i<nall; i++) { */
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
/* } */
}
int str2ff(const char *string)
{
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
return -1;
}
const char* ff2str(int ff)
{
if(ff == FF_LJ) { return "lj"; }
if(ff == FF_EAM) { return "eam"; }
return "invalid";
}
int get_num_threads() {
const char *num_threads_env = getenv("NUM_THREADS");
int num_threads = 0;
if(num_threads_env == 0)
num_threads = 32;
else {
num_threads = atoi(num_threads_env);
}
return num_threads;
}
int main(int argc, char** argv)
{
double timer[NUMTIMER];
Eam eam;
Atom atom;
Neighbor neighbor;
Stats stats;
Parameter param;
Atom c_atom;
Neighbor c_neighbor;
LIKWID_MARKER_INIT;
#pragma omp parallel
{
LIKWID_MARKER_REGISTER("force");
//LIKWID_MARKER_REGISTER("reneighbour");
//LIKWID_MARKER_REGISTER("pbc");
}
init(&param);
for(int i = 0; i < argc; i++)
{
if((strcmp(argv[i], "-f") == 0))
{
if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-i") == 0))
{
param.input_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
{
param.ntimes = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nx") == 0))
{
param.nx = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-ny") == 0))
{
param.ny = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nz") == 0))
{
param.nz = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "--freq") == 0))
{
param.proc_freq = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--vtk") == 0))
{
param.vtk_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
{
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-f <string>: force field (lj or eam), default lj\n");
printf("-i <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n");
printf(HLINE);
exit(EXIT_SUCCESS);
}
}
// this should be multiple of 32 as operations are performed at the level of warps
const int num_threads_per_block = get_num_threads();
setup(&param, &eam, &atom, &neighbor, &c_atom, &c_neighbor, &stats, num_threads_per_block, (double*) &timer);
computeThermo(0, &param, &atom);
if(param.force_field == FF_EAM) {
computeForceEam(&eam, &param, &atom, &neighbor, &stats, 1, 0);
} else {
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
computeForceTracing(&param, &atom, &neighbor, &stats, 1, 0);
#else
computeForce(true, &param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
#endif
}
timer[FORCE] = 0.0;
timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp();
timer[NEIGH_UPDATE_ATOMS_PBC] = 0.0;
timer[NEIGH_SETUP_PBC] = 0.0;
timer[NEIGH_UPDATE_PBC] = 0.0;
timer[NEIGH_BINATOMS] = 0.0;
timer[NEIGH_BUILD_LISTS] = 0.0;
if(param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
}
for(int n = 0; n < param.ntimes; n++) {
const bool doReneighbour = (n + 1) % param.every == 0;
cuda_initial_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block);
if(doReneighbour) {
timer[NEIGH] += reneighbour(&param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block, (double*) &timer);
} else {
double before = getTimeStamp();
updatePbc_cuda(&atom, &param, &c_atom, false, num_threads_per_block);
double after = getTimeStamp();
timer[NEIGH_UPDATE_PBC] += after - before;
}
if(param.force_field == FF_EAM) {
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats, 0, n + 1);
} else {
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
timer[FORCE] += computeForceTracing(&param, &atom, &neighbor, &stats, 0, n + 1);
#else
timer[FORCE] += computeForce(doReneighbour, &param, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
#endif
}
cuda_final_integrate(doReneighbour, &param, &atom, &c_atom, num_threads_per_block);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
checkCUDAError("computeThermo atom->x memcpy back", cudaMemcpy(atom.x, c_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3, cudaMemcpyDeviceToHost) );
computeThermo(n + 1, &param, &atom);
}
if(param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
}
}
timer[NEIGH_BUILD_LISTS] -= timer[NEIGH_BINATOMS];
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
computeThermo(-1, &param, &atom);
printf(HLINE);
printf("Force field: %s\n", ff2str(param.force_field));
printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
#if PRECISION == 1
printf("Using single precision floating point.\n");
#else
printf("Using double precision floating point.\n");
#endif
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs NEIGH_TIMERS: UPD_AT: %.2fs SETUP_PBC %.2fs UPDATE_PBC %.2fs BINATOMS %.2fs BUILD_NEIGHBOR %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH], timer[NEIGH_UPDATE_ATOMS_PBC], timer[NEIGH_SETUP_PBC], timer[NEIGH_UPDATE_PBC], timer[NEIGH_BINATOMS], timer[NEIGH_BUILD_LISTS]);
printf(HLINE);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
double atomUpdatesTotal = (double) atom.Natoms * param.ntimes;
printf("Force_perf in millions per sec: %.2f\n", 1e-6 * atomUpdatesTotal / timer[FORCE]);
double atomNeighUpdatesTotal = (double) atom.Natoms * param.ntimes / param.every;
printf("Neighbor_perf in millions per sec: updateAtomsPbc: %.2f setupPbc: %.2f updatePbc: %.2f binAtoms: %.2f buildNeighbor_wo_binning: %.2f\n", 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_UPDATE_ATOMS_PBC], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_SETUP_PBC], 1e-6 * atomUpdatesTotal / timer[NEIGH_UPDATE_PBC], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_BINATOMS], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_BUILD_LISTS]);
#ifdef COMPUTE_STATS
displayStatistics(&atom, &param, &stats, timer);
#endif
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

719
lammps/neighbor.cu Normal file
View File

@@ -0,0 +1,719 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cuda_profiler_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
extern "C" {
#include <neighbor.h>
#include <parameter.h>
#include <allocate.h>
#include <atom.h>
#include <timing.h>
#include <timers.h>
#define SMALL 1.0e-6
#define FACTOR 0.999
}
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin,
Neighbor_params np)
{
int ix, iy, iz;
if(xin >= np.xprd) {
ix = (int)((xin - np.xprd) * np.bininvx) + np.nbinx - np.mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * np.bininvx) - np.mbinxlo;
} else {
ix = (int)(xin * np.bininvx) - np.mbinxlo - 1;
}
if(yin >= np.yprd) {
iy = (int)((yin - np.yprd) * np.bininvy) + np.nbiny - np.mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * np.bininvy) - np.mbinylo;
} else {
iy = (int)(yin * np.bininvy) - np.mbinylo - 1;
}
if(zin >= np.zprd) {
iz = (int)((zin - np.zprd) * np.bininvz) + np.nbinz - np.mbinzlo;
} else if(zin >= 0.0) {
iz = (int)(zin * np.bininvz) - np.mbinzlo;
} else {
iz = (int)(zin * np.bininvz) - np.mbinzlo - 1;
}
return (iz * np.mbiny * np.mbinx + iy * np.mbinx + ix + 1);
}
/* sorts the contents of a bin to make it comparable to the CPU version */
/* uses bubble sort since atoms per bin should be relatively small and can be done in situ */
__global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin){
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= mbins){
return;
}
int atoms_in_bin = bincount[i];
int* bin_ptr = &bins[i * atoms_per_bin];
int sorted;
do {
sorted = 1;
int tmp;
for(int index = 0; index < atoms_in_bin - 1; index++){
if (bin_ptr[index] > bin_ptr[index + 1]){
tmp = bin_ptr[index];
bin_ptr[index] = bin_ptr[index + 1];
bin_ptr[index + 1] = tmp;
sorted = 0;
}
}
} while (!sorted);
}
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){
Atom* atom = &a;
const int i = blockIdx.x * blockDim.x + threadIdx.x;
int nall = atom->Nlocal + atom->Nghost;
if(i >= nall){
return;
}
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
int ibin = coord2bin_device(x, y, z, np);
int ac = atomicAdd(&bincount[ibin], 1);
if(ac < atoms_per_bin){
bins[ibin * atoms_per_bin + ac] = i;
} else {
atomicMax(resize_needed, ac);
}
}
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
const int i = blockIdx.x * blockDim.x + threadIdx.x;
const int Nlocal = a.Nlocal;
if( i >= Nlocal ) {
return;
}
Atom *atom = &a;
Neighbor *neighbor = &neigh;
int* neighptr = &(neighbor->neighbors[i]);
int n = 0;
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
#ifdef EXPLICIT_TYPES
int type_i = atom->type[i];
#endif
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin];
for(int m = 0; m < bincount[jbin]; m++) {
int j = loc_bin[m];
if ( j == i ){
continue;
}
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
int type_j = atom->type[j];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
#else
const MD_FLOAT cutoff = cutneighsq;
#endif
if( rsq <= cutoff ) {
int idx = atom->Nlocal * n;
neighptr[idx] = j;
n += 1;
}
}
}
neighbor->numneigh[i] = n;
if(n > neighbor->maxneighs) {
atomicMax(new_maxneighs, n);
}
}
extern "C" {
static MD_FLOAT xprd, yprd, zprd;
static MD_FLOAT bininvx, bininvy, bininvz;
static int mbinxlo, mbinylo, mbinzlo;
static int nbinx, nbiny, nbinz;
static int mbinx, mbiny, mbinz; // n bins in x, y, z
static int *bincount;
static int *bins;
static int mbins; //total number of bins
static int atoms_per_bin; // max atoms per bin
static MD_FLOAT cutneigh;
static MD_FLOAT cutneighsq; // neighbor cutoff squared
static int nmax;
static int nstencil; // # of bins in stencil
static int* stencil; // stencil list of bin offsets
static MD_FLOAT binsizex, binsizey, binsizez;
static int* c_stencil = NULL;
static int* c_resize_needed = NULL;
static int* c_new_maxneighs = NULL;
static Binning c_binning{
.bincount = NULL,
.bins = NULL,
.mbins = 0,
.atoms_per_bin = 0
};
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
static MD_FLOAT bindist(int, int, int);
/* exported subroutines */
void initNeighbor(Neighbor *neighbor, Parameter *param)
{
MD_FLOAT neighscale = 5.0 / 6.0;
xprd = param->nx * param->lattice;
yprd = param->ny * param->lattice;
zprd = param->nz * param->lattice;
cutneigh = param->cutneigh;
nbinx = neighscale * param->nx;
nbiny = neighscale * param->ny;
nbinz = neighscale * param->nz;
nmax = 0;
atoms_per_bin = 8;
stencil = NULL;
bins = NULL;
bincount = NULL;
neighbor->maxneighs = 100;
neighbor->numneigh = NULL;
neighbor->neighbors = NULL;
}
void setupNeighbor()
{
MD_FLOAT coord;
int mbinxhi, mbinyhi, mbinzhi;
int nextx, nexty, nextz;
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
cutneighsq = cutneigh * cutneigh;
binsizex = xprd / nbinx;
binsizey = yprd / nbiny;
binsizez = zprd / nbinz;
bininvx = 1.0 / binsizex;
bininvy = 1.0 / binsizey;
bininvz = 1.0 / binsizez;
coord = xlo - cutneigh - SMALL * xprd;
mbinxlo = (int) (coord * bininvx);
if (coord < 0.0) {
mbinxlo = mbinxlo - 1;
}
coord = xhi + cutneigh + SMALL * xprd;
mbinxhi = (int) (coord * bininvx);
coord = ylo - cutneigh - SMALL * yprd;
mbinylo = (int) (coord * bininvy);
if (coord < 0.0) {
mbinylo = mbinylo - 1;
}
coord = yhi + cutneigh + SMALL * yprd;
mbinyhi = (int) (coord * bininvy);
coord = zlo - cutneigh - SMALL * zprd;
mbinzlo = (int) (coord * bininvz);
if (coord < 0.0) {
mbinzlo = mbinzlo - 1;
}
coord = zhi + cutneigh + SMALL * zprd;
mbinzhi = (int) (coord * bininvz);
mbinxlo = mbinxlo - 1;
mbinxhi = mbinxhi + 1;
mbinx = mbinxhi - mbinxlo + 1;
mbinylo = mbinylo - 1;
mbinyhi = mbinyhi + 1;
mbiny = mbinyhi - mbinylo + 1;
mbinzlo = mbinzlo - 1;
mbinzhi = mbinzhi + 1;
mbinz = mbinzhi - mbinzlo + 1;
nextx = (int) (cutneigh * bininvx);
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
nexty = (int) (cutneigh * bininvy);
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
nextz = (int) (cutneigh * bininvz);
if(nextz * binsizez < FACTOR * cutneigh) nextz++;
if (stencil) {
free(stencil);
}
stencil = (int*) malloc(
(2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
nstencil = 0;
int kstart = -nextz;
for(int k = kstart; k <= nextz; k++) {
for(int j = -nexty; j <= nexty; j++) {
for(int i = -nextx; i <= nextx; i++) {
if(bindist(i, j, k) < cutneighsq) {
stencil[nstencil++] =
k * mbiny * mbinx + j * mbinx + i;
}
}
}
}
mbins = mbinx * mbiny * mbinz;
if (bincount) {
free(bincount);
}
bincount = (int*) malloc(mbins * sizeof(int));
if (bins) {
free(bins);
}
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
}
void buildNeighbor(Atom *atom, Neighbor *neighbor)
{
int nall = atom->Nlocal + atom->Nghost;
/* extend atom arrays if necessary */
if(nall > nmax) {
nmax = nall;
if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
// neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
// neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
}
/* bin local & ghost atoms */
binatoms(atom);
int resize = 1;
/* loop over each atom, storing neighbors */
while(resize) {
int new_maxneighs = neighbor->maxneighs;
resize = 0;
for(int i = 0; i < atom->Nlocal; i++) {
int* neighptr = &(neighbor->neighbors[i]);
int n = 0;
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
int ibin = coord2bin(xtmp, ytmp, ztmp);
#ifdef EXPLICIT_TYPES
int type_i = atom->type[i];
#endif
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin];
for(int m = 0; m < bincount[jbin]; m++) {
int j = loc_bin[m];
if ( j == i ){
continue;
}
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
int type_j = atom->type[j];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
#else
const MD_FLOAT cutoff = cutneighsq;
#endif
if( rsq <= cutoff ) {
int idx = atom->Nlocal * n;
neighptr[idx] = j;
n += 1;
}
}
}
neighbor->numneigh[i] = n;
if(n >= neighbor->maxneighs) {
resize = 1;
if(n >= new_maxneighs) {
new_maxneighs = n;
}
}
}
if(resize) {
printf("RESIZE %d\n", neighbor->maxneighs);
neighbor->maxneighs = new_maxneighs * 1.2;
free(neighbor->neighbors);
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
}
}
}
/* internal subroutines */
MD_FLOAT bindist(int i, int j, int k)
{
MD_FLOAT delx, dely, delz;
if(i > 0) {
delx = (i - 1) * binsizex;
} else if(i == 0) {
delx = 0.0;
} else {
delx = (i + 1) * binsizex;
}
if(j > 0) {
dely = (j - 1) * binsizey;
} else if(j == 0) {
dely = 0.0;
} else {
dely = (j + 1) * binsizey;
}
if(k > 0) {
delz = (k - 1) * binsizez;
} else if(k == 0) {
delz = 0.0;
} else {
delz = (k + 1) * binsizez;
}
return (delx * delx + dely * dely + delz * delz);
}
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin)
{
int ix, iy, iz;
if(xin >= xprd) {
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * bininvx) - mbinxlo;
} else {
ix = (int)(xin * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * bininvy) - mbinylo;
} else {
iy = (int)(yin * bininvy) - mbinylo - 1;
}
if(zin >= zprd) {
iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
} else if(zin >= 0.0) {
iz = (int)(zin * bininvz) - mbinzlo;
} else {
iz = (int)(zin * bininvz) - mbinzlo - 1;
}
return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
}
void binatoms(Atom *atom)
{
int nall = atom->Nlocal + atom->Nghost;
int resize = 1;
while(resize > 0) {
resize = 0;
for(int i = 0; i < mbins; i++) {
bincount[i] = 0;
}
for(int i = 0; i < nall; i++) {
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
int ibin = coord2bin(x, y, z);
if(bincount[ibin] < atoms_per_bin) {
int ac = bincount[ibin]++;
bins[ibin * atoms_per_bin + ac] = i;
} else {
resize = 1;
}
}
if(resize) {
free(bins);
atoms_per_bin *= 2;
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
}
}
}
void sortAtom(Atom* atom) {
binatoms(atom);
int Nmax = atom->Nmax;
int* binpos = bincount;
for(int i=1; i<mbins; i++) {
binpos[i] += binpos[i-1];
}
#ifdef AOS
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
#else
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_y = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_z = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_vy = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_vz = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
#endif
MD_FLOAT* old_x = atom->x; MD_FLOAT* old_y = atom->y; MD_FLOAT* old_z = atom->z;
MD_FLOAT* old_vx = atom->vx; MD_FLOAT* old_vy = atom->vy; MD_FLOAT* old_vz = atom->vz;
for(int mybin = 0; mybin<mbins; mybin++) {
int start = mybin>0?binpos[mybin-1]:0;
int count = binpos[mybin] - start;
for(int k=0; k<count; k++) {
int new_i = start + k;
int old_i = bins[mybin * atoms_per_bin + k];
#ifdef AOS
new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
new_vx[new_i * 3 + 1] = old_vy[old_i * 3 + 1];
new_vx[new_i * 3 + 2] = old_vz[old_i * 3 + 2];
#else
new_x[new_i] = old_x[old_i];
new_y[new_i] = old_y[old_i];
new_z[new_i] = old_z[old_i];
new_vx[new_i] = old_vx[old_i];
new_vy[new_i] = old_vy[old_i];
new_vz[new_i] = old_vz[old_i];
#endif
}
}
free(atom->x);
atom->x = new_x;
free(atom->vx);
atom->vx = new_vx;
#ifndef AOS
free(atom->y);
free(atom->z);
atom->y = new_y; atom->z = new_z;
free(atom->vy); free(atom->vz);
atom->vy = new_vy; atom->vz = new_vz;
#endif
}
void binatoms_cuda(Atom* c_atom, Binning* c_binning, int* c_resize_needed, Neighbor_params *np, const int threads_per_block)
{
int nall = c_atom->Nlocal + c_atom->Nghost;
int resize = 1;
const int num_blocks = ceil((float)nall / (float)threads_per_block);
while(resize > 0) {
resize = 0;
checkCUDAError("binatoms_cuda c_binning->bincount memset", cudaMemset(c_binning->bincount, 0, c_binning->mbins * sizeof(int)));
checkCUDAError("binatoms_cuda c_resize_needed memset", cudaMemset(c_resize_needed, 0, sizeof(int)) );
/*binatoms_kernel(Atom a, int* bincount, int* bins, int c_binning->atoms_per_bin, Neighbor_params np, int *resize_needed) */
binatoms_kernel<<<num_blocks, threads_per_block>>>(*c_atom, c_binning->bincount, c_binning->bins, c_binning->atoms_per_bin, *np, c_resize_needed);
checkCUDAError( "PeekAtLastError binatoms kernel", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync binatoms kernel", cudaDeviceSynchronize() );
checkCUDAError("binatoms_cuda c_resize_needed memcpy back", cudaMemcpy(&resize, c_resize_needed, sizeof(int), cudaMemcpyDeviceToHost) );
if(resize) {
cudaFree(c_binning->bins);
c_binning->atoms_per_bin *= 2;
checkCUDAError("binatoms_cuda c_binning->bins resize malloc", cudaMalloc(&c_binning->bins, c_binning->mbins * c_binning->atoms_per_bin * sizeof(int)) );
}
}
atoms_per_bin = c_binning->atoms_per_bin;
const int sortBlocks = ceil((float)mbins / (float)threads_per_block);
/*void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin)*/
sort_bin_contents_kernel<<<sortBlocks, threads_per_block>>>(c_binning->bincount, c_binning->bins, c_binning->mbins, c_binning->atoms_per_bin);
checkCUDAError( "PeekAtLastError sort_bin_contents kernel", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync sort_bin_contents kernel", cudaDeviceSynchronize() );
}
void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor, const int num_threads_per_block, double* timers)
{
int nall = atom->Nlocal + atom->Nghost;
c_neighbor->maxneighs = neighbor->maxneighs;
cudaProfilerStart();
/* upload stencil */
// TODO move all of this initialization into its own method
if(c_stencil == NULL){
checkCUDAError( "buildNeighbor c_n_stencil malloc", cudaMalloc((void**)&c_stencil, nstencil * sizeof(int)) );
checkCUDAError( "buildNeighbor c_n_stencil memcpy", cudaMemcpy(c_stencil, stencil, nstencil * sizeof(int), cudaMemcpyHostToDevice ));
}
if(c_binning.mbins == 0){
c_binning.mbins = mbins;
c_binning.atoms_per_bin = atoms_per_bin;
checkCUDAError( "buildNeighbor c_binning->bincount malloc", cudaMalloc((void**)&(c_binning.bincount), c_binning.mbins * sizeof(int)) );
checkCUDAError( "buidlNeighbor c_binning->bins malloc", cudaMalloc((void**)&(c_binning.bins), c_binning.mbins * c_binning.atoms_per_bin * sizeof(int)) );
}
Neighbor_params np{
.xprd = xprd,
.yprd = yprd,
.zprd = zprd,
.bininvx = bininvx,
.bininvy = bininvy,
.bininvz = bininvz,
.mbinxlo = mbinxlo,
.mbinylo = mbinylo,
.mbinzlo = mbinzlo,
.nbinx = nbinx,
.nbiny = nbiny,
.nbinz = nbinz,
.mbinx = mbinx,
.mbiny = mbiny,
.mbinz = mbinz
};
if(c_resize_needed == NULL){
checkCUDAError("buildNeighbor c_resize_needed malloc", cudaMalloc((void**)&c_resize_needed, sizeof(int)) );
}
/* bin local & ghost atoms */
double beforeBinning = getTimeStamp();
binatoms_cuda(c_atom, &c_binning, c_resize_needed, &np, num_threads_per_block);
double afterBinning = getTimeStamp();
timers[NEIGH_BINATOMS] += afterBinning - beforeBinning;
if(c_new_maxneighs == NULL){
checkCUDAError("c_new_maxneighs malloc", cudaMalloc((void**)&c_new_maxneighs, sizeof(int) ));
}
int resize = 1;
/* extend c_neighbor arrays if necessary */
if(nall > nmax) {
nmax = nall;
if(c_neighbor->numneigh) cudaFree(c_neighbor->numneigh);
if(c_neighbor->neighbors) cudaFree(c_neighbor->neighbors);
checkCUDAError( "buildNeighbor c_numneigh malloc", cudaMalloc((void**)&(c_neighbor->numneigh), nmax * sizeof(int)) );
checkCUDAError( "buildNeighbor c_neighbors malloc", cudaMalloc((void**)&(c_neighbor->neighbors), nmax * c_neighbor->maxneighs * sizeof(int)) );
}
/* loop over each atom, storing neighbors */
while(resize) {
resize = 0;
checkCUDAError("c_new_maxneighs memset", cudaMemset(c_new_maxneighs, 0, sizeof(int) ));
// TODO call compute_neigborhood kernel here
const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
/*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs)
* */
compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_atom, *c_neighbor,
np, nstencil, c_stencil,
c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount,
c_new_maxneighs,
cutneighsq);
checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() );
// TODO copy the value of c_new_maxneighs back to host and check if it has been modified
int new_maxneighs;
checkCUDAError("c_new_maxneighs memcpy back", cudaMemcpy(&new_maxneighs, c_new_maxneighs, sizeof(int), cudaMemcpyDeviceToHost));
if (new_maxneighs > c_neighbor->maxneighs){
resize = 1;
}
if(resize) {
printf("RESIZE %d\n", c_neighbor->maxneighs);
c_neighbor->maxneighs = new_maxneighs * 1.2;
printf("NEW SIZE %d\n", c_neighbor->maxneighs);
cudaFree(c_neighbor->neighbors);
checkCUDAError("c_neighbor->neighbors resize malloc",
cudaMalloc((void**)(&c_neighbor->neighbors),
c_atom->Nmax * c_neighbor->maxneighs * sizeof(int)));
}
}
neighbor->maxneighs = c_neighbor->maxneighs;
cudaProfilerStop();
}
}

286
lammps/pbc.cu Normal file
View File

@@ -0,0 +1,286 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
extern "C" {
#include <pbc.h>
#include <atom.h>
#include <allocate.h>
#define DELTA 20000
}
__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
const int i = blockIdx.x * blockDim.x + threadIdx.x;
Atom* atom = &a;
if( i >= atom->Nlocal ){
return;
}
if (atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if (atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if (atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if (atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if (atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if (atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
const int i = blockIdx.x * blockDim.x + threadIdx.x;
const int Nghost = a.Nghost;
if( i >= Nghost ) {
return;
}
Atom* atom = &a;
int *border_map = atom->border_map;
int nlocal = atom->Nlocal;
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
}
extern "C"{
static int NmaxGhost;
static int *PBCx, *PBCy, *PBCz;
static int c_NmaxGhost = 0;
static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL;
static void growPbc(Atom *);
/* exported subroutines */
void initPbc(Atom *atom) {
NmaxGhost = 0;
atom->border_map = NULL;
PBCx = NULL;
PBCy = NULL;
PBCz = NULL;
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void updatePbc(Atom *atom, Parameter *param) {
int *border_map = atom->border_map;
int nlocal = atom->Nlocal;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for (int i = 0; i < atom->Nghost; i++) {
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
}
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbor, const int num_threads_per_block) {
if (doReneighbor){
c_atom->Natoms = atom->Natoms;
c_atom->Nlocal = atom->Nlocal;
c_atom->Nghost = atom->Nghost;
c_atom->ntypes = atom->ntypes;
if (atom->Nmax > c_atom->Nmax){ // the number of ghost atoms has increased -> more space is needed
c_atom->Nmax = atom->Nmax;
if(c_atom->x != NULL){ cudaFree(c_atom->x); }
if(c_atom->type != NULL){ cudaFree(c_atom->type); }
checkCUDAError( "updatePbc c_atom->x malloc", cudaMalloc((void**)&(c_atom->x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
checkCUDAError( "updatePbc c_atom->type malloc", cudaMalloc((void**)&(c_atom->type), sizeof(int) * atom->Nmax) );
}
// TODO if the sort is reactivated the atom->vx needs to be copied to GPU as well
checkCUDAError( "updatePbc c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
checkCUDAError( "updatePbc c_atom->type memcpy", cudaMemcpy(c_atom->type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
if(c_NmaxGhost < NmaxGhost){
c_NmaxGhost = NmaxGhost;
if(c_PBCx != NULL){ cudaFree(c_PBCx); }
if(c_PBCy != NULL){ cudaFree(c_PBCy); }
if(c_PBCz != NULL){ cudaFree(c_PBCz); }
if(c_atom->border_map != NULL){ cudaFree(c_atom->border_map); }
checkCUDAError( "updatePbc c_PBCx malloc", cudaMalloc((void**)&c_PBCx, NmaxGhost * sizeof(int)) );
checkCUDAError( "updatePbc c_PBCy malloc", cudaMalloc((void**)&c_PBCy, NmaxGhost * sizeof(int)) );
checkCUDAError( "updatePbc c_PBCz malloc", cudaMalloc((void**)&c_PBCz, NmaxGhost * sizeof(int)) );
checkCUDAError( "updatePbc c_atom->border_map malloc", cudaMalloc((void**)&(c_atom->border_map), NmaxGhost * sizeof(int)) );
}
checkCUDAError( "updatePbc c_PBCx memcpy", cudaMemcpy(c_PBCx, PBCx, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
checkCUDAError( "updatePbc c_PBCy memcpy", cudaMemcpy(c_PBCy, PBCy, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
checkCUDAError( "updatePbc c_PBCz memcpy", cudaMemcpy(c_PBCz, PBCz, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
checkCUDAError( "updatePbc c_atom->border_map memcpy", cudaMemcpy(c_atom->border_map, atom->border_map, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
}
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
const int num_blocks = ceil((float)atom->Nghost / (float)num_threads_per_block);
/*__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz,
* MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)
* */
computePbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd);
checkCUDAError( "PeekAtLastError UpdatePbc", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync UpdatePbc", cudaDeviceSynchronize() );
}
/* relocate atoms that have left domain according
* to periodic boundary conditions */
void updateAtomsPbc(Atom *atom, Parameter *param) {
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for (int i = 0; i < atom->Nlocal; i++) {
if (atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if (atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if (atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if (atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if (atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if (atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
}
void updateAtomsPbc_cuda(Atom* atom, Parameter* param, Atom* c_atom, const int num_threads_per_block){
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
/*void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)*/
computeAtomsPbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, xprd, yprd, zprd);
checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
}
/* setup periodic boundary conditions by
* defining ghost atoms around domain
* only creates mapping and coordinate corrections
* that are then enforced in updatePbc */
#define ADDGHOST(dx, dy, dz) \
Nghost++; \
border_map[Nghost] = i; \
PBCx[Nghost] = dx; \
PBCy[Nghost] = dy; \
PBCz[Nghost] = dz; \
atom->type[atom->Nlocal + Nghost] = atom->type[i]
void setupPbc(Atom *atom, Parameter *param) {
int *border_map = atom->border_map;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
MD_FLOAT Cutneigh = param->cutneigh;
int Nghost = -1;
for (int i = 0; i < atom->Nlocal; i++) {
if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
growAtom(atom);
}
if (Nghost + 7 >= NmaxGhost) {
growPbc(atom);
border_map = atom->border_map;
}
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
/* Setup ghost atoms */
/* 6 planes */
if (x < Cutneigh) { ADDGHOST(+1, 0, 0); }
if (x >= (xprd - Cutneigh)) { ADDGHOST(-1, 0, 0); }
if (y < Cutneigh) { ADDGHOST(0, +1, 0); }
if (y >= (yprd - Cutneigh)) { ADDGHOST(0, -1, 0); }
if (z < Cutneigh) { ADDGHOST(0, 0, +1); }
if (z >= (zprd - Cutneigh)) { ADDGHOST(0, 0, -1); }
/* 8 corners */
if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1, +1, +1); }
if (x < Cutneigh && y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(+1, -1, +1); }
if (x < Cutneigh && y >= Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(+1, +1, -1); }
if (x < Cutneigh && y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(+1, -1, -1); }
if (x >= (xprd - Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1, +1, +1); }
if (x >= (xprd - Cutneigh) && y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(-1, -1, +1); }
if (x >= (xprd - Cutneigh) && y < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(-1, +1, -1); }
if (x >= (xprd - Cutneigh) && y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(-1, -1, -1); }
/* 12 edges */
if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1, 0, +1); }
if (x < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(+1, 0, -1); }
if (x >= (xprd - Cutneigh) && z < Cutneigh) { ADDGHOST(-1, 0, +1); }
if (x >= (xprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(-1, 0, -1); }
if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0, +1, +1); }
if (y < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(0, +1, -1); }
if (y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(0, -1, +1); }
if (y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(0, -1, -1); }
if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1, +1, 0); }
if (y < Cutneigh && x >= (xprd - Cutneigh)) { ADDGHOST(-1, +1, 0); }
if (y >= (yprd - Cutneigh) && x < Cutneigh) { ADDGHOST(+1, -1, 0); }
if (y >= (yprd - Cutneigh) && x >= (xprd - Cutneigh)) { ADDGHOST(-1, -1, 0); }
}
// increase by one to make it the ghost atom count
atom->Nghost = Nghost + 1;
}
/* internal subroutines */
void growPbc(Atom *atom) {
int nold = NmaxGhost;
NmaxGhost += DELTA;
atom->border_map = (int *) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
PBCx = (int *) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
PBCy = (int *) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
PBCz = (int *) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
}
}

31
lammps/stats.c Normal file
View File

@@ -0,0 +1,31 @@
#include <stdio.h>
#include <atom.h>
#include <parameter.h>
#include <stats.h>
#include <timers.h>
void initStats(Stats *s) {
s->total_force_neighs = 0;
s->total_force_iters = 0;
}
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
#ifdef COMPUTE_STATS
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
(double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
#ifdef EXPLICIT_TYPES
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
#endif
printf("Statistics:\n");
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
printf("\tAverage SIMD iterations per atom: %.4f\n", avg_simd);
printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs);
printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
#endif
}

133
lammps/thermo.c Normal file
View File

@@ -0,0 +1,133 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <thermo.h>
static int *steparr;
static MD_FLOAT *tmparr;
static MD_FLOAT *engarr;
static MD_FLOAT *prsarr;
static MD_FLOAT mvv2e;
static MD_FLOAT dof_boltz;
static MD_FLOAT t_scale;
static MD_FLOAT p_scale;
static MD_FLOAT e_scale;
static MD_FLOAT t_act;
static MD_FLOAT p_act;
static MD_FLOAT e_act;
static int mstat;
/* exported subroutines */
void setupThermo(Parameter *param, int natoms)
{
int maxstat = param->ntimes / param->nstat + 2;
steparr = (int*) malloc(maxstat * sizeof(int));
tmparr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
engarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
prsarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
if(param->force_field == FF_LJ) {
mvv2e = 1.0;
dof_boltz = (natoms * 3 - 3);
t_scale = mvv2e / dof_boltz;
p_scale = 1.0 / 3 / param->xprd / param->yprd / param->zprd;
e_scale = 0.5;
} else {
mvv2e = 1.036427e-04;
dof_boltz = (natoms * 3 - 3) * 8.617343e-05;
t_scale = mvv2e / dof_boltz;
p_scale = 1.602176e+06 / 3 / param->xprd / param->yprd / param->zprd;
e_scale = 524287.985533;//16.0;
param->dtforce /= mvv2e;
}
printf("step\ttemp\t\tpressure\n");
}
void computeThermo(int iflag, Parameter *param, Atom *atom)
{
MD_FLOAT t = 0.0, p;
for(int i = 0; i < atom->Nlocal; i++) {
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
}
t = t * t_scale;
p = (t * dof_boltz) * p_scale;
int istep = iflag;
if(iflag == -1){
istep = param->ntimes;
}
if(iflag == 0){
mstat = 0;
}
steparr[mstat] = istep;
tmparr[mstat] = t;
prsarr[mstat] = p;
mstat++;
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
}
void adjustThermo(Parameter *param, Atom *atom)
{
/* zero center-of-mass motion */
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
for(int i = 0; i < atom->Nlocal; i++) {
vxtot += atom_vx(i);
vytot += atom_vy(i);
vztot += atom_vz(i);
}
vxtot = vxtot / atom->Natoms;
vytot = vytot / atom->Natoms;
vztot = vztot / atom->Natoms;
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) -= vxtot;
atom_vy(i) -= vytot;
atom_vz(i) -= vztot;
}
t_act = 0;
MD_FLOAT t = 0.0;
for(int i = 0; i < atom->Nlocal; i++) {
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
}
t *= t_scale;
MD_FLOAT factor = sqrt(param->temp / t);
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) *= factor;
atom_vy(i) *= factor;
atom_vz(i) *= factor;
}
}

43
lammps/timing.c Normal file
View File

@@ -0,0 +1,43 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <stdlib.h>
#include <time.h>
double getTimeStamp()
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
}
double getTimeResolution()
{
struct timespec ts;
clock_getres(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
}
double getTimeStamp_()
{
return getTimeStamp();
}

42
lammps/util.c Normal file
View File

@@ -0,0 +1,42 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* This file is part of MD-Bench.
*
* MD-Bench is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* =======================================================================================
*/
#include <util.h>
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
#define IA 16807
#define IM 2147483647
#define AM (1.0/IM)
#define IQ 127773
#define IR 2836
#define MASK 123459876
double myrandom(int* idum)
{
int k= (*idum) / IQ;
double ans;
*idum = IA * (*idum - k * IQ) - IR * k;
if(*idum < 0) *idum += IM;
ans = AM * (*idum);
return ans;
}

44
lammps/vtk.c Normal file
View File

@@ -0,0 +1,44 @@
#include <stdio.h>
#include <stdlib.h>
#include <atom.h>
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
FILE* fp = fopen(timestep_filename, "wb");
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
}
fprintf(fp, "\n\n");
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "1 %d\n", i);
}
fprintf(fp, "\n\n");
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "1\n");
}
fprintf(fp, "\n\n");
fprintf(fp, "POINT_DATA %d\n", atom->Nlocal);
fprintf(fp, "SCALARS mass double\n");
fprintf(fp, "LOOKUP_TABLE default\n");
for(int i = 0; i < atom->Nlocal; i++) {
fprintf(fp, "1.0\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}