Move src directory to lammps
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
89
lammps/allocate.c
Normal file
89
lammps/allocate.c
Normal file
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
void checkCUDAError(const char *msg, cudaError_t err)
|
||||
{
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
//print a human readable error message
|
||||
printf("[CUDA ERROR %s]: %s\r\n", msg, cudaGetErrorString(err));
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void* allocate (int alignment, size_t bytesize)
|
||||
{
|
||||
int errorCode;
|
||||
void* ptr;
|
||||
|
||||
checkCUDAError( "allocate", cudaMallocHost((void**)&ptr, bytesize) );
|
||||
|
||||
return ptr;
|
||||
|
||||
/*
|
||||
errorCode = posix_memalign(&ptr, alignment, bytesize);
|
||||
|
||||
if (errorCode) {
|
||||
if (errorCode == EINVAL) {
|
||||
fprintf(stderr,
|
||||
"Error: Alignment parameter is not a power of two\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (errorCode == ENOMEM) {
|
||||
fprintf(stderr,
|
||||
"Error: Insufficient memory to fulfill the request\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
if (ptr == NULL) {
|
||||
fprintf(stderr, "Error: posix_memalign failed!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return ptr;
|
||||
*/
|
||||
}
|
||||
|
||||
void* reallocate (
|
||||
void* ptr,
|
||||
int alignment,
|
||||
size_t newBytesize,
|
||||
size_t oldBytesize)
|
||||
{
|
||||
void* newarray = allocate(alignment, newBytesize);
|
||||
|
||||
if(ptr != NULL) {
|
||||
memcpy(newarray, ptr, oldBytesize);
|
||||
cudaFreeHost(ptr);
|
||||
}
|
||||
|
||||
return newarray;
|
||||
}
|
185
lammps/atom.c
Normal file
185
lammps/atom.c
Normal file
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Authors: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Rafael Ravedutti (rr), rafaelravedutti@gmail.com
|
||||
*
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
#include <util.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <device_launch_parameters.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
void initAtom(Atom *atom)
|
||||
{
|
||||
atom->x = NULL; atom->y = NULL; atom->z = NULL;
|
||||
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
|
||||
atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
|
||||
atom->Natoms = 0;
|
||||
atom->Nlocal = 0;
|
||||
atom->Nghost = 0;
|
||||
atom->Nmax = 0;
|
||||
atom->type = NULL;
|
||||
atom->ntypes = 0;
|
||||
atom->epsilon = NULL;
|
||||
atom->sigma6 = NULL;
|
||||
atom->cutforcesq = NULL;
|
||||
atom->cutneighsq = NULL;
|
||||
}
|
||||
|
||||
void createAtom(Atom *atom, Parameter *param)
|
||||
{
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||
atom->Natoms = 4 * param->nx * param->ny * param->nz;
|
||||
atom->Nlocal = 0;
|
||||
atom->ntypes = param->ntypes;
|
||||
checkCUDAError( "atom->epsilon cudaMallocHost", cudaMallocHost((void**)&(atom->epsilon), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
checkCUDAError( "atom->sigma6 cudaMallocHost", cudaMallocHost((void**)&(atom->sigma6), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
checkCUDAError( "atom->cutforcesq cudaMallocHost", cudaMallocHost((void**)&(atom->cutforcesq), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
checkCUDAError( "atom->cutneighsq cudaMallocHost", cudaMallocHost((void**)&(atom->cutneighsq), atom->ntypes * atom->ntypes * sizeof(MD_FLOAT)) ); // atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
||||
atom->epsilon[i] = param->epsilon;
|
||||
atom->sigma6[i] = param->sigma6;
|
||||
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
int ilo = (int) (xlo / (0.5 * alat) - 1);
|
||||
int ihi = (int) (xhi / (0.5 * alat) + 1);
|
||||
int jlo = (int) (ylo / (0.5 * alat) - 1);
|
||||
int jhi = (int) (yhi / (0.5 * alat) + 1);
|
||||
int klo = (int) (zlo / (0.5 * alat) - 1);
|
||||
int khi = (int) (zhi / (0.5 * alat) + 1);
|
||||
|
||||
ilo = MAX(ilo, 0);
|
||||
ihi = MIN(ihi, 2 * param->nx - 1);
|
||||
jlo = MAX(jlo, 0);
|
||||
jhi = MIN(jhi, 2 * param->ny - 1);
|
||||
klo = MAX(klo, 0);
|
||||
khi = MIN(khi, 2 * param->nz - 1);
|
||||
|
||||
MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp;
|
||||
int i, j, k, m, n;
|
||||
int sx = 0; int sy = 0; int sz = 0;
|
||||
int ox = 0; int oy = 0; int oz = 0;
|
||||
int subboxdim = 8;
|
||||
|
||||
while(oz * subboxdim <= khi) {
|
||||
|
||||
k = oz * subboxdim + sz;
|
||||
j = oy * subboxdim + sy;
|
||||
i = ox * subboxdim + sx;
|
||||
|
||||
if(((i + j + k) % 2 == 0) &&
|
||||
(i >= ilo) && (i <= ihi) &&
|
||||
(j >= jlo) && (j <= jhi) &&
|
||||
(k >= klo) && (k <= khi)) {
|
||||
|
||||
xtmp = 0.5 * alat * i;
|
||||
ytmp = 0.5 * alat * j;
|
||||
ztmp = 0.5 * alat * k;
|
||||
|
||||
if( xtmp >= xlo && xtmp < xhi &&
|
||||
ytmp >= ylo && ytmp < yhi &&
|
||||
ztmp >= zlo && ztmp < zhi ) {
|
||||
|
||||
n = k * (2 * param->ny) * (2 * param->nx) +
|
||||
j * (2 * param->nx) +
|
||||
i + 1;
|
||||
|
||||
for(m = 0; m < 5; m++) {
|
||||
myrandom(&n);
|
||||
}
|
||||
vxtmp = myrandom(&n);
|
||||
|
||||
for(m = 0; m < 5; m++){
|
||||
myrandom(&n);
|
||||
}
|
||||
vytmp = myrandom(&n);
|
||||
|
||||
for(m = 0; m < 5; m++) {
|
||||
myrandom(&n);
|
||||
}
|
||||
vztmp = myrandom(&n);
|
||||
|
||||
if(atom->Nlocal == atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom_x(atom->Nlocal) = xtmp;
|
||||
atom_y(atom->Nlocal) = ytmp;
|
||||
atom_z(atom->Nlocal) = ztmp;
|
||||
atom_vx(atom->Nlocal) = vxtmp;
|
||||
atom_vy(atom->Nlocal) = vytmp;
|
||||
atom_vz(atom->Nlocal) = vztmp;
|
||||
atom->type[atom->Nlocal] = rand() % atom->ntypes;
|
||||
atom->Nlocal++;
|
||||
}
|
||||
}
|
||||
|
||||
sx++;
|
||||
|
||||
if(sx == subboxdim) { sx = 0; sy++; }
|
||||
if(sy == subboxdim) { sy = 0; sz++; }
|
||||
if(sz == subboxdim) { sz = 0; ox++; }
|
||||
if(ox * subboxdim > ihi) { ox = 0; oy++; }
|
||||
if(oy * subboxdim > jhi) { oy = 0; oz++; }
|
||||
}
|
||||
}
|
||||
|
||||
void growAtom(Atom *atom)
|
||||
{
|
||||
int nold = atom->Nmax;
|
||||
atom->Nmax += DELTA;
|
||||
|
||||
#ifdef AOS
|
||||
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||
|
||||
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||
|
||||
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||
#else
|
||||
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
|
||||
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
|
||||
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
|
||||
#endif
|
||||
|
||||
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
|
||||
}
|
285
lammps/eam_utils.c
Normal file
285
lammps/eam_utils.c
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <eam.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MAXLINE
|
||||
#define MAXLINE 4096
|
||||
#endif
|
||||
|
||||
void initEam(Eam* eam, Parameter* param) {
|
||||
int ntypes = param->ntypes;
|
||||
eam->nmax = 0;
|
||||
eam->fp = NULL;
|
||||
coeff(eam, param);
|
||||
init_style(eam, param);
|
||||
}
|
||||
|
||||
void coeff(Eam* eam, Parameter* param) {
|
||||
read_file(&eam->file, param->input_file);
|
||||
param->mass = eam->file.mass;
|
||||
param->cutforce = eam->file.cut;
|
||||
param->cutneigh = param->cutforce + 1.0;
|
||||
param->temp = 600.0;
|
||||
param->dt = 0.001;
|
||||
param->rho = 0.07041125;
|
||||
param->dtforce = 0.5 * param->dt / param->mass;
|
||||
}
|
||||
|
||||
void init_style(Eam* eam, Parameter* param) {
|
||||
// convert read-in file(s) to arrays and spline them
|
||||
file2array(eam);
|
||||
array2spline(eam, param);
|
||||
}
|
||||
|
||||
void read_file(Funcfl* file, const char* filename) {
|
||||
FILE* fptr;
|
||||
char line[MAXLINE];
|
||||
|
||||
fptr = fopen(filename, "r");
|
||||
if(fptr == NULL) {
|
||||
printf("Can't open EAM Potential file: %s\n", filename);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
int tmp;
|
||||
fgets(line, MAXLINE, fptr);
|
||||
fgets(line, MAXLINE, fptr);
|
||||
sscanf(line, "%d %lg", &tmp, &(file->mass));
|
||||
fgets(line, MAXLINE, fptr);
|
||||
sscanf(line, "%d %lg %d %lg %lg", &file->nrho, &file->drho, &file->nr, &file->dr, &file->cut);
|
||||
|
||||
//printf("Read: %lf %i %lf %i %lf %lf\n",file->mass,file->nrho,file->drho,file->nr,file->dr,file->cut);
|
||||
file->frho = (MD_FLOAT *) allocate(ALIGNMENT, (file->nrho + 1) * sizeof(MD_FLOAT));
|
||||
file->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT));
|
||||
file->zr = (MD_FLOAT *) allocate(ALIGNMENT, (file->nr + 1) * sizeof(MD_FLOAT));
|
||||
grab(fptr, file->nrho, file->frho);
|
||||
grab(fptr, file->nr, file->zr);
|
||||
grab(fptr, file->nr, file->rhor);
|
||||
for(int i = file->nrho; i > 0; i--) file->frho[i] = file->frho[i - 1];
|
||||
for(int i = file->nr; i > 0; i--) file->rhor[i] = file->rhor[i - 1];
|
||||
for(int i = file->nr; i > 0; i--) file->zr[i] = file->zr[i - 1];
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void file2array(Eam* eam) {
|
||||
int i, j, k, m, n;
|
||||
double sixth = 1.0 / 6.0;
|
||||
|
||||
// determine max function params from all active funcfl files
|
||||
// active means some element is pointing at it via map
|
||||
int active;
|
||||
double rmax, rhomax;
|
||||
eam->dr = eam->drho = rmax = rhomax = 0.0;
|
||||
active = 0;
|
||||
Funcfl* file = &eam->file;
|
||||
eam->dr = MAX(eam->dr, file->dr);
|
||||
eam->drho = MAX(eam->drho, file->drho);
|
||||
rmax = MAX(rmax, (file->nr - 1) * file->dr);
|
||||
rhomax = MAX(rhomax, (file->nrho - 1) * file->drho);
|
||||
|
||||
// set nr,nrho from cutoff and spacings
|
||||
// 0.5 is for round-off in divide
|
||||
eam->nr = (int)(rmax / eam->dr + 0.5);
|
||||
eam->nrho = (int)(rhomax / eam->drho + 0.5);
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup frho arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate frho arrays
|
||||
// nfrho = # of funcfl files + 1 for zero array
|
||||
eam->frho = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nrho + 1) * sizeof(MD_FLOAT));
|
||||
|
||||
// interpolate each file's frho to a single grid and cutoff
|
||||
double r, p, cof1, cof2, cof3, cof4;
|
||||
n = 0;
|
||||
|
||||
for(m = 1; m <= eam->nrho; m++) {
|
||||
r = (m - 1) * eam->drho;
|
||||
p = r / file->drho + 1.0;
|
||||
k = (int)(p);
|
||||
k = MIN(k, file->nrho - 2);
|
||||
k = MAX(k, 2);
|
||||
p -= k;
|
||||
p = MIN(p, 2.0);
|
||||
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
|
||||
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
|
||||
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
|
||||
cof4 = sixth * p * (p * p - 1.0);
|
||||
eam->frho[m] = cof1 * file->frho[k - 1] + cof2 * file->frho[k] +
|
||||
cof3 * file->frho[k + 1] + cof4 * file->frho[k + 2];
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup rhor arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate rhor arrays
|
||||
// nrhor = # of funcfl files
|
||||
eam->rhor = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT));
|
||||
|
||||
// interpolate each file's rhor to a single grid and cutoff
|
||||
for(m = 1; m <= eam->nr; m++) {
|
||||
r = (m - 1) * eam->dr;
|
||||
p = r / file->dr + 1.0;
|
||||
k = (int)(p);
|
||||
k = MIN(k, file->nr - 2);
|
||||
k = MAX(k, 2);
|
||||
p -= k;
|
||||
p = MIN(p, 2.0);
|
||||
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
|
||||
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
|
||||
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
|
||||
cof4 = sixth * p * (p * p - 1.0);
|
||||
eam->rhor[m] = cof1 * file->rhor[k - 1] + cof2 * file->rhor[k] +
|
||||
cof3 * file->rhor[k + 1] + cof4 * file->rhor[k + 2];
|
||||
//if(m==119)printf("BuildRho: %e %e %e %e %e %e\n",rhor[m],cof1,cof2,cof3,cof4,file->rhor[k]);
|
||||
}
|
||||
|
||||
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
|
||||
// for funcfl files, I,J mapping only depends on I
|
||||
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup z2r arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate z2r arrays
|
||||
// nz2r = N*(N+1)/2 where N = # of funcfl files
|
||||
eam->z2r = (MD_FLOAT *) allocate(ALIGNMENT, (eam->nr + 1) * sizeof(MD_FLOAT));
|
||||
|
||||
// create a z2r array for each file against other files, only for I >= J
|
||||
// interpolate zri and zrj to a single grid and cutoff
|
||||
double zri, zrj;
|
||||
Funcfl* ifile = &eam->file;
|
||||
Funcfl* jfile = &eam->file;
|
||||
|
||||
for(m = 1; m <= eam->nr; m++) {
|
||||
r = (m - 1) * eam->dr;
|
||||
p = r / ifile->dr + 1.0;
|
||||
k = (int)(p);
|
||||
k = MIN(k, ifile->nr - 2);
|
||||
k = MAX(k, 2);
|
||||
p -= k;
|
||||
p = MIN(p, 2.0);
|
||||
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
|
||||
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
|
||||
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
|
||||
cof4 = sixth * p * (p * p - 1.0);
|
||||
zri = cof1 * ifile->zr[k - 1] + cof2 * ifile->zr[k] +
|
||||
cof3 * ifile->zr[k + 1] + cof4 * ifile->zr[k + 2];
|
||||
|
||||
p = r / jfile->dr + 1.0;
|
||||
k = (int)(p);
|
||||
k = MIN(k, jfile->nr - 2);
|
||||
k = MAX(k, 2);
|
||||
p -= k;
|
||||
p = MIN(p, 2.0);
|
||||
cof1 = -sixth * p * (p - 1.0) * (p - 2.0);
|
||||
cof2 = 0.5 * (p * p - 1.0) * (p - 2.0);
|
||||
cof3 = -0.5 * p * (p + 1.0) * (p - 2.0);
|
||||
cof4 = sixth * p * (p * p - 1.0);
|
||||
zrj = cof1 * jfile->zr[k - 1] + cof2 * jfile->zr[k] +
|
||||
cof3 * jfile->zr[k + 1] + cof4 * jfile->zr[k + 2];
|
||||
|
||||
eam->z2r[m] = 27.2 * 0.529 * zri * zrj;
|
||||
}
|
||||
}
|
||||
|
||||
void array2spline(Eam* eam, Parameter* param) {
|
||||
eam->rdr = 1.0 / eam->dr;
|
||||
eam->rdrho = 1.0 / eam->drho;
|
||||
eam->nrho_tot = (eam->nrho + 1) * 7 + 64;
|
||||
eam->nr_tot = (eam->nr + 1) * 7 + 64;
|
||||
eam->nrho_tot -= eam->nrho_tot%64;
|
||||
eam->nr_tot -= eam->nr_tot%64;
|
||||
|
||||
int ntypes = param->ntypes;
|
||||
eam->frho_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nrho_tot * sizeof(MD_FLOAT));
|
||||
eam->rhor_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT));
|
||||
eam->z2r_spline = (MD_FLOAT *) allocate(ALIGNMENT, ntypes * ntypes * eam->nr_tot * sizeof(MD_FLOAT));
|
||||
interpolate(eam->nrho, eam->drho, eam->frho, eam->frho_spline);
|
||||
interpolate(eam->nr, eam->dr, eam->rhor, eam->rhor_spline);
|
||||
interpolate(eam->nr, eam->dr, eam->z2r, eam->z2r_spline);
|
||||
|
||||
// replicate data for multiple types;
|
||||
for(int tt = 0; tt < ntypes * ntypes; tt++) {
|
||||
for(int k = 0; k < eam->nrho_tot; k++)
|
||||
eam->frho_spline[tt*eam->nrho_tot + k] = eam->frho_spline[k];
|
||||
for(int k = 0; k < eam->nr_tot; k++)
|
||||
eam->rhor_spline[tt*eam->nr_tot + k] = eam->rhor_spline[k];
|
||||
for(int k = 0; k < eam->nr_tot; k++)
|
||||
eam->z2r_spline[tt*eam->nr_tot + k] = eam->z2r_spline[k];
|
||||
}
|
||||
}
|
||||
|
||||
void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline) {
|
||||
for(int m = 1; m <= n; m++) spline[m * 7 + 6] = f[m];
|
||||
|
||||
spline[1 * 7 + 5] = spline[2 * 7 + 6] - spline[1 * 7 + 6];
|
||||
spline[2 * 7 + 5] = 0.5 * (spline[3 * 7 + 6] - spline[1 * 7 + 6]);
|
||||
spline[(n - 1) * 7 + 5] = 0.5 * (spline[n * 7 + 6] - spline[(n - 2) * 7 + 6]);
|
||||
spline[n * 7 + 5] = spline[n * 7 + 6] - spline[(n - 1) * 7 + 6];
|
||||
|
||||
for(int m = 3; m <= n - 2; m++)
|
||||
spline[m * 7 + 5] = ((spline[(m - 2) * 7 + 6] - spline[(m + 2) * 7 + 6]) +
|
||||
8.0 * (spline[(m + 1) * 7 + 6] - spline[(m - 1) * 7 + 6])) / 12.0;
|
||||
|
||||
for(int m = 1; m <= n - 1; m++) {
|
||||
spline[m * 7 + 4] = 3.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]) -
|
||||
2.0 * spline[m * 7 + 5] - spline[(m + 1) * 7 + 5];
|
||||
spline[m * 7 + 3] = spline[m * 7 + 5] + spline[(m + 1) * 7 + 5] -
|
||||
2.0 * (spline[(m + 1) * 7 + 6] - spline[m * 7 + 6]);
|
||||
}
|
||||
|
||||
spline[n * 7 + 4] = 0.0;
|
||||
spline[n * 7 + 3] = 0.0;
|
||||
|
||||
for(int m = 1; m <= n; m++) {
|
||||
spline[m * 7 + 2] = spline[m * 7 + 5] / delta;
|
||||
spline[m * 7 + 1] = 2.0 * spline[m * 7 + 4] / delta;
|
||||
spline[m * 7 + 0] = 3.0 * spline[m * 7 + 3] / delta;
|
||||
}
|
||||
}
|
||||
|
||||
void grab(FILE* fptr, int n, MD_FLOAT* list) {
|
||||
char* ptr;
|
||||
char line[MAXLINE];
|
||||
int i = 0;
|
||||
|
||||
while(i < n) {
|
||||
fgets(line, MAXLINE, fptr);
|
||||
ptr = strtok(line, " \t\n\r\f");
|
||||
list[i++] = atof(ptr);
|
||||
while(ptr = strtok(NULL, " \t\n\r\f")) list[i++] = atof(ptr);
|
||||
}
|
||||
}
|
232
lammps/force-tracing.c
Normal file
232
lammps/force-tracing.c
Normal file
@@ -0,0 +1,232 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <likwid-marker.h>
|
||||
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <stats.h>
|
||||
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifndef VECTOR_WIDTH
|
||||
# define VECTOR_WIDTH 8
|
||||
#endif
|
||||
|
||||
#ifndef TRACER_CONDITION
|
||||
# define TRACER_CONDITION (!(timestep % param->every))
|
||||
#endif
|
||||
|
||||
#ifdef MEM_TRACER
|
||||
# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
|
||||
if(TRACER_CONDITION) { \
|
||||
char mem_tracer_fn[128]; \
|
||||
snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
|
||||
mem_tracer_fp = fopen(mem_tracer_fn, "w");
|
||||
}
|
||||
|
||||
# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
|
||||
# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
|
||||
#else
|
||||
# define MEM_TRACER_INIT
|
||||
# define MEM_TRACER_END
|
||||
# define MEM_TRACE(addr, op)
|
||||
#endif
|
||||
|
||||
#ifdef INDEX_TRACER
|
||||
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
|
||||
if(TRACER_CONDITION) { \
|
||||
char index_tracer_fn[128]; \
|
||||
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
|
||||
index_tracer_fp = fopen(index_tracer_fn, "w"); \
|
||||
}
|
||||
|
||||
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
|
||||
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
|
||||
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
|
||||
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
|
||||
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
||||
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
||||
fprintf(index_tracer_fp, "I: "); \
|
||||
for(int __j = 0; __j < __e; ++__j) { \
|
||||
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
|
||||
} \
|
||||
fprintf(index_tracer_fp, "\n"); \
|
||||
} \
|
||||
}
|
||||
|
||||
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
|
||||
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
||||
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
||||
if(__e > 1) { \
|
||||
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
|
||||
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
|
||||
if(l[__k] > l[__k + 1]) { \
|
||||
int __t = l[__k]; \
|
||||
l[__k] = l[__k + 1]; \
|
||||
l[__k + 1] = __t; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
|
||||
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
|
||||
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
|
||||
if(__e > 1) { \
|
||||
fprintf(index_tracer_fp, "D: "); \
|
||||
for(int __j = 0; __j < __e - 1; ++__j) { \
|
||||
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
|
||||
fprintf(index_tracer_fp, "%d ", __dist); \
|
||||
} \
|
||||
fprintf(index_tracer_fp, "\n"); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
# define INDEX_TRACER_INIT
|
||||
# define INDEX_TRACER_END
|
||||
# define INDEX_TRACE_NATOMS(nl, ng, mn)
|
||||
# define INDEX_TRACE_ATOM(a)
|
||||
# define INDEX_TRACE(l, e)
|
||||
# define DIST_TRACE_SORT(l, e)
|
||||
# define DIST_TRACE(l, e)
|
||||
#endif
|
||||
|
||||
double computeForceTracing(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
|
||||
MEM_TRACER_INIT;
|
||||
INDEX_TRACER_INIT;
|
||||
int Nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
fx[i] = 0.0;
|
||||
fy[i] = 0.0;
|
||||
fz[i] = 0.0;
|
||||
}
|
||||
|
||||
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
|
||||
double S = getTimeStamp();
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
for(int na = 0; na < (first_exec ? 1 : ATOMS_LOOP_RUNS); na++) {
|
||||
#pragma omp parallel for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
MEM_TRACE(atom_x(i), 'R');
|
||||
MEM_TRACE(atom_y(i), 'R');
|
||||
MEM_TRACE(atom_z(i), 'R');
|
||||
INDEX_TRACE_ATOM(i);
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
MEM_TRACE(atom->type(i), 'R');
|
||||
#endif
|
||||
|
||||
#if defined(VARIANT) && VARIANT == stub && defined(NEIGHBORS_LOOP_RUNS) && NEIGHBORS_LOOP_RUNS > 1
|
||||
#define REPEAT_NEIGHBORS_LOOP
|
||||
int nmax = first_exec ? 1 : NEIGHBORS_LOOP_RUNS;
|
||||
for(int nn = 0; nn < (first_exec ? 1 : NEIGHBORS_LOOP_RUNS); nn++) {
|
||||
#endif
|
||||
|
||||
//DIST_TRACE_SORT(neighs, numneighs);
|
||||
INDEX_TRACE(neighs, numneighs);
|
||||
//DIST_TRACE(neighs, numneighs);
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
MEM_TRACE(neighs[k], 'R');
|
||||
MEM_TRACE(atom_x(j), 'R');
|
||||
MEM_TRACE(atom_y(j), 'R');
|
||||
MEM_TRACE(atom_z(j), 'R');
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
MEM_TRACE(atom->type(j), 'R');
|
||||
#endif
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef REPEAT_NEIGHBORS_LOOP
|
||||
}
|
||||
#endif
|
||||
|
||||
fx[i] += fix;
|
||||
fy[i] += fiy;
|
||||
fz[i] += fiz;
|
||||
|
||||
addStat(stats->total_force_neighs, numneighs);
|
||||
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
MEM_TRACE(fx[i], 'R');
|
||||
MEM_TRACE(fx[i], 'W');
|
||||
MEM_TRACE(fy[i], 'R');
|
||||
MEM_TRACE(fy[i], 'W');
|
||||
MEM_TRACE(fz[i], 'R');
|
||||
MEM_TRACE(fz[i], 'W');
|
||||
}
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
double E = getTimeStamp();
|
||||
|
||||
INDEX_TRACER_END;
|
||||
MEM_TRACER_END;
|
||||
return E-S;
|
||||
}
|
212
lammps/force.cu
Normal file
212
lammps/force.cu
Normal file
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <cuda_profiler_api.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <device_launch_parameters.h>
|
||||
|
||||
extern "C" {
|
||||
#include <likwid-marker.h>
|
||||
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
}
|
||||
|
||||
// cuda kernel
|
||||
__global__ void calc_force(
|
||||
Atom a,
|
||||
MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon,
|
||||
int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh) {
|
||||
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if( i >= Nlocal ) {
|
||||
return;
|
||||
}
|
||||
|
||||
Atom *atom = &a;
|
||||
|
||||
const int numneighs = neigh_numneigh[i];
|
||||
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neigh_neighbors[atom->Nlocal * k + i];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
#endif
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
}
|
||||
|
||||
atom_fx(i) = fix;
|
||||
atom_fy(i) = fiy;
|
||||
atom_fz(i) = fiz;
|
||||
}
|
||||
|
||||
__global__ void kernel_initial_integrate(MD_FLOAT dtforce, MD_FLOAT dt, int Nlocal, Atom a) {
|
||||
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if( i >= Nlocal ) {
|
||||
return;
|
||||
}
|
||||
|
||||
Atom *atom = &a;
|
||||
|
||||
atom_vx(i) += dtforce * atom_fx(i);
|
||||
atom_vy(i) += dtforce * atom_fy(i);
|
||||
atom_vz(i) += dtforce * atom_fz(i);
|
||||
atom_x(i) = atom_x(i) + dt * atom_vx(i);
|
||||
atom_y(i) = atom_y(i) + dt * atom_vy(i);
|
||||
atom_z(i) = atom_z(i) + dt * atom_vz(i);
|
||||
}
|
||||
|
||||
__global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal, Atom a) {
|
||||
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if( i >= Nlocal ) {
|
||||
return;
|
||||
}
|
||||
|
||||
Atom *atom = &a;
|
||||
|
||||
atom_vx(i) += dtforce * atom_fx(i);
|
||||
atom_vy(i) += dtforce * atom_fy(i);
|
||||
atom_vz(i) += dtforce * atom_fz(i);
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
|
||||
void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom, Atom *c_atom, const int num_threads_per_block) {
|
||||
|
||||
const int Nlocal = atom->Nlocal;
|
||||
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
||||
|
||||
kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, *c_atom);
|
||||
|
||||
checkCUDAError( "PeekAtLastError FinalIntegrate", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync FinalIntegrate", cudaDeviceSynchronize() );
|
||||
|
||||
if(doReneighbour) {
|
||||
checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom, Atom *c_atom, const int num_threads_per_block) {
|
||||
|
||||
const int Nlocal = atom->Nlocal;
|
||||
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
||||
|
||||
kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, *c_atom);
|
||||
|
||||
checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
|
||||
|
||||
if(doReneighbour) {
|
||||
checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom->vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
|
||||
}
|
||||
}
|
||||
|
||||
double computeForce(
|
||||
bool reneighbourHappenend,
|
||||
Parameter *param,
|
||||
Atom *atom,
|
||||
Neighbor *neighbor,
|
||||
Atom *c_atom,
|
||||
Neighbor *c_neighbor,
|
||||
int num_threads_per_block
|
||||
)
|
||||
{
|
||||
int Nlocal = atom->Nlocal;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
|
||||
/*
|
||||
int nDevices;
|
||||
cudaGetDeviceCount(&nDevices);
|
||||
size_t free, total;
|
||||
for(int i = 0; i < nDevices; ++i) {
|
||||
cudaMemGetInfo( &free, &total );
|
||||
cudaDeviceProp prop;
|
||||
cudaGetDeviceProperties(&prop, i);
|
||||
printf("DEVICE %d/%d NAME: %s\r\n with %ld MB/%ld MB memory used", i + 1, nDevices, prop.name, free / 1024 / 1024, total / 1024 / 1024);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
// HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
|
||||
|
||||
// checkCUDAError( "c_atom->fx memset", cudaMemset(c_atom->fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
|
||||
|
||||
cudaProfilerStart();
|
||||
|
||||
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
||||
|
||||
double S = getTimeStamp();
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
calc_force <<< num_blocks, num_threads_per_block >>> (*c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighbor->neighbors, c_neighbor->numneigh);
|
||||
|
||||
checkCUDAError( "PeekAtLastError ComputeForce", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync ComputeForce", cudaDeviceSynchronize() );
|
||||
|
||||
cudaProfilerStop();
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
double E = getTimeStamp();
|
||||
|
||||
return E-S;
|
||||
}
|
||||
}
|
213
lammps/force_eam.c
Normal file
213
lammps/force_eam.c
Normal file
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <likwid-marker.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <allocate.h>
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <stats.h>
|
||||
#include <eam.h>
|
||||
#include <util.h>
|
||||
|
||||
double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep) {
|
||||
if(eam->nmax < atom->Nmax) {
|
||||
eam->nmax = atom->Nmax;
|
||||
if(eam->fp != NULL) { free(eam->fp); }
|
||||
eam->fp = (MD_FLOAT *) allocate(ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT));
|
||||
}
|
||||
|
||||
int Nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp;
|
||||
MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline;
|
||||
int rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; int rdrho = eam->rdrho;
|
||||
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
|
||||
double S = getTimeStamp();
|
||||
|
||||
LIKWID_MARKER_START("force_eam_fp");
|
||||
#pragma omp parallel for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT rhoi = 0;
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
#pragma ivdep
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
#else
|
||||
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
#endif
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT p = sqrt(rsq) * rdr + 1.0;
|
||||
int m = (int)(p);
|
||||
m = m < nr - 1 ? m : nr - 1;
|
||||
p -= m;
|
||||
p = p < 1.0 ? p : 1.0;
|
||||
#ifdef EXPLICIT_TYPES
|
||||
rhoi += ((rhor_spline[type_ij * nr_tot + m * 7 + 3] * p +
|
||||
rhor_spline[type_ij * nr_tot + m * 7 + 4]) * p +
|
||||
rhor_spline[type_ij * nr_tot + m * 7 + 5]) * p +
|
||||
rhor_spline[type_ij * nr_tot + m * 7 + 6];
|
||||
#else
|
||||
rhoi += ((rhor_spline[m * 7 + 3] * p +
|
||||
rhor_spline[m * 7 + 4]) * p +
|
||||
rhor_spline[m * 7 + 5]) * p +
|
||||
rhor_spline[m * 7 + 6];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_ii = type_i * type_i;
|
||||
#endif
|
||||
MD_FLOAT p = 1.0 * rhoi * rdrho + 1.0;
|
||||
int m = (int)(p);
|
||||
m = MAX(1, MIN(m, nrho - 1));
|
||||
p -= m;
|
||||
p = MIN(p, 1.0);
|
||||
#ifdef EXPLICIT_TYPES
|
||||
fp[i] = (frho_spline[type_ii * nrho_tot + m * 7 + 0] * p +
|
||||
frho_spline[type_ii * nrho_tot + m * 7 + 1]) * p +
|
||||
frho_spline[type_ii * nrho_tot + m * 7 + 2];
|
||||
#else
|
||||
fp[i] = (frho_spline[m * 7 + 0] * p + frho_spline[m * 7 + 1]) * p + frho_spline[m * 7 + 2];
|
||||
#endif
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force_eam_fp");
|
||||
|
||||
// We still need to update fp for PBC atoms
|
||||
for(int i = 0; i < atom->Nghost; i++) {
|
||||
fp[Nlocal + i] = fp[atom->border_map[i]];
|
||||
}
|
||||
|
||||
LIKWID_MARKER_START("force_eam");
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
#pragma ivdep
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
#else
|
||||
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
#endif
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT r = sqrt(rsq);
|
||||
MD_FLOAT p = r * rdr + 1.0;
|
||||
int m = (int)(p);
|
||||
m = m < nr - 1 ? m : nr - 1;
|
||||
p -= m;
|
||||
p = p < 1.0 ? p : 1.0;
|
||||
|
||||
|
||||
// rhoip = derivative of (density at atom j due to atom i)
|
||||
// rhojp = derivative of (density at atom i due to atom j)
|
||||
// phi = pair potential energy
|
||||
// phip = phi'
|
||||
// z2 = phi * r
|
||||
// z2p = (phi * r)' = (phi' r) + phi
|
||||
// psip needs both fp[i] and fp[j] terms since r_ij appears in two
|
||||
// terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
|
||||
// hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
MD_FLOAT rhoip = (rhor_spline[type_ij * nr_tot + m * 7 + 0] * p +
|
||||
rhor_spline[type_ij * nr_tot + m * 7 + 1]) * p +
|
||||
rhor_spline[type_ij * nr_tot + m * 7 + 2];
|
||||
|
||||
MD_FLOAT z2p = (z2r_spline[type_ij * nr_tot + m * 7 + 0] * p +
|
||||
z2r_spline[type_ij * nr_tot + m * 7 + 1]) * p +
|
||||
z2r_spline[type_ij * nr_tot + m * 7 + 2];
|
||||
|
||||
MD_FLOAT z2 = ((z2r_spline[type_ij * nr_tot + m * 7 + 3] * p +
|
||||
z2r_spline[type_ij * nr_tot + m * 7 + 4]) * p +
|
||||
z2r_spline[type_ij * nr_tot + m * 7 + 5]) * p +
|
||||
z2r_spline[type_ij * nr_tot + m * 7 + 6];
|
||||
#else
|
||||
MD_FLOAT rhoip = (rhor_spline[m * 7 + 0] * p + rhor_spline[m * 7 + 1]) * p + rhor_spline[m * 7 + 2];
|
||||
MD_FLOAT z2p = (z2r_spline[m * 7 + 0] * p + z2r_spline[m * 7 + 1]) * p + z2r_spline[m * 7 + 2];
|
||||
MD_FLOAT z2 = ((z2r_spline[m * 7 + 3] * p +
|
||||
z2r_spline[m * 7 + 4]) * p +
|
||||
z2r_spline[m * 7 + 5]) * p +
|
||||
z2r_spline[m * 7 + 6];
|
||||
#endif
|
||||
|
||||
MD_FLOAT recip = 1.0 / r;
|
||||
MD_FLOAT phi = z2 * recip;
|
||||
MD_FLOAT phip = z2p * recip - phi * recip;
|
||||
MD_FLOAT psip = fp[i] * rhoip + fp[j] * rhoip + phip;
|
||||
MD_FLOAT fpair = -psip * recip;
|
||||
|
||||
fix += delx * fpair;
|
||||
fiy += dely * fpair;
|
||||
fiz += delz * fpair;
|
||||
//fpair *= 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
fx[i] = fix;
|
||||
fy[i] = fiy;
|
||||
fz[i] = fiz;
|
||||
addStat(stats->total_force_neighs, numneighs);
|
||||
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force_eam");
|
||||
double E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
33
lammps/includes/allocate.h
Normal file
33
lammps/includes/allocate.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#ifndef __ALLOCATE_H_
|
||||
#define __ALLOCATE_H_
|
||||
extern void* allocate (int alignment, size_t bytesize);
|
||||
extern void* reallocate (void* ptr, int alignment, size_t newBytesize, size_t oldBytesize);
|
||||
|
||||
extern void checkCUDAError(const char *msg, cudaError_t err);
|
||||
#endif
|
78
lammps/includes/atom.h
Normal file
78
lammps/includes/atom.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int *border_map;
|
||||
int *type;
|
||||
int ntypes;
|
||||
MD_FLOAT *epsilon;
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
extern void createAtom(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
|
||||
#ifdef AOS
|
||||
#define POS_DATA_LAYOUT "AoS"
|
||||
|
||||
#define atom_x(i) atom->x[(i) * 3 + 0]
|
||||
#define atom_y(i) atom->x[(i) * 3 + 1]
|
||||
#define atom_z(i) atom->x[(i) * 3 + 2]
|
||||
|
||||
#define atom_fx(i) atom->fx[(i) * 3 + 0]
|
||||
#define atom_fy(i) atom->fx[(i) * 3 + 1]
|
||||
#define atom_fz(i) atom->fx[(i) * 3 + 2]
|
||||
|
||||
#define atom_vx(i) atom->vx[(i) * 3 + 0]
|
||||
#define atom_vy(i) atom->vx[(i) * 3 + 1]
|
||||
#define atom_vz(i) atom->vx[(i) * 3 + 2]
|
||||
|
||||
#else
|
||||
#define POS_DATA_LAYOUT "SoA"
|
||||
|
||||
#define atom_x(i) atom->x[i]
|
||||
#define atom_y(i) atom->y[i]
|
||||
#define atom_z(i) atom->z[i]
|
||||
|
||||
#define atom_fx(i) atom->fx[i]
|
||||
#define atom_fy(i) atom->fy[i]
|
||||
#define atom_fz(i) atom->fz[i]
|
||||
|
||||
#define atom_vx(i) atom->vx[i]
|
||||
#define atom_vy(i) atom->vy[i]
|
||||
#define atom_vz(i) atom->vz[i]
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
55
lammps/includes/eam.h
Normal file
55
lammps/includes/eam.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdio.h>
|
||||
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __EAM_H_
|
||||
#define __EAM_H_
|
||||
typedef struct {
|
||||
int nrho, nr;
|
||||
MD_FLOAT drho, dr, cut, mass;
|
||||
MD_FLOAT *frho, *rhor, *zr;
|
||||
} Funcfl;
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT* fp;
|
||||
int nmax;
|
||||
int nrho, nr;
|
||||
int nrho_tot, nr_tot;
|
||||
MD_FLOAT dr, rdr, drho, rdrho;
|
||||
MD_FLOAT *frho, *rhor, *z2r;
|
||||
MD_FLOAT *rhor_spline, *frho_spline, *z2r_spline;
|
||||
Funcfl file;
|
||||
} Eam;
|
||||
|
||||
void initEam(Eam* eam, Parameter* param);
|
||||
void coeff(Eam* eam, Parameter* param);
|
||||
void init_style(Eam* eam, Parameter *param);
|
||||
void read_file(Funcfl* file, const char* filename);
|
||||
void file2array(Eam* eam);
|
||||
void array2spline(Eam* eam, Parameter* param);
|
||||
void interpolate(int n, MD_FLOAT delta, MD_FLOAT* f, MD_FLOAT* spline);
|
||||
void grab(FILE* fptr, int n, MD_FLOAT* list);
|
||||
#endif
|
170
lammps/includes/likwid-marker.h
Normal file
170
lammps/includes/likwid-marker.h
Normal file
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Filename: likwid-marker.h
|
||||
*
|
||||
* Description: Header File of likwid Marker API
|
||||
*
|
||||
* Version: <VERSION>
|
||||
* Released: <DATE>
|
||||
*
|
||||
* Authors: Thomas Gruber (tg), thomas.roehl@googlemail.com
|
||||
*
|
||||
* Project: likwid
|
||||
*
|
||||
* Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU General Public License as published by the Free Software
|
||||
* Foundation, either version 3 of the License, or (at your option) any later
|
||||
* version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* =======================================================================================
|
||||
*/
|
||||
#ifndef LIKWID_MARKER_H
|
||||
#define LIKWID_MARKER_H
|
||||
|
||||
|
||||
/** \addtogroup MarkerAPI Marker API module
|
||||
* @{
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_INIT
|
||||
Shortcut for likwid_markerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_THREADINIT
|
||||
Shortcut for likwid_markerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_REGISTER(regionTag)
|
||||
Shortcut for likwid_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_START(regionTag)
|
||||
Shortcut for likwid_markerStartRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_STOP(regionTag)
|
||||
Shortcut for likwid_markerStopRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
|
||||
Shortcut for likwid_markerGetResults() for \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_SWITCH
|
||||
Shortcut for likwid_markerNextGroup() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_RESET(regionTag)
|
||||
Shortcut for likwid_markerResetRegion() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_MARKER_CLOSE
|
||||
Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/** @}*/
|
||||
|
||||
#ifdef LIKWID_PERFMON
|
||||
#include <likwid.h>
|
||||
#define LIKWID_MARKER_INIT likwid_markerInit()
|
||||
#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
|
||||
#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
|
||||
#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
|
||||
#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
|
||||
#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
|
||||
#define LIKWID_MARKER_CLOSE likwid_markerClose()
|
||||
#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
|
||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)
|
||||
#else /* LIKWID_PERFMON */
|
||||
#define LIKWID_MARKER_INIT
|
||||
#define LIKWID_MARKER_THREADINIT
|
||||
#define LIKWID_MARKER_SWITCH
|
||||
#define LIKWID_MARKER_REGISTER(regionTag)
|
||||
#define LIKWID_MARKER_START(regionTag)
|
||||
#define LIKWID_MARKER_STOP(regionTag)
|
||||
#define LIKWID_MARKER_CLOSE
|
||||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
|
||||
#define LIKWID_MARKER_RESET(regionTag)
|
||||
#endif /* LIKWID_PERFMON */
|
||||
|
||||
|
||||
/** \addtogroup NvMarkerAPI NvMarker API module (MarkerAPI for Nvidia GPUs)
|
||||
* @{
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_INIT
|
||||
Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_THREADINIT
|
||||
Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_REGISTER(regionTag)
|
||||
Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_START(regionTag)
|
||||
Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_STOP(regionTag)
|
||||
Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count)
|
||||
Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_SWITCH
|
||||
Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_RESET(regionTag)
|
||||
Shortcut for likwid_gpuMarkerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/*!
|
||||
\def LIKWID_NVMARKER_CLOSE
|
||||
Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
|
||||
*/
|
||||
/** @}*/
|
||||
|
||||
#ifdef LIKWID_NVMON
|
||||
#ifndef LIKWID_WITH_NVMON
|
||||
#define LIKWID_WITH_NVMON
|
||||
#endif
|
||||
#include <likwid.h>
|
||||
#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit()
|
||||
#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit()
|
||||
#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup()
|
||||
#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag)
|
||||
#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag)
|
||||
#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag)
|
||||
#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose()
|
||||
#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag)
|
||||
#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \
|
||||
likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count)
|
||||
#else /* LIKWID_NVMON */
|
||||
#define LIKWID_NVMARKER_INIT
|
||||
#define LIKWID_NVMARKER_THREADINIT
|
||||
#define LIKWID_NVMARKER_SWITCH
|
||||
#define LIKWID_NVMARKER_REGISTER(regionTag)
|
||||
#define LIKWID_NVMARKER_START(regionTag)
|
||||
#define LIKWID_NVMARKER_STOP(regionTag)
|
||||
#define LIKWID_NVMARKER_CLOSE
|
||||
#define LIKWID_NVMARKER_GET(regionTag, nevents, events, time, count)
|
||||
#define LIKWID_NVMARKER_RESET(regionTag)
|
||||
#endif /* LIKWID_NVMON */
|
||||
|
||||
|
||||
|
||||
#endif /* LIKWID_MARKER_H */
|
58
lammps/includes/neighbor.h
Normal file
58
lammps/includes/neighbor.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __NEIGHBOR_H_
|
||||
#define __NEIGHBOR_H_
|
||||
typedef struct {
|
||||
int every;
|
||||
int ncalls;
|
||||
int* neighbors;
|
||||
int maxneighs;
|
||||
int* numneigh;
|
||||
} Neighbor;
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT xprd; MD_FLOAT yprd; MD_FLOAT zprd;
|
||||
MD_FLOAT bininvx; MD_FLOAT bininvy; MD_FLOAT bininvz;
|
||||
int mbinxlo; int mbinylo; int mbinzlo;
|
||||
int nbinx; int nbiny; int nbinz;
|
||||
int mbinx; int mbiny; int mbinz;
|
||||
} Neighbor_params;
|
||||
|
||||
typedef struct {
|
||||
int* bincount;
|
||||
int* bins;
|
||||
int mbins;
|
||||
int atoms_per_bin;
|
||||
} Binning;
|
||||
|
||||
extern void initNeighbor(Neighbor*, Parameter*);
|
||||
extern void setupNeighbor();
|
||||
extern void binatoms(Atom*);
|
||||
extern void buildNeighbor(Atom*, Neighbor*);
|
||||
extern void sortAtom(Atom*);
|
||||
extern void binatoms_cuda(Atom*, Binning*, int*, Neighbor_params*, const int);
|
||||
extern void buildNeighbor_cuda(Atom*, Neighbor*, Atom*, Neighbor*, const int, double*);
|
||||
#endif
|
57
lammps/includes/parameter.h
Normal file
57
lammps/includes/parameter.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#ifndef __PARAMETER_H_
|
||||
#define __PARAMETER_H_
|
||||
|
||||
#define FF_LJ 0
|
||||
#define FF_EAM 1
|
||||
|
||||
#if PRECISION == 1
|
||||
#define MD_FLOAT float
|
||||
#else
|
||||
#define MD_FLOAT double
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int force_field;
|
||||
char* input_file;
|
||||
char* vtk_file;
|
||||
MD_FLOAT epsilon;
|
||||
MD_FLOAT sigma6;
|
||||
MD_FLOAT temp;
|
||||
MD_FLOAT rho;
|
||||
MD_FLOAT mass;
|
||||
int ntypes;
|
||||
int ntimes;
|
||||
int nstat;
|
||||
int every;
|
||||
MD_FLOAT dt;
|
||||
MD_FLOAT dtforce;
|
||||
MD_FLOAT cutforce;
|
||||
MD_FLOAT cutneigh;
|
||||
int nx, ny, nz;
|
||||
MD_FLOAT lattice;
|
||||
MD_FLOAT xprd, yprd, zprd;
|
||||
double proc_freq;
|
||||
} Parameter;
|
||||
#endif
|
34
lammps/includes/pbc.h
Normal file
34
lammps/includes/pbc.h
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __PBC_H_
|
||||
#define __PBC_H_
|
||||
extern void initPbc(Atom*);
|
||||
extern void updatePbc(Atom*, Parameter*);
|
||||
extern void updatePbc_cuda(Atom*, Parameter*, Atom*, bool, const int);
|
||||
extern void updateAtomsPbc(Atom*, Parameter*);
|
||||
extern void updateAtomsPbc_cuda(Atom*, Parameter*, Atom*, const int);
|
||||
extern void setupPbc(Atom*, Parameter*);
|
||||
#endif
|
46
lammps/includes/stats.h
Normal file
46
lammps/includes/stats.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __STATS_H_
|
||||
#define __STATS_H_
|
||||
typedef struct {
|
||||
long long int total_force_neighs;
|
||||
long long int total_force_iters;
|
||||
} Stats;
|
||||
|
||||
void initStats(Stats *s);
|
||||
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
|
||||
|
||||
#ifdef COMPUTE_STATS
|
||||
# define addStat(stat, value) stat += value;
|
||||
# define beginStatTimer() double Si = getTimeStamp();
|
||||
# define endStatTimer(stat) stat += getTimeStamp() - Si;
|
||||
#else
|
||||
# define addStat(stat, value)
|
||||
# define beginStatTimer()
|
||||
# define endStatTimer(stat)
|
||||
#endif
|
||||
|
||||
#endif
|
31
lammps/includes/thermo.h
Normal file
31
lammps/includes/thermo.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
|
||||
#ifndef __THERMO_H_
|
||||
#define __THERMO_H_
|
||||
extern void setupThermo(Parameter*, int);
|
||||
extern void computeThermo(int, Parameter*, Atom*);
|
||||
extern void adjustThermo(Parameter*, Atom*);
|
||||
#endif
|
16
lammps/includes/timers.h
Normal file
16
lammps/includes/timers.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef __TIMERS_H_
|
||||
#define __TIMERS_H_
|
||||
|
||||
typedef enum {
|
||||
TOTAL = 0,
|
||||
NEIGH,
|
||||
FORCE,
|
||||
NEIGH_UPDATE_ATOMS_PBC,
|
||||
NEIGH_SETUP_PBC,
|
||||
NEIGH_UPDATE_PBC,
|
||||
NEIGH_BINATOMS,
|
||||
NEIGH_BUILD_LISTS,
|
||||
NUMTIMER
|
||||
} timertype;
|
||||
|
||||
#endif
|
30
lammps/includes/timing.h
Normal file
30
lammps/includes/timing.h
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#ifndef __TIMING_H_
|
||||
#define __TIMING_H_
|
||||
|
||||
extern double getTimeStamp();
|
||||
extern double getTimeResolution();
|
||||
extern double getTimeStamp_();
|
||||
|
||||
#endif
|
37
lammps/includes/util.h
Normal file
37
lammps/includes/util.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#ifndef __UTIL_H_
|
||||
#define __UTIL_H_
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
#define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#endif
|
||||
#ifndef ABS
|
||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
|
||||
#endif
|
||||
|
||||
extern double myrandom(int*);
|
||||
#endif
|
28
lammps/includes/vtk.h
Normal file
28
lammps/includes/vtk.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <atom.h>
|
||||
|
||||
#ifndef __VTK_H_
|
||||
#define __VTK_H_
|
||||
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
#endif
|
253
lammps/main-stub.c
Normal file
253
lammps/main-stub.c
Normal file
@@ -0,0 +1,253 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
//---
|
||||
#include <likwid-marker.h>
|
||||
//---
|
||||
#include <timing.h>
|
||||
#include <allocate.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <stats.h>
|
||||
#include <thermo.h>
|
||||
#include <pbc.h>
|
||||
#include <timers.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
|
||||
#define LATTICE_DISTANCE 10.0
|
||||
#define NEIGH_DISTANCE 1.0
|
||||
|
||||
extern double computeForce(Parameter*, Atom*, Neighbor*, Stats*, int, int);
|
||||
|
||||
void init(Parameter *param) {
|
||||
param->epsilon = 1.0;
|
||||
param->sigma6 = 1.0;
|
||||
param->rho = 0.8442;
|
||||
param->ntypes = 4;
|
||||
param->ntimes = 200;
|
||||
param->nx = 4;
|
||||
param->ny = 4;
|
||||
param->nz = 2;
|
||||
param->lattice = LATTICE_DISTANCE;
|
||||
param->cutforce = 5.0;
|
||||
param->cutneigh = param->cutforce;
|
||||
param->mass = 1.0;
|
||||
// Unused
|
||||
param->dt = 0.005;
|
||||
param->dtforce = 0.5 * param->dt;
|
||||
param->nstat = 100;
|
||||
param->temp = 1.44;
|
||||
param->every = 20;
|
||||
param->proc_freq = 0.0;
|
||||
}
|
||||
|
||||
// Show debug messages
|
||||
//#define DEBUG(msg) printf(msg)
|
||||
// Do not show debug messages
|
||||
#define DEBUG(msg)
|
||||
|
||||
#define ADD_ATOM(x, y, z, vx, vy, vz) atom_x(atom->Nlocal) = base_x + x * NEIGH_DISTANCE; \
|
||||
atom_y(atom->Nlocal) = base_y + y * NEIGH_DISTANCE; \
|
||||
atom_z(atom->Nlocal) = base_z + z * NEIGH_DISTANCE; \
|
||||
atom->vx[atom->Nlocal] = vy; \
|
||||
atom->vy[atom->Nlocal] = vy; \
|
||||
atom->vz[atom->Nlocal] = vz; \
|
||||
atom->Nlocal++
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
Atom atom_data;
|
||||
Atom *atom = (Atom *)(&atom_data);
|
||||
Neighbor neighbor;
|
||||
Stats stats;
|
||||
Parameter param;
|
||||
int atoms_per_unit_cell = 8;
|
||||
int csv = 0;
|
||||
|
||||
LIKWID_MARKER_INIT;
|
||||
LIKWID_MARKER_REGISTER("force");
|
||||
DEBUG("Initializing parameters...\n");
|
||||
init(¶m);
|
||||
|
||||
for(int i = 0; i < argc; i++)
|
||||
{
|
||||
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
|
||||
{
|
||||
param.ntimes = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nx") == 0))
|
||||
{
|
||||
param.nx = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-ny") == 0))
|
||||
{
|
||||
param.ny = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nz") == 0))
|
||||
{
|
||||
param.nz = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-na") == 0))
|
||||
{
|
||||
atoms_per_unit_cell = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-f") == 0))
|
||||
{
|
||||
param.proc_freq = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-csv") == 0))
|
||||
{
|
||||
csv = 1;
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
||||
{
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
printf(HLINE);
|
||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||
printf("-na <int>: set number of atoms per unit cell\n");
|
||||
printf("-f <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
|
||||
printf("-csv: set output as CSV style\n");
|
||||
printf(HLINE);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
param.xprd = param.nx * LATTICE_DISTANCE;
|
||||
param.yprd = param.ny * LATTICE_DISTANCE;
|
||||
param.zprd = param.nz * LATTICE_DISTANCE;
|
||||
|
||||
DEBUG("Initializing atoms...\n");
|
||||
initAtom(atom);
|
||||
initStats(&stats);
|
||||
|
||||
atom->ntypes = param.ntypes;
|
||||
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
|
||||
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
|
||||
atom->epsilon[i] = param.epsilon;
|
||||
atom->sigma6[i] = param.sigma6;
|
||||
atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
|
||||
atom->cutforcesq[i] = param.cutforce * param.cutforce;
|
||||
}
|
||||
|
||||
DEBUG("Creating atoms...\n");
|
||||
for(int i = 0; i < param.nx; ++i) {
|
||||
for(int j = 0; j < param.ny; ++j) {
|
||||
for(int k = 0; k < param.nz; ++k) {
|
||||
int added_atoms = 0;
|
||||
int fac_x = 1;
|
||||
int fac_y = 1;
|
||||
int fac_z = 1;
|
||||
int fmod = 0;
|
||||
MD_FLOAT base_x = i * LATTICE_DISTANCE;
|
||||
MD_FLOAT base_y = j * LATTICE_DISTANCE;
|
||||
MD_FLOAT base_z = k * LATTICE_DISTANCE;
|
||||
MD_FLOAT vx = 0.0;
|
||||
MD_FLOAT vy = 0.0;
|
||||
MD_FLOAT vz = 0.0;
|
||||
|
||||
while(atom->Nlocal > atom->Nmax - atoms_per_unit_cell) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
while(fac_x * fac_y * fac_z < atoms_per_unit_cell) {
|
||||
if(fmod == 0) { fac_x *= 2; }
|
||||
if(fmod == 1) { fac_y *= 2; }
|
||||
if(fmod == 2) { fac_z *= 2; }
|
||||
fmod = (fmod + 1) % 3;
|
||||
}
|
||||
|
||||
MD_FLOAT offset_x = (fac_x > 1) ? 1.0 / (fac_x - 1) : (int)fac_x;
|
||||
MD_FLOAT offset_y = (fac_y > 1) ? 1.0 / (fac_y - 1) : (int)fac_y;
|
||||
MD_FLOAT offset_z = (fac_z > 1) ? 1.0 / (fac_z - 1) : (int)fac_z;
|
||||
for(int ii = 0; ii < fac_x; ++ii) {
|
||||
for(int jj = 0; jj < fac_y; ++jj) {
|
||||
for(int kk = 0; kk < fac_z; ++kk) {
|
||||
if(added_atoms < atoms_per_unit_cell) {
|
||||
atom->type[atom->Nlocal] = rand() % atom->ntypes;
|
||||
ADD_ATOM(ii * offset_x, jj * offset_y, kk * offset_z, vx, vy, vz);
|
||||
added_atoms++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
|
||||
const double estim_neighbors_volume = (double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int));
|
||||
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
|
||||
|
||||
if(!csv) {
|
||||
printf("Number of timesteps: %d\n", param.ntimes);
|
||||
printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS);
|
||||
printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
|
||||
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
||||
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
||||
printf("Total number of atoms: %d\n", atom->Nlocal);
|
||||
printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
|
||||
printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
|
||||
printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
|
||||
}
|
||||
|
||||
DEBUG("Initializing neighbor lists...\n");
|
||||
initNeighbor(&neighbor, ¶m);
|
||||
DEBUG("Setting up neighbor lists...\n");
|
||||
setupNeighbor();
|
||||
DEBUG("Building neighbor lists...\n");
|
||||
buildNeighbor(atom, &neighbor);
|
||||
DEBUG("Computing forces...\n");
|
||||
computeForce(¶m, atom, &neighbor, &stats, 1, 1);
|
||||
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
for(int i = 0; i < param.ntimes; i++) {
|
||||
computeForce(¶m, atom, &neighbor, &stats, 0, i + 1);
|
||||
}
|
||||
E = getTimeStamp();
|
||||
double T_accum = E-S;
|
||||
double freq_hz = param.proc_freq * 1.e9;
|
||||
const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS;
|
||||
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats);
|
||||
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz;
|
||||
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
||||
|
||||
if(!csv) {
|
||||
printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6);
|
||||
if(param.proc_freq > 0.0) {
|
||||
printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
|
||||
}
|
||||
} else {
|
||||
printf("steps,unit cells,atoms/unit cell,total atoms,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
|
||||
if(param.proc_freq > 0.0) {
|
||||
printf(",cy/atom,cy/neigh");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("%d,%dx%dx%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
|
||||
param.ntimes, param.nx, param.ny, param.nz, atoms_per_unit_cell, atom->Nlocal,
|
||||
estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6);
|
||||
|
||||
if(param.proc_freq > 0.0) {
|
||||
printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
double timer[NUMTIMER];
|
||||
timer[FORCE] = T_accum;
|
||||
displayStatistics(atom, ¶m, &stats, timer);
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
425
lammps/main.c
Normal file
425
lammps/main.c
Normal file
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
|
||||
#include <likwid-marker.h>
|
||||
|
||||
#include <timing.h>
|
||||
#include <allocate.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <stats.h>
|
||||
#include <thermo.h>
|
||||
#include <pbc.h>
|
||||
#include <timers.h>
|
||||
#include <eam.h>
|
||||
#include <vtk.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
|
||||
extern void cuda_final_integrate(bool doReneighbour, Parameter *param,
|
||||
Atom *atom, Atom *c_atom,
|
||||
const int num_threads_per_block);
|
||||
extern void cuda_initial_integrate(bool doReneighbour, Parameter *param,
|
||||
Atom *atom, Atom *c_atom,
|
||||
const int num_threads_per_block);
|
||||
|
||||
extern double computeForce(bool, Parameter*, Atom*, Neighbor*, Atom*, Neighbor*, const int);
|
||||
extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
|
||||
extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
|
||||
|
||||
void init(Parameter *param)
|
||||
{
|
||||
param->input_file = NULL;
|
||||
param->vtk_file = NULL;
|
||||
param->force_field = FF_LJ;
|
||||
param->epsilon = 1.0;
|
||||
param->sigma6 = 1.0;
|
||||
param->rho = 0.8442;
|
||||
param->ntypes = 4;
|
||||
param->ntimes = 200;
|
||||
param->dt = 0.005;
|
||||
param->nx = 32;
|
||||
param->ny = 32;
|
||||
param->nz = 32;
|
||||
param->cutforce = 2.5;
|
||||
param->cutneigh = param->cutforce + 0.30;
|
||||
param->temp = 1.44;
|
||||
param->nstat = 100;
|
||||
param->mass = 1.0;
|
||||
param->dtforce = 0.5 * param->dt;
|
||||
param->every = 20;
|
||||
param->proc_freq = 2.4;
|
||||
}
|
||||
|
||||
void initCudaAtom(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor) {
|
||||
|
||||
c_atom->Natoms = atom->Natoms;
|
||||
c_atom->Nlocal = atom->Nlocal;
|
||||
c_atom->Nghost = atom->Nghost;
|
||||
c_atom->Nmax = atom->Nmax;
|
||||
c_atom->ntypes = atom->ntypes;
|
||||
|
||||
c_atom->border_map = NULL;
|
||||
|
||||
const int Nlocal = atom->Nlocal;
|
||||
|
||||
checkCUDAError( "c_atom->x malloc", cudaMalloc((void**)&(c_atom->x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
|
||||
checkCUDAError( "c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
||||
|
||||
checkCUDAError( "c_atom->fx malloc", cudaMalloc((void**)&(c_atom->fx), sizeof(MD_FLOAT) * Nlocal * 3) );
|
||||
|
||||
checkCUDAError( "c_atom->vx malloc", cudaMalloc((void**)&(c_atom->vx), sizeof(MD_FLOAT) * Nlocal * 3) );
|
||||
checkCUDAError( "c_atom->vx memcpy", cudaMemcpy(c_atom->vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
|
||||
|
||||
checkCUDAError( "c_atom->type malloc", cudaMalloc((void**)&(c_atom->type), sizeof(int) * atom->Nmax) );
|
||||
checkCUDAError( "c_atom->epsilon malloc", cudaMalloc((void**)&(c_atom->epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
||||
checkCUDAError( "c_atom->sigma6 malloc", cudaMalloc((void**)&(c_atom->sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
||||
checkCUDAError( "c_atom->cutforcesq malloc", cudaMalloc((void**)&(c_atom->cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
||||
|
||||
checkCUDAError( "c_neighbor->neighbors malloc", cudaMalloc((void**)&c_neighbor->neighbors, sizeof(int) * Nlocal * neighbor->maxneighs) );
|
||||
checkCUDAError( "c_neighbor->numneigh malloc", cudaMalloc((void**)&c_neighbor->numneigh, sizeof(int) * Nlocal) );
|
||||
|
||||
checkCUDAError( "c_atom->type memcpy", cudaMemcpy(c_atom->type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
|
||||
checkCUDAError( "c_atom->sigma6 memcpy", cudaMemcpy(c_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
|
||||
checkCUDAError( "c_atom->epsilon memcpy", cudaMemcpy(c_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
|
||||
|
||||
checkCUDAError( "c_atom->cutforcesq memcpy", cudaMemcpy(c_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
|
||||
}
|
||||
|
||||
|
||||
double setup(
|
||||
Parameter *param,
|
||||
Eam *eam,
|
||||
Atom *atom,
|
||||
Neighbor *neighbor,
|
||||
Atom *c_atom,
|
||||
Neighbor *c_neighbor,
|
||||
Stats *stats,
|
||||
const int num_threads_per_block,
|
||||
double* timers)
|
||||
{
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
|
||||
S = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initNeighbor(neighbor, param);
|
||||
initPbc(atom);
|
||||
initStats(stats);
|
||||
setupNeighbor();
|
||||
createAtom(atom, param);
|
||||
setupThermo(param, atom->Natoms);
|
||||
adjustThermo(param, atom);
|
||||
setupPbc(atom, param);
|
||||
initCudaAtom(atom, neighbor, c_atom, c_neighbor);
|
||||
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
|
||||
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block, timers);
|
||||
E = getTimeStamp();
|
||||
|
||||
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reneighbour(
|
||||
Parameter *param,
|
||||
Atom *atom,
|
||||
Neighbor *neighbor,
|
||||
Atom *c_atom,
|
||||
Neighbor *c_neighbor,
|
||||
const int num_threads_per_block,
|
||||
double* timers)
|
||||
{
|
||||
double S, E, beforeEvent, afterEvent;
|
||||
|
||||
S = getTimeStamp();
|
||||
beforeEvent = S;
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
updateAtomsPbc_cuda(atom, param, c_atom, num_threads_per_block);
|
||||
afterEvent = getTimeStamp();
|
||||
timers[NEIGH_UPDATE_ATOMS_PBC] += afterEvent - beforeEvent;
|
||||
beforeEvent = afterEvent;
|
||||
setupPbc(atom, param);
|
||||
afterEvent = getTimeStamp();
|
||||
timers[NEIGH_SETUP_PBC] += afterEvent - beforeEvent;
|
||||
beforeEvent = afterEvent;
|
||||
updatePbc_cuda(atom, param, c_atom, true, num_threads_per_block);
|
||||
afterEvent = getTimeStamp();
|
||||
timers[NEIGH_UPDATE_PBC] += afterEvent - beforeEvent;
|
||||
beforeEvent = afterEvent;
|
||||
//sortAtom(atom);
|
||||
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block, timers);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
E = getTimeStamp();
|
||||
afterEvent = E;
|
||||
timers[NEIGH_BUILD_LISTS] += afterEvent - beforeEvent;
|
||||
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void initialIntegrate(Parameter *param, Atom *atom)
|
||||
{
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
atom_vx(i) += param->dtforce * atom_fx(i);
|
||||
atom_vy(i) += param->dtforce * atom_fy(i);
|
||||
atom_vz(i) += param->dtforce * atom_fz(i);
|
||||
atom_x(i) = atom_x(i) + param->dt * atom_vx(i);
|
||||
atom_y(i) = atom_y(i) + param->dt * atom_vy(i);
|
||||
atom_z(i) = atom_z(i) + param->dt * atom_vz(i);
|
||||
}
|
||||
}
|
||||
|
||||
void finalIntegrate(Parameter *param, Atom *atom)
|
||||
{
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
atom_vx(i) += param->dtforce * atom_fx(i);
|
||||
atom_vy(i) += param->dtforce * atom_fy(i);
|
||||
atom_vz(i) += param->dtforce * atom_fz(i);
|
||||
}
|
||||
}
|
||||
|
||||
void printAtomState(Atom *atom)
|
||||
{
|
||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
|
||||
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
||||
|
||||
/* int nall = atom->Nlocal + atom->Nghost; */
|
||||
|
||||
/* for (int i=0; i<nall; i++) { */
|
||||
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
|
||||
/* } */
|
||||
}
|
||||
|
||||
int str2ff(const char *string)
|
||||
{
|
||||
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
|
||||
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* ff2str(int ff)
|
||||
{
|
||||
if(ff == FF_LJ) { return "lj"; }
|
||||
if(ff == FF_EAM) { return "eam"; }
|
||||
return "invalid";
|
||||
}
|
||||
|
||||
int get_num_threads() {
|
||||
|
||||
const char *num_threads_env = getenv("NUM_THREADS");
|
||||
int num_threads = 0;
|
||||
if(num_threads_env == 0)
|
||||
num_threads = 32;
|
||||
else {
|
||||
num_threads = atoi(num_threads_env);
|
||||
}
|
||||
|
||||
return num_threads;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
double timer[NUMTIMER];
|
||||
Eam eam;
|
||||
Atom atom;
|
||||
Neighbor neighbor;
|
||||
Stats stats;
|
||||
Parameter param;
|
||||
Atom c_atom;
|
||||
Neighbor c_neighbor;
|
||||
|
||||
LIKWID_MARKER_INIT;
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_REGISTER("force");
|
||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||
//LIKWID_MARKER_REGISTER("pbc");
|
||||
}
|
||||
init(¶m);
|
||||
|
||||
for(int i = 0; i < argc; i++)
|
||||
{
|
||||
if((strcmp(argv[i], "-f") == 0))
|
||||
{
|
||||
if((param.force_field = str2ff(argv[++i])) < 0) {
|
||||
fprintf(stderr, "Invalid force field!\n");
|
||||
exit(-1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-i") == 0))
|
||||
{
|
||||
param.input_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))
|
||||
{
|
||||
param.ntimes = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nx") == 0))
|
||||
{
|
||||
param.nx = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-ny") == 0))
|
||||
{
|
||||
param.ny = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nz") == 0))
|
||||
{
|
||||
param.nz = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "--freq") == 0))
|
||||
{
|
||||
param.proc_freq = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "--vtk") == 0))
|
||||
{
|
||||
param.vtk_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0))
|
||||
{
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
printf(HLINE);
|
||||
printf("-f <string>: force field (lj or eam), default lj\n");
|
||||
printf("-i <string>: input file for EAM\n");
|
||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||
printf("--freq <real>: processor frequency (GHz)\n");
|
||||
printf("--vtk <string>: VTK file for visualization\n");
|
||||
printf(HLINE);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// this should be multiple of 32 as operations are performed at the level of warps
|
||||
const int num_threads_per_block = get_num_threads();
|
||||
|
||||
setup(¶m, &eam, &atom, &neighbor, &c_atom, &c_neighbor, &stats, num_threads_per_block, (double*) &timer);
|
||||
computeThermo(0, ¶m, &atom);
|
||||
if(param.force_field == FF_EAM) {
|
||||
computeForceEam(&eam, ¶m, &atom, &neighbor, &stats, 1, 0);
|
||||
} else {
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
|
||||
computeForceTracing(¶m, &atom, &neighbor, &stats, 1, 0);
|
||||
#else
|
||||
computeForce(true, ¶m, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
|
||||
#endif
|
||||
}
|
||||
|
||||
timer[FORCE] = 0.0;
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
timer[NEIGH_UPDATE_ATOMS_PBC] = 0.0;
|
||||
timer[NEIGH_SETUP_PBC] = 0.0;
|
||||
timer[NEIGH_UPDATE_PBC] = 0.0;
|
||||
timer[NEIGH_BINATOMS] = 0.0;
|
||||
timer[NEIGH_BUILD_LISTS] = 0.0;
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
|
||||
}
|
||||
|
||||
for(int n = 0; n < param.ntimes; n++) {
|
||||
|
||||
const bool doReneighbour = (n + 1) % param.every == 0;
|
||||
|
||||
cuda_initial_integrate(doReneighbour, ¶m, &atom, &c_atom, num_threads_per_block);
|
||||
|
||||
if(doReneighbour) {
|
||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block, (double*) &timer);
|
||||
} else {
|
||||
double before = getTimeStamp();
|
||||
updatePbc_cuda(&atom, ¶m, &c_atom, false, num_threads_per_block);
|
||||
double after = getTimeStamp();
|
||||
timer[NEIGH_UPDATE_PBC] += after - before;
|
||||
}
|
||||
|
||||
if(param.force_field == FF_EAM) {
|
||||
timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats, 0, n + 1);
|
||||
} else {
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER) || defined(COMPUTE_STATS)
|
||||
timer[FORCE] += computeForceTracing(¶m, &atom, &neighbor, &stats, 0, n + 1);
|
||||
#else
|
||||
timer[FORCE] += computeForce(doReneighbour, ¶m, &atom, &neighbor, &c_atom, &c_neighbor, num_threads_per_block);
|
||||
#endif
|
||||
}
|
||||
|
||||
cuda_final_integrate(doReneighbour, ¶m, &atom, &c_atom, num_threads_per_block);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
checkCUDAError("computeThermo atom->x memcpy back", cudaMemcpy(atom.x, c_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3, cudaMemcpyDeviceToHost) );
|
||||
computeThermo(n + 1, ¶m, &atom);
|
||||
}
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
|
||||
}
|
||||
}
|
||||
|
||||
timer[NEIGH_BUILD_LISTS] -= timer[NEIGH_BINATOMS];
|
||||
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
||||
computeThermo(-1, ¶m, &atom);
|
||||
|
||||
printf(HLINE);
|
||||
printf("Force field: %s\n", ff2str(param.force_field));
|
||||
printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
|
||||
#if PRECISION == 1
|
||||
printf("Using single precision floating point.\n");
|
||||
#else
|
||||
printf("Using double precision floating point.\n");
|
||||
#endif
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs NEIGH_TIMERS: UPD_AT: %.2fs SETUP_PBC %.2fs UPDATE_PBC %.2fs BINATOMS %.2fs BUILD_NEIGHBOR %.2fs\n",
|
||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH], timer[NEIGH_UPDATE_ATOMS_PBC], timer[NEIGH_SETUP_PBC], timer[NEIGH_UPDATE_PBC], timer[NEIGH_BINATOMS], timer[NEIGH_BUILD_LISTS]);
|
||||
printf(HLINE);
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
double atomUpdatesTotal = (double) atom.Natoms * param.ntimes;
|
||||
printf("Force_perf in millions per sec: %.2f\n", 1e-6 * atomUpdatesTotal / timer[FORCE]);
|
||||
double atomNeighUpdatesTotal = (double) atom.Natoms * param.ntimes / param.every;
|
||||
printf("Neighbor_perf in millions per sec: updateAtomsPbc: %.2f setupPbc: %.2f updatePbc: %.2f binAtoms: %.2f buildNeighbor_wo_binning: %.2f\n", 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_UPDATE_ATOMS_PBC], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_SETUP_PBC], 1e-6 * atomUpdatesTotal / timer[NEIGH_UPDATE_PBC], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_BINATOMS], 1e-6 * atomNeighUpdatesTotal / timer[NEIGH_BUILD_LISTS]);
|
||||
#ifdef COMPUTE_STATS
|
||||
displayStatistics(&atom, ¶m, &stats, timer);
|
||||
#endif
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
719
lammps/neighbor.cu
Normal file
719
lammps/neighbor.cu
Normal file
@@ -0,0 +1,719 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2021 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <cuda_profiler_api.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <device_launch_parameters.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <timing.h>
|
||||
#include <timers.h>
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
}
|
||||
|
||||
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin,
|
||||
Neighbor_params np)
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
if(xin >= np.xprd) {
|
||||
ix = (int)((xin - np.xprd) * np.bininvx) + np.nbinx - np.mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * np.bininvx) - np.mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * np.bininvx) - np.mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= np.yprd) {
|
||||
iy = (int)((yin - np.yprd) * np.bininvy) + np.nbiny - np.mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * np.bininvy) - np.mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * np.bininvy) - np.mbinylo - 1;
|
||||
}
|
||||
|
||||
if(zin >= np.zprd) {
|
||||
iz = (int)((zin - np.zprd) * np.bininvz) + np.nbinz - np.mbinzlo;
|
||||
} else if(zin >= 0.0) {
|
||||
iz = (int)(zin * np.bininvz) - np.mbinzlo;
|
||||
} else {
|
||||
iz = (int)(zin * np.bininvz) - np.mbinzlo - 1;
|
||||
}
|
||||
|
||||
return (iz * np.mbiny * np.mbinx + iy * np.mbinx + ix + 1);
|
||||
}
|
||||
|
||||
/* sorts the contents of a bin to make it comparable to the CPU version */
|
||||
/* uses bubble sort since atoms per bin should be relatively small and can be done in situ */
|
||||
__global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin){
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (i >= mbins){
|
||||
return;
|
||||
}
|
||||
|
||||
int atoms_in_bin = bincount[i];
|
||||
int* bin_ptr = &bins[i * atoms_per_bin];
|
||||
int sorted;
|
||||
do {
|
||||
sorted = 1;
|
||||
int tmp;
|
||||
for(int index = 0; index < atoms_in_bin - 1; index++){
|
||||
if (bin_ptr[index] > bin_ptr[index + 1]){
|
||||
tmp = bin_ptr[index];
|
||||
bin_ptr[index] = bin_ptr[index + 1];
|
||||
bin_ptr[index + 1] = tmp;
|
||||
sorted = 0;
|
||||
}
|
||||
}
|
||||
} while (!sorted);
|
||||
}
|
||||
|
||||
__global__ void binatoms_kernel(Atom a, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed){
|
||||
Atom* atom = &a;
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
if(i >= nall){
|
||||
return;
|
||||
}
|
||||
|
||||
MD_FLOAT x = atom_x(i);
|
||||
MD_FLOAT y = atom_y(i);
|
||||
MD_FLOAT z = atom_z(i);
|
||||
int ibin = coord2bin_device(x, y, z, np);
|
||||
|
||||
int ac = atomicAdd(&bincount[ibin], 1);
|
||||
|
||||
if(ac < atoms_per_bin){
|
||||
bins[ibin * atoms_per_bin + ac] = i;
|
||||
} else {
|
||||
atomicMax(resize_needed, ac);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int Nlocal = a.Nlocal;
|
||||
if( i >= Nlocal ) {
|
||||
return;
|
||||
}
|
||||
|
||||
Atom *atom = &a;
|
||||
Neighbor *neighbor = &neigh;
|
||||
|
||||
int* neighptr = &(neighbor->neighbors[i]);
|
||||
int n = 0;
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[i];
|
||||
#endif
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int j = loc_bin[m];
|
||||
|
||||
if ( j == i ){
|
||||
continue;
|
||||
}
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[j];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
#else
|
||||
const MD_FLOAT cutoff = cutneighsq;
|
||||
#endif
|
||||
|
||||
if( rsq <= cutoff ) {
|
||||
int idx = atom->Nlocal * n;
|
||||
neighptr[idx] = j;
|
||||
n += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numneigh[i] = n;
|
||||
|
||||
if(n > neighbor->maxneighs) {
|
||||
atomicMax(new_maxneighs, n);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
static MD_FLOAT xprd, yprd, zprd;
|
||||
static MD_FLOAT bininvx, bininvy, bininvz;
|
||||
static int mbinxlo, mbinylo, mbinzlo;
|
||||
static int nbinx, nbiny, nbinz;
|
||||
static int mbinx, mbiny, mbinz; // n bins in x, y, z
|
||||
static int *bincount;
|
||||
static int *bins;
|
||||
static int mbins; //total number of bins
|
||||
static int atoms_per_bin; // max atoms per bin
|
||||
static MD_FLOAT cutneigh;
|
||||
static MD_FLOAT cutneighsq; // neighbor cutoff squared
|
||||
static int nmax;
|
||||
static int nstencil; // # of bins in stencil
|
||||
static int* stencil; // stencil list of bin offsets
|
||||
static MD_FLOAT binsizex, binsizey, binsizez;
|
||||
|
||||
static int* c_stencil = NULL;
|
||||
static int* c_resize_needed = NULL;
|
||||
static int* c_new_maxneighs = NULL;
|
||||
static Binning c_binning{
|
||||
.bincount = NULL,
|
||||
.bins = NULL,
|
||||
.mbins = 0,
|
||||
.atoms_per_bin = 0
|
||||
};
|
||||
|
||||
|
||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
|
||||
static MD_FLOAT bindist(int, int, int);
|
||||
|
||||
/* exported subroutines */
|
||||
void initNeighbor(Neighbor *neighbor, Parameter *param)
|
||||
{
|
||||
MD_FLOAT neighscale = 5.0 / 6.0;
|
||||
xprd = param->nx * param->lattice;
|
||||
yprd = param->ny * param->lattice;
|
||||
zprd = param->nz * param->lattice;
|
||||
cutneigh = param->cutneigh;
|
||||
nbinx = neighscale * param->nx;
|
||||
nbiny = neighscale * param->ny;
|
||||
nbinz = neighscale * param->nz;
|
||||
nmax = 0;
|
||||
atoms_per_bin = 8;
|
||||
stencil = NULL;
|
||||
bins = NULL;
|
||||
bincount = NULL;
|
||||
neighbor->maxneighs = 100;
|
||||
neighbor->numneigh = NULL;
|
||||
neighbor->neighbors = NULL;
|
||||
}
|
||||
|
||||
void setupNeighbor()
|
||||
{
|
||||
MD_FLOAT coord;
|
||||
int mbinxhi, mbinyhi, mbinzhi;
|
||||
int nextx, nexty, nextz;
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
|
||||
|
||||
cutneighsq = cutneigh * cutneigh;
|
||||
binsizex = xprd / nbinx;
|
||||
binsizey = yprd / nbiny;
|
||||
binsizez = zprd / nbinz;
|
||||
bininvx = 1.0 / binsizex;
|
||||
bininvy = 1.0 / binsizey;
|
||||
bininvz = 1.0 / binsizez;
|
||||
|
||||
coord = xlo - cutneigh - SMALL * xprd;
|
||||
mbinxlo = (int) (coord * bininvx);
|
||||
if (coord < 0.0) {
|
||||
mbinxlo = mbinxlo - 1;
|
||||
}
|
||||
coord = xhi + cutneigh + SMALL * xprd;
|
||||
mbinxhi = (int) (coord * bininvx);
|
||||
|
||||
coord = ylo - cutneigh - SMALL * yprd;
|
||||
mbinylo = (int) (coord * bininvy);
|
||||
if (coord < 0.0) {
|
||||
mbinylo = mbinylo - 1;
|
||||
}
|
||||
coord = yhi + cutneigh + SMALL * yprd;
|
||||
mbinyhi = (int) (coord * bininvy);
|
||||
|
||||
coord = zlo - cutneigh - SMALL * zprd;
|
||||
mbinzlo = (int) (coord * bininvz);
|
||||
if (coord < 0.0) {
|
||||
mbinzlo = mbinzlo - 1;
|
||||
}
|
||||
coord = zhi + cutneigh + SMALL * zprd;
|
||||
mbinzhi = (int) (coord * bininvz);
|
||||
|
||||
mbinxlo = mbinxlo - 1;
|
||||
mbinxhi = mbinxhi + 1;
|
||||
mbinx = mbinxhi - mbinxlo + 1;
|
||||
|
||||
mbinylo = mbinylo - 1;
|
||||
mbinyhi = mbinyhi + 1;
|
||||
mbiny = mbinyhi - mbinylo + 1;
|
||||
|
||||
mbinzlo = mbinzlo - 1;
|
||||
mbinzhi = mbinzhi + 1;
|
||||
mbinz = mbinzhi - mbinzlo + 1;
|
||||
|
||||
nextx = (int) (cutneigh * bininvx);
|
||||
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
|
||||
|
||||
nexty = (int) (cutneigh * bininvy);
|
||||
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
|
||||
|
||||
nextz = (int) (cutneigh * bininvz);
|
||||
if(nextz * binsizez < FACTOR * cutneigh) nextz++;
|
||||
|
||||
if (stencil) {
|
||||
free(stencil);
|
||||
}
|
||||
|
||||
stencil = (int*) malloc(
|
||||
(2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
|
||||
|
||||
nstencil = 0;
|
||||
int kstart = -nextz;
|
||||
|
||||
for(int k = kstart; k <= nextz; k++) {
|
||||
for(int j = -nexty; j <= nexty; j++) {
|
||||
for(int i = -nextx; i <= nextx; i++) {
|
||||
if(bindist(i, j, k) < cutneighsq) {
|
||||
stencil[nstencil++] =
|
||||
k * mbiny * mbinx + j * mbinx + i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mbins = mbinx * mbiny * mbinz;
|
||||
|
||||
if (bincount) {
|
||||
free(bincount);
|
||||
}
|
||||
bincount = (int*) malloc(mbins * sizeof(int));
|
||||
|
||||
if (bins) {
|
||||
free(bins);
|
||||
}
|
||||
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
|
||||
}
|
||||
|
||||
void buildNeighbor(Atom *atom, Neighbor *neighbor)
|
||||
{
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
|
||||
/* extend atom arrays if necessary */
|
||||
if(nall > nmax) {
|
||||
nmax = nall;
|
||||
if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
|
||||
if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
|
||||
checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
|
||||
checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
|
||||
// neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||
// neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
|
||||
}
|
||||
|
||||
/* bin local & ghost atoms */
|
||||
binatoms(atom);
|
||||
int resize = 1;
|
||||
|
||||
/* loop over each atom, storing neighbors */
|
||||
while(resize) {
|
||||
int new_maxneighs = neighbor->maxneighs;
|
||||
resize = 0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
int* neighptr = &(neighbor->neighbors[i]);
|
||||
int n = 0;
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[i];
|
||||
#endif
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int j = loc_bin[m];
|
||||
|
||||
if ( j == i ){
|
||||
continue;
|
||||
}
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[j];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
#else
|
||||
const MD_FLOAT cutoff = cutneighsq;
|
||||
#endif
|
||||
|
||||
if( rsq <= cutoff ) {
|
||||
int idx = atom->Nlocal * n;
|
||||
neighptr[idx] = j;
|
||||
n += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numneigh[i] = n;
|
||||
|
||||
if(n >= neighbor->maxneighs) {
|
||||
resize = 1;
|
||||
|
||||
if(n >= new_maxneighs) {
|
||||
new_maxneighs = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
printf("RESIZE %d\n", neighbor->maxneighs);
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
free(neighbor->neighbors);
|
||||
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* internal subroutines */
|
||||
MD_FLOAT bindist(int i, int j, int k)
|
||||
{
|
||||
MD_FLOAT delx, dely, delz;
|
||||
|
||||
if(i > 0) {
|
||||
delx = (i - 1) * binsizex;
|
||||
} else if(i == 0) {
|
||||
delx = 0.0;
|
||||
} else {
|
||||
delx = (i + 1) * binsizex;
|
||||
}
|
||||
|
||||
if(j > 0) {
|
||||
dely = (j - 1) * binsizey;
|
||||
} else if(j == 0) {
|
||||
dely = 0.0;
|
||||
} else {
|
||||
dely = (j + 1) * binsizey;
|
||||
}
|
||||
|
||||
if(k > 0) {
|
||||
delz = (k - 1) * binsizez;
|
||||
} else if(k == 0) {
|
||||
delz = 0.0;
|
||||
} else {
|
||||
delz = (k + 1) * binsizez;
|
||||
}
|
||||
|
||||
return (delx * delx + dely * dely + delz * delz);
|
||||
}
|
||||
|
||||
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin)
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
if(xin >= xprd) {
|
||||
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * bininvx) - mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * bininvy) - mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
}
|
||||
|
||||
if(zin >= zprd) {
|
||||
iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
|
||||
} else if(zin >= 0.0) {
|
||||
iz = (int)(zin * bininvz) - mbinzlo;
|
||||
} else {
|
||||
iz = (int)(zin * bininvz) - mbinzlo - 1;
|
||||
}
|
||||
|
||||
return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
|
||||
}
|
||||
|
||||
void binatoms(Atom *atom)
|
||||
{
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
int resize = 1;
|
||||
|
||||
while(resize > 0) {
|
||||
resize = 0;
|
||||
|
||||
for(int i = 0; i < mbins; i++) {
|
||||
bincount[i] = 0;
|
||||
}
|
||||
|
||||
for(int i = 0; i < nall; i++) {
|
||||
MD_FLOAT x = atom_x(i);
|
||||
MD_FLOAT y = atom_y(i);
|
||||
MD_FLOAT z = atom_z(i);
|
||||
int ibin = coord2bin(x, y, z);
|
||||
|
||||
if(bincount[ibin] < atoms_per_bin) {
|
||||
int ac = bincount[ibin]++;
|
||||
bins[ibin * atoms_per_bin + ac] = i;
|
||||
} else {
|
||||
resize = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(bins);
|
||||
atoms_per_bin *= 2;
|
||||
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sortAtom(Atom* atom) {
|
||||
binatoms(atom);
|
||||
int Nmax = atom->Nmax;
|
||||
int* binpos = bincount;
|
||||
|
||||
for(int i=1; i<mbins; i++) {
|
||||
binpos[i] += binpos[i-1];
|
||||
}
|
||||
|
||||
#ifdef AOS
|
||||
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
|
||||
|
||||
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
|
||||
#else
|
||||
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
|
||||
MD_FLOAT* new_y = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
|
||||
MD_FLOAT* new_z = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
|
||||
|
||||
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
|
||||
MD_FLOAT* new_vy = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
|
||||
MD_FLOAT* new_vz = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
|
||||
#endif
|
||||
|
||||
MD_FLOAT* old_x = atom->x; MD_FLOAT* old_y = atom->y; MD_FLOAT* old_z = atom->z;
|
||||
MD_FLOAT* old_vx = atom->vx; MD_FLOAT* old_vy = atom->vy; MD_FLOAT* old_vz = atom->vz;
|
||||
|
||||
for(int mybin = 0; mybin<mbins; mybin++) {
|
||||
int start = mybin>0?binpos[mybin-1]:0;
|
||||
int count = binpos[mybin] - start;
|
||||
for(int k=0; k<count; k++) {
|
||||
int new_i = start + k;
|
||||
int old_i = bins[mybin * atoms_per_bin + k];
|
||||
#ifdef AOS
|
||||
new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
|
||||
new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
|
||||
new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
|
||||
|
||||
new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
|
||||
new_vx[new_i * 3 + 1] = old_vy[old_i * 3 + 1];
|
||||
new_vx[new_i * 3 + 2] = old_vz[old_i * 3 + 2];
|
||||
#else
|
||||
new_x[new_i] = old_x[old_i];
|
||||
new_y[new_i] = old_y[old_i];
|
||||
new_z[new_i] = old_z[old_i];
|
||||
|
||||
new_vx[new_i] = old_vx[old_i];
|
||||
new_vy[new_i] = old_vy[old_i];
|
||||
new_vz[new_i] = old_vz[old_i];
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
free(atom->x);
|
||||
atom->x = new_x;
|
||||
|
||||
free(atom->vx);
|
||||
atom->vx = new_vx;
|
||||
#ifndef AOS
|
||||
free(atom->y);
|
||||
free(atom->z);
|
||||
atom->y = new_y; atom->z = new_z;
|
||||
|
||||
free(atom->vy); free(atom->vz);
|
||||
atom->vy = new_vy; atom->vz = new_vz;
|
||||
#endif
|
||||
}
|
||||
|
||||
void binatoms_cuda(Atom* c_atom, Binning* c_binning, int* c_resize_needed, Neighbor_params *np, const int threads_per_block)
|
||||
{
|
||||
int nall = c_atom->Nlocal + c_atom->Nghost;
|
||||
int resize = 1;
|
||||
|
||||
const int num_blocks = ceil((float)nall / (float)threads_per_block);
|
||||
|
||||
while(resize > 0) {
|
||||
resize = 0;
|
||||
checkCUDAError("binatoms_cuda c_binning->bincount memset", cudaMemset(c_binning->bincount, 0, c_binning->mbins * sizeof(int)));
|
||||
checkCUDAError("binatoms_cuda c_resize_needed memset", cudaMemset(c_resize_needed, 0, sizeof(int)) );
|
||||
|
||||
/*binatoms_kernel(Atom a, int* bincount, int* bins, int c_binning->atoms_per_bin, Neighbor_params np, int *resize_needed) */
|
||||
binatoms_kernel<<<num_blocks, threads_per_block>>>(*c_atom, c_binning->bincount, c_binning->bins, c_binning->atoms_per_bin, *np, c_resize_needed);
|
||||
|
||||
checkCUDAError( "PeekAtLastError binatoms kernel", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync binatoms kernel", cudaDeviceSynchronize() );
|
||||
|
||||
checkCUDAError("binatoms_cuda c_resize_needed memcpy back", cudaMemcpy(&resize, c_resize_needed, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
if(resize) {
|
||||
cudaFree(c_binning->bins);
|
||||
c_binning->atoms_per_bin *= 2;
|
||||
checkCUDAError("binatoms_cuda c_binning->bins resize malloc", cudaMalloc(&c_binning->bins, c_binning->mbins * c_binning->atoms_per_bin * sizeof(int)) );
|
||||
}
|
||||
}
|
||||
atoms_per_bin = c_binning->atoms_per_bin;
|
||||
const int sortBlocks = ceil((float)mbins / (float)threads_per_block);
|
||||
/*void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin)*/
|
||||
sort_bin_contents_kernel<<<sortBlocks, threads_per_block>>>(c_binning->bincount, c_binning->bins, c_binning->mbins, c_binning->atoms_per_bin);
|
||||
checkCUDAError( "PeekAtLastError sort_bin_contents kernel", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync sort_bin_contents kernel", cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor, const int num_threads_per_block, double* timers)
|
||||
{
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
c_neighbor->maxneighs = neighbor->maxneighs;
|
||||
|
||||
cudaProfilerStart();
|
||||
/* upload stencil */
|
||||
// TODO move all of this initialization into its own method
|
||||
if(c_stencil == NULL){
|
||||
checkCUDAError( "buildNeighbor c_n_stencil malloc", cudaMalloc((void**)&c_stencil, nstencil * sizeof(int)) );
|
||||
checkCUDAError( "buildNeighbor c_n_stencil memcpy", cudaMemcpy(c_stencil, stencil, nstencil * sizeof(int), cudaMemcpyHostToDevice ));
|
||||
}
|
||||
|
||||
if(c_binning.mbins == 0){
|
||||
c_binning.mbins = mbins;
|
||||
c_binning.atoms_per_bin = atoms_per_bin;
|
||||
checkCUDAError( "buildNeighbor c_binning->bincount malloc", cudaMalloc((void**)&(c_binning.bincount), c_binning.mbins * sizeof(int)) );
|
||||
checkCUDAError( "buidlNeighbor c_binning->bins malloc", cudaMalloc((void**)&(c_binning.bins), c_binning.mbins * c_binning.atoms_per_bin * sizeof(int)) );
|
||||
}
|
||||
|
||||
Neighbor_params np{
|
||||
.xprd = xprd,
|
||||
.yprd = yprd,
|
||||
.zprd = zprd,
|
||||
.bininvx = bininvx,
|
||||
.bininvy = bininvy,
|
||||
.bininvz = bininvz,
|
||||
.mbinxlo = mbinxlo,
|
||||
.mbinylo = mbinylo,
|
||||
.mbinzlo = mbinzlo,
|
||||
.nbinx = nbinx,
|
||||
.nbiny = nbiny,
|
||||
.nbinz = nbinz,
|
||||
.mbinx = mbinx,
|
||||
.mbiny = mbiny,
|
||||
.mbinz = mbinz
|
||||
};
|
||||
|
||||
if(c_resize_needed == NULL){
|
||||
checkCUDAError("buildNeighbor c_resize_needed malloc", cudaMalloc((void**)&c_resize_needed, sizeof(int)) );
|
||||
}
|
||||
|
||||
/* bin local & ghost atoms */
|
||||
double beforeBinning = getTimeStamp();
|
||||
binatoms_cuda(c_atom, &c_binning, c_resize_needed, &np, num_threads_per_block);
|
||||
double afterBinning = getTimeStamp();
|
||||
timers[NEIGH_BINATOMS] += afterBinning - beforeBinning;
|
||||
|
||||
if(c_new_maxneighs == NULL){
|
||||
checkCUDAError("c_new_maxneighs malloc", cudaMalloc((void**)&c_new_maxneighs, sizeof(int) ));
|
||||
}
|
||||
|
||||
int resize = 1;
|
||||
|
||||
/* extend c_neighbor arrays if necessary */
|
||||
if(nall > nmax) {
|
||||
nmax = nall;
|
||||
if(c_neighbor->numneigh) cudaFree(c_neighbor->numneigh);
|
||||
if(c_neighbor->neighbors) cudaFree(c_neighbor->neighbors);
|
||||
checkCUDAError( "buildNeighbor c_numneigh malloc", cudaMalloc((void**)&(c_neighbor->numneigh), nmax * sizeof(int)) );
|
||||
checkCUDAError( "buildNeighbor c_neighbors malloc", cudaMalloc((void**)&(c_neighbor->neighbors), nmax * c_neighbor->maxneighs * sizeof(int)) );
|
||||
}
|
||||
|
||||
/* loop over each atom, storing neighbors */
|
||||
while(resize) {
|
||||
resize = 0;
|
||||
|
||||
checkCUDAError("c_new_maxneighs memset", cudaMemset(c_new_maxneighs, 0, sizeof(int) ));
|
||||
|
||||
// TODO call compute_neigborhood kernel here
|
||||
const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
|
||||
/*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs)
|
||||
* */
|
||||
compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_atom, *c_neighbor,
|
||||
np, nstencil, c_stencil,
|
||||
c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount,
|
||||
c_new_maxneighs,
|
||||
cutneighsq);
|
||||
|
||||
checkCUDAError( "PeekAtLastError ComputeNeighbor", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync ComputeNeighbor", cudaDeviceSynchronize() );
|
||||
|
||||
// TODO copy the value of c_new_maxneighs back to host and check if it has been modified
|
||||
int new_maxneighs;
|
||||
checkCUDAError("c_new_maxneighs memcpy back", cudaMemcpy(&new_maxneighs, c_new_maxneighs, sizeof(int), cudaMemcpyDeviceToHost));
|
||||
if (new_maxneighs > c_neighbor->maxneighs){
|
||||
resize = 1;
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
printf("RESIZE %d\n", c_neighbor->maxneighs);
|
||||
c_neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
printf("NEW SIZE %d\n", c_neighbor->maxneighs);
|
||||
cudaFree(c_neighbor->neighbors);
|
||||
checkCUDAError("c_neighbor->neighbors resize malloc",
|
||||
cudaMalloc((void**)(&c_neighbor->neighbors),
|
||||
c_atom->Nmax * c_neighbor->maxneighs * sizeof(int)));
|
||||
}
|
||||
|
||||
}
|
||||
neighbor->maxneighs = c_neighbor->maxneighs;
|
||||
|
||||
cudaProfilerStop();
|
||||
}
|
||||
}
|
286
lammps/pbc.cu
Normal file
286
lammps/pbc.cu
Normal file
@@ -0,0 +1,286 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
#include <pbc.h>
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
}
|
||||
|
||||
__global__ void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
Atom* atom = &a;
|
||||
if( i >= atom->Nlocal ){
|
||||
return;
|
||||
}
|
||||
|
||||
if (atom_x(i) < 0.0) {
|
||||
atom_x(i) += xprd;
|
||||
} else if (atom_x(i) >= xprd) {
|
||||
atom_x(i) -= xprd;
|
||||
}
|
||||
|
||||
if (atom_y(i) < 0.0) {
|
||||
atom_y(i) += yprd;
|
||||
} else if (atom_y(i) >= yprd) {
|
||||
atom_y(i) -= yprd;
|
||||
}
|
||||
|
||||
if (atom_z(i) < 0.0) {
|
||||
atom_z(i) += zprd;
|
||||
} else if (atom_z(i) >= zprd) {
|
||||
atom_z(i) -= zprd;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int Nghost = a.Nghost;
|
||||
if( i >= Nghost ) {
|
||||
return;
|
||||
}
|
||||
Atom* atom = &a;
|
||||
int *border_map = atom->border_map;
|
||||
int nlocal = atom->Nlocal;
|
||||
|
||||
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
|
||||
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
|
||||
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
|
||||
}
|
||||
|
||||
extern "C"{
|
||||
|
||||
static int NmaxGhost;
|
||||
static int *PBCx, *PBCy, *PBCz;
|
||||
|
||||
static int c_NmaxGhost = 0;
|
||||
static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL;
|
||||
|
||||
static void growPbc(Atom *);
|
||||
|
||||
/* exported subroutines */
|
||||
void initPbc(Atom *atom) {
|
||||
NmaxGhost = 0;
|
||||
atom->border_map = NULL;
|
||||
PBCx = NULL;
|
||||
PBCy = NULL;
|
||||
PBCz = NULL;
|
||||
}
|
||||
|
||||
/* update coordinates of ghost atoms */
|
||||
/* uses mapping created in setupPbc */
|
||||
void updatePbc(Atom *atom, Parameter *param) {
|
||||
int *border_map = atom->border_map;
|
||||
int nlocal = atom->Nlocal;
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for (int i = 0; i < atom->Nghost; i++) {
|
||||
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
|
||||
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
|
||||
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
|
||||
}
|
||||
}
|
||||
|
||||
/* update coordinates of ghost atoms */
|
||||
/* uses mapping created in setupPbc */
|
||||
void updatePbc_cuda(Atom *atom, Parameter *param, Atom *c_atom, bool doReneighbor, const int num_threads_per_block) {
|
||||
if (doReneighbor){
|
||||
c_atom->Natoms = atom->Natoms;
|
||||
c_atom->Nlocal = atom->Nlocal;
|
||||
c_atom->Nghost = atom->Nghost;
|
||||
c_atom->ntypes = atom->ntypes;
|
||||
|
||||
if (atom->Nmax > c_atom->Nmax){ // the number of ghost atoms has increased -> more space is needed
|
||||
c_atom->Nmax = atom->Nmax;
|
||||
if(c_atom->x != NULL){ cudaFree(c_atom->x); }
|
||||
if(c_atom->type != NULL){ cudaFree(c_atom->type); }
|
||||
checkCUDAError( "updatePbc c_atom->x malloc", cudaMalloc((void**)&(c_atom->x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
|
||||
checkCUDAError( "updatePbc c_atom->type malloc", cudaMalloc((void**)&(c_atom->type), sizeof(int) * atom->Nmax) );
|
||||
}
|
||||
// TODO if the sort is reactivated the atom->vx needs to be copied to GPU as well
|
||||
checkCUDAError( "updatePbc c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
||||
checkCUDAError( "updatePbc c_atom->type memcpy", cudaMemcpy(c_atom->type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
|
||||
|
||||
if(c_NmaxGhost < NmaxGhost){
|
||||
c_NmaxGhost = NmaxGhost;
|
||||
if(c_PBCx != NULL){ cudaFree(c_PBCx); }
|
||||
if(c_PBCy != NULL){ cudaFree(c_PBCy); }
|
||||
if(c_PBCz != NULL){ cudaFree(c_PBCz); }
|
||||
if(c_atom->border_map != NULL){ cudaFree(c_atom->border_map); }
|
||||
checkCUDAError( "updatePbc c_PBCx malloc", cudaMalloc((void**)&c_PBCx, NmaxGhost * sizeof(int)) );
|
||||
checkCUDAError( "updatePbc c_PBCy malloc", cudaMalloc((void**)&c_PBCy, NmaxGhost * sizeof(int)) );
|
||||
checkCUDAError( "updatePbc c_PBCz malloc", cudaMalloc((void**)&c_PBCz, NmaxGhost * sizeof(int)) );
|
||||
checkCUDAError( "updatePbc c_atom->border_map malloc", cudaMalloc((void**)&(c_atom->border_map), NmaxGhost * sizeof(int)) );
|
||||
}
|
||||
checkCUDAError( "updatePbc c_PBCx memcpy", cudaMemcpy(c_PBCx, PBCx, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
|
||||
checkCUDAError( "updatePbc c_PBCy memcpy", cudaMemcpy(c_PBCy, PBCy, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
|
||||
checkCUDAError( "updatePbc c_PBCz memcpy", cudaMemcpy(c_PBCz, PBCz, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
|
||||
checkCUDAError( "updatePbc c_atom->border_map memcpy", cudaMemcpy(c_atom->border_map, atom->border_map, NmaxGhost * sizeof(int), cudaMemcpyHostToDevice) );
|
||||
}
|
||||
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
const int num_blocks = ceil((float)atom->Nghost / (float)num_threads_per_block);
|
||||
|
||||
/*__global__ void computePbcUpdate(Atom a, int* PBCx, int* PBCy, int* PBCz,
|
||||
* MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)
|
||||
* */
|
||||
computePbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd);
|
||||
checkCUDAError( "PeekAtLastError UpdatePbc", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync UpdatePbc", cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
/* relocate atoms that have left domain according
|
||||
* to periodic boundary conditions */
|
||||
void updateAtomsPbc(Atom *atom, Parameter *param) {
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for (int i = 0; i < atom->Nlocal; i++) {
|
||||
|
||||
if (atom_x(i) < 0.0) {
|
||||
atom_x(i) += xprd;
|
||||
} else if (atom_x(i) >= xprd) {
|
||||
atom_x(i) -= xprd;
|
||||
}
|
||||
|
||||
if (atom_y(i) < 0.0) {
|
||||
atom_y(i) += yprd;
|
||||
} else if (atom_y(i) >= yprd) {
|
||||
atom_y(i) -= yprd;
|
||||
}
|
||||
|
||||
if (atom_z(i) < 0.0) {
|
||||
atom_z(i) += zprd;
|
||||
} else if (atom_z(i) >= zprd) {
|
||||
atom_z(i) -= zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void updateAtomsPbc_cuda(Atom* atom, Parameter* param, Atom* c_atom, const int num_threads_per_block){
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
|
||||
/*void computeAtomsPbcUpdate(Atom a, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd)*/
|
||||
computeAtomsPbcUpdate<<<num_blocks, num_threads_per_block>>>(*c_atom, xprd, yprd, zprd);
|
||||
|
||||
checkCUDAError( "PeekAtLastError UpdateAtomsPbc", cudaPeekAtLastError() );
|
||||
checkCUDAError( "DeviceSync UpdateAtomsPbc", cudaDeviceSynchronize() );
|
||||
|
||||
checkCUDAError( "updateAtomsPbc position memcpy back", cudaMemcpy(atom->x, c_atom->x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
|
||||
}
|
||||
|
||||
/* setup periodic boundary conditions by
|
||||
* defining ghost atoms around domain
|
||||
* only creates mapping and coordinate corrections
|
||||
* that are then enforced in updatePbc */
|
||||
#define ADDGHOST(dx, dy, dz) \
|
||||
Nghost++; \
|
||||
border_map[Nghost] = i; \
|
||||
PBCx[Nghost] = dx; \
|
||||
PBCy[Nghost] = dy; \
|
||||
PBCz[Nghost] = dz; \
|
||||
atom->type[atom->Nlocal + Nghost] = atom->type[i]
|
||||
|
||||
void setupPbc(Atom *atom, Parameter *param) {
|
||||
int *border_map = atom->border_map;
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
MD_FLOAT Cutneigh = param->cutneigh;
|
||||
int Nghost = -1;
|
||||
|
||||
for (int i = 0; i < atom->Nlocal; i++) {
|
||||
|
||||
if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
if (Nghost + 7 >= NmaxGhost) {
|
||||
growPbc(atom);
|
||||
border_map = atom->border_map;
|
||||
}
|
||||
|
||||
MD_FLOAT x = atom_x(i);
|
||||
MD_FLOAT y = atom_y(i);
|
||||
MD_FLOAT z = atom_z(i);
|
||||
|
||||
/* Setup ghost atoms */
|
||||
/* 6 planes */
|
||||
if (x < Cutneigh) { ADDGHOST(+1, 0, 0); }
|
||||
if (x >= (xprd - Cutneigh)) { ADDGHOST(-1, 0, 0); }
|
||||
if (y < Cutneigh) { ADDGHOST(0, +1, 0); }
|
||||
if (y >= (yprd - Cutneigh)) { ADDGHOST(0, -1, 0); }
|
||||
if (z < Cutneigh) { ADDGHOST(0, 0, +1); }
|
||||
if (z >= (zprd - Cutneigh)) { ADDGHOST(0, 0, -1); }
|
||||
/* 8 corners */
|
||||
if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1, +1, +1); }
|
||||
if (x < Cutneigh && y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(+1, -1, +1); }
|
||||
if (x < Cutneigh && y >= Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(+1, +1, -1); }
|
||||
if (x < Cutneigh && y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(+1, -1, -1); }
|
||||
if (x >= (xprd - Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1, +1, +1); }
|
||||
if (x >= (xprd - Cutneigh) && y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(-1, -1, +1); }
|
||||
if (x >= (xprd - Cutneigh) && y < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(-1, +1, -1); }
|
||||
if (x >= (xprd - Cutneigh) && y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(-1, -1, -1); }
|
||||
/* 12 edges */
|
||||
if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1, 0, +1); }
|
||||
if (x < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(+1, 0, -1); }
|
||||
if (x >= (xprd - Cutneigh) && z < Cutneigh) { ADDGHOST(-1, 0, +1); }
|
||||
if (x >= (xprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(-1, 0, -1); }
|
||||
if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0, +1, +1); }
|
||||
if (y < Cutneigh && z >= (zprd - Cutneigh)) { ADDGHOST(0, +1, -1); }
|
||||
if (y >= (yprd - Cutneigh) && z < Cutneigh) { ADDGHOST(0, -1, +1); }
|
||||
if (y >= (yprd - Cutneigh) && z >= (zprd - Cutneigh)) { ADDGHOST(0, -1, -1); }
|
||||
if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1, +1, 0); }
|
||||
if (y < Cutneigh && x >= (xprd - Cutneigh)) { ADDGHOST(-1, +1, 0); }
|
||||
if (y >= (yprd - Cutneigh) && x < Cutneigh) { ADDGHOST(+1, -1, 0); }
|
||||
if (y >= (yprd - Cutneigh) && x >= (xprd - Cutneigh)) { ADDGHOST(-1, -1, 0); }
|
||||
}
|
||||
// increase by one to make it the ghost atom count
|
||||
atom->Nghost = Nghost + 1;
|
||||
}
|
||||
|
||||
/* internal subroutines */
|
||||
void growPbc(Atom *atom) {
|
||||
int nold = NmaxGhost;
|
||||
NmaxGhost += DELTA;
|
||||
|
||||
atom->border_map = (int *) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
PBCx = (int *) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
PBCy = (int *) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
PBCz = (int *) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
}
|
||||
}
|
31
lammps/stats.c
Normal file
31
lammps/stats.c
Normal file
@@ -0,0 +1,31 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timers.h>
|
||||
|
||||
void initStats(Stats *s) {
|
||||
s->total_force_neighs = 0;
|
||||
s->total_force_iters = 0;
|
||||
}
|
||||
|
||||
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
||||
#ifdef COMPUTE_STATS
|
||||
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
||||
(double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
||||
double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||
double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||
#ifdef EXPLICIT_TYPES
|
||||
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
|
||||
#endif
|
||||
printf("Statistics:\n");
|
||||
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
|
||||
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
|
||||
printf("\tAverage SIMD iterations per atom: %.4f\n", avg_simd);
|
||||
printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs);
|
||||
printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
|
||||
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
|
||||
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
|
||||
#endif
|
||||
}
|
133
lammps/thermo.c
Normal file
133
lammps/thermo.c
Normal file
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <thermo.h>
|
||||
|
||||
static int *steparr;
|
||||
static MD_FLOAT *tmparr;
|
||||
static MD_FLOAT *engarr;
|
||||
static MD_FLOAT *prsarr;
|
||||
static MD_FLOAT mvv2e;
|
||||
static MD_FLOAT dof_boltz;
|
||||
static MD_FLOAT t_scale;
|
||||
static MD_FLOAT p_scale;
|
||||
static MD_FLOAT e_scale;
|
||||
static MD_FLOAT t_act;
|
||||
static MD_FLOAT p_act;
|
||||
static MD_FLOAT e_act;
|
||||
static int mstat;
|
||||
|
||||
/* exported subroutines */
|
||||
void setupThermo(Parameter *param, int natoms)
|
||||
{
|
||||
int maxstat = param->ntimes / param->nstat + 2;
|
||||
|
||||
steparr = (int*) malloc(maxstat * sizeof(int));
|
||||
tmparr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
|
||||
engarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
|
||||
prsarr = (MD_FLOAT*) malloc(maxstat * sizeof(MD_FLOAT));
|
||||
|
||||
if(param->force_field == FF_LJ) {
|
||||
mvv2e = 1.0;
|
||||
dof_boltz = (natoms * 3 - 3);
|
||||
t_scale = mvv2e / dof_boltz;
|
||||
p_scale = 1.0 / 3 / param->xprd / param->yprd / param->zprd;
|
||||
e_scale = 0.5;
|
||||
} else {
|
||||
mvv2e = 1.036427e-04;
|
||||
dof_boltz = (natoms * 3 - 3) * 8.617343e-05;
|
||||
t_scale = mvv2e / dof_boltz;
|
||||
p_scale = 1.602176e+06 / 3 / param->xprd / param->yprd / param->zprd;
|
||||
e_scale = 524287.985533;//16.0;
|
||||
param->dtforce /= mvv2e;
|
||||
}
|
||||
|
||||
printf("step\ttemp\t\tpressure\n");
|
||||
}
|
||||
|
||||
void computeThermo(int iflag, Parameter *param, Atom *atom)
|
||||
{
|
||||
MD_FLOAT t = 0.0, p;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
|
||||
}
|
||||
|
||||
t = t * t_scale;
|
||||
p = (t * dof_boltz) * p_scale;
|
||||
int istep = iflag;
|
||||
|
||||
if(iflag == -1){
|
||||
istep = param->ntimes;
|
||||
}
|
||||
if(iflag == 0){
|
||||
mstat = 0;
|
||||
}
|
||||
|
||||
steparr[mstat] = istep;
|
||||
tmparr[mstat] = t;
|
||||
prsarr[mstat] = p;
|
||||
mstat++;
|
||||
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
|
||||
}
|
||||
|
||||
void adjustThermo(Parameter *param, Atom *atom)
|
||||
{
|
||||
/* zero center-of-mass motion */
|
||||
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
vxtot += atom_vx(i);
|
||||
vytot += atom_vy(i);
|
||||
vztot += atom_vz(i);
|
||||
}
|
||||
|
||||
vxtot = vxtot / atom->Natoms;
|
||||
vytot = vytot / atom->Natoms;
|
||||
vztot = vztot / atom->Natoms;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
atom_vx(i) -= vxtot;
|
||||
atom_vy(i) -= vytot;
|
||||
atom_vz(i) -= vztot;
|
||||
}
|
||||
|
||||
t_act = 0;
|
||||
MD_FLOAT t = 0.0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
|
||||
}
|
||||
|
||||
t *= t_scale;
|
||||
MD_FLOAT factor = sqrt(param->temp / t);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
atom_vx(i) *= factor;
|
||||
atom_vy(i) *= factor;
|
||||
atom_vz(i) *= factor;
|
||||
}
|
||||
}
|
43
lammps/timing.c
Normal file
43
lammps/timing.c
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
double getTimeStamp()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeResolution()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_getres(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeStamp_()
|
||||
{
|
||||
return getTimeStamp();
|
||||
}
|
42
lammps/util.c
Normal file
42
lammps/util.c
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* =======================================================================================
|
||||
*
|
||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
||||
*
|
||||
* This file is part of MD-Bench.
|
||||
*
|
||||
* MD-Bench is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU Lesser General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* MD-Bench is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License along
|
||||
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
|
||||
* =======================================================================================
|
||||
*/
|
||||
#include <util.h>
|
||||
|
||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
|
||||
#define IA 16807
|
||||
#define IM 2147483647
|
||||
#define AM (1.0/IM)
|
||||
#define IQ 127773
|
||||
#define IR 2836
|
||||
#define MASK 123459876
|
||||
|
||||
double myrandom(int* idum)
|
||||
{
|
||||
int k= (*idum) / IQ;
|
||||
double ans;
|
||||
|
||||
*idum = IA * (*idum - k * IQ) - IR * k;
|
||||
if(*idum < 0) *idum += IM;
|
||||
ans = AM * (*idum);
|
||||
return ans;
|
||||
}
|
44
lammps/vtk.c
Normal file
44
lammps/vtk.c
Normal file
@@ -0,0 +1,44 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <atom.h>
|
||||
|
||||
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
|
||||
FILE* fp = fopen(timestep_filename, "wb");
|
||||
|
||||
if(fp == NULL) {
|
||||
fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(fp, "# vtk DataFile Version 2.0\n");
|
||||
fprintf(fp, "Particle data\n");
|
||||
fprintf(fp, "ASCII\n");
|
||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "1 %d\n", i);
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "1\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "POINT_DATA %d\n", atom->Nlocal);
|
||||
fprintf(fp, "SCALARS mass double\n");
|
||||
fprintf(fp, "LOOKUP_TABLE default\n");
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fp, "1.0\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user