Cleanup. Remove copyright year. Reformat.

This commit is contained in:
2024-05-13 12:33:08 +02:00
parent a6a269703d
commit 9712d7e2c8
77 changed files with 959 additions and 648 deletions

551
verletlist/atom.c Normal file
View File

@@ -0,0 +1,551 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <atom.h>
#include <allocate.h>
#include <device.h>
#include <util.h>
#define DELTA 20000
#ifndef MAXLINE
#define MAXLINE 4096
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
void initAtom(Atom *atom) {
atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
atom->Natoms = 0;
atom->Nlocal = 0;
atom->Nghost = 0;
atom->Nmax = 0;
atom->type = NULL;
atom->ntypes = 0;
atom->epsilon = NULL;
atom->sigma6 = NULL;
atom->cutforcesq = NULL;
atom->cutneighsq = NULL;
atom->radius = NULL;
atom->av = NULL;
atom->r = NULL;
DeviceAtom *d_atom = &(atom->d_atom);
d_atom->x = NULL; d_atom->y = NULL; d_atom->z = NULL;
d_atom->vx = NULL; d_atom->vy = NULL; d_atom->vz = NULL;
d_atom->fx = NULL; d_atom->fy = NULL; d_atom->fz = NULL;
d_atom->border_map = NULL;
d_atom->type = NULL;
d_atom->epsilon = NULL;
d_atom->sigma6 = NULL;
d_atom->cutforcesq = NULL;
d_atom->cutneighsq = NULL;
}
void createAtom(Atom *atom, Parameter *param) {
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
atom->Natoms = 4 * param->nx * param->ny * param->nz;
atom->Nlocal = 0;
atom->ntypes = param->ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
int ilo = (int) (xlo / (0.5 * alat) - 1);
int ihi = (int) (xhi / (0.5 * alat) + 1);
int jlo = (int) (ylo / (0.5 * alat) - 1);
int jhi = (int) (yhi / (0.5 * alat) + 1);
int klo = (int) (zlo / (0.5 * alat) - 1);
int khi = (int) (zhi / (0.5 * alat) + 1);
ilo = MAX(ilo, 0);
ihi = MIN(ihi, 2 * param->nx - 1);
jlo = MAX(jlo, 0);
jhi = MIN(jhi, 2 * param->ny - 1);
klo = MAX(klo, 0);
khi = MIN(khi, 2 * param->nz - 1);
MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp;
int i, j, k, m, n;
int sx = 0; int sy = 0; int sz = 0;
int ox = 0; int oy = 0; int oz = 0;
int subboxdim = 8;
while(oz * subboxdim <= khi) {
k = oz * subboxdim + sz;
j = oy * subboxdim + sy;
i = ox * subboxdim + sx;
if(((i + j + k) % 2 == 0) &&
(i >= ilo) && (i <= ihi) &&
(j >= jlo) && (j <= jhi) &&
(k >= klo) && (k <= khi)) {
xtmp = 0.5 * alat * i;
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
if( xtmp >= xlo && xtmp < xhi &&
ytmp >= ylo && ytmp < yhi &&
ztmp >= zlo && ztmp < zhi ) {
n = k * (2 * param->ny) * (2 * param->nx) +
j * (2 * param->nx) +
i + 1;
for(m = 0; m < 5; m++) {
myrandom(&n);
}
vxtmp = myrandom(&n);
for(m = 0; m < 5; m++){
myrandom(&n);
}
vytmp = myrandom(&n);
for(m = 0; m < 5; m++) {
myrandom(&n);
}
vztmp = myrandom(&n);
if(atom->Nlocal == atom->Nmax) {
growAtom(atom);
}
atom_x(atom->Nlocal) = xtmp;
atom_y(atom->Nlocal) = ytmp;
atom_z(atom->Nlocal) = ztmp;
atom_vx(atom->Nlocal) = vxtmp;
atom_vy(atom->Nlocal) = vytmp;
atom_vz(atom->Nlocal) = vztmp;
atom->type[atom->Nlocal] = rand() % atom->ntypes;
atom->Nlocal++;
}
}
sx++;
if(sx == subboxdim) { sx = 0; sy++; }
if(sy == subboxdim) { sy = 0; sz++; }
if(sz == subboxdim) { sz = 0; ox++; }
if(ox * subboxdim > ihi) { ox = 0; oy++; }
if(oy * subboxdim > jhi) { oy = 0; oz++; }
}
}
int type_str2int(const char *type) {
if(strncmp(type, "Ar", 2) == 0) { return 0; } // Argon
fprintf(stderr, "Invalid atom type: %s\n", type);
exit(-1);
return -1;
}
int readAtom(Atom* atom, Parameter* param) {
int len = strlen(param->input_file);
if(strncmp(&param->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
if(strncmp(&param->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
if(strncmp(&param->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
if(strncmp(&param->input_file[len - 3], ".in", 3) == 0) { return readAtom_in(atom, param); }
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
exit(-1);
return -1;
}
int readAtom_pdb(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int read_atoms = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
while(!feof(fp)) {
readline(line, fp);
char *item = strtok(line, " ");
if(strncmp(item, "CRYST1", 6) == 0) {
param->xlo = 0.0;
param->xhi = atof(strtok(NULL, " "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
// alpha, beta, gamma, sGroup, z
} else if(strncmp(item, "ATOM", 4) == 0) {
char *label;
int atom_id, comp_id;
MD_FLOAT occupancy, charge;
atom_id = atoi(strtok(NULL, " ")) - 1;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
label = strtok(NULL, " ");
comp_id = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = 0.0;
atom_vy(atom_id) = 0.0;
atom_vz(atom_id) = 0.0;
occupancy = atof(strtok(NULL, " "));
charge = atof(strtok(NULL, " "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
read_atoms++;
} else if(strncmp(item, "HEADER", 6) == 0 ||
strncmp(item, "REMARK", 6) == 0 ||
strncmp(item, "MODEL", 5) == 0 ||
strncmp(item, "TER", 3) == 0 ||
strncmp(item, "ENDMDL", 6) == 0) {
// Do nothing
} else {
fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
}
if(!read_atoms) {
fprintf(stderr, "Input error: No atoms read!\n");
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_gro(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
char desc[MAXLINE];
int read_atoms = 0;
int atoms_to_read = 0;
int i = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
readline(desc, fp);
for(i = 0; desc[i] != '\n'; i++);
desc[i] = '\0';
readline(line, fp);
atoms_to_read = atoi(strtok(line, " "));
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
while(!feof(fp) && read_atoms < atoms_to_read) {
readline(line, fp);
char *label = strtok(line, " ");
int type = type_str2int(strtok(NULL, " "));
int atom_id = atoi(strtok(NULL, " ")) - 1;
atom_id = read_atoms;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type;
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = atof(strtok(NULL, " "));
atom_vy(atom_id) = atof(strtok(NULL, " "));
atom_vz(atom_id) = atof(strtok(NULL, " "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
read_atoms++;
}
if(!feof(fp)) {
readline(line, fp);
param->xlo = 0.0;
param->xhi = atof(strtok(line, " "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
}
if(read_atoms != atoms_to_read) {
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
int read_atoms = 0;
int atom_id = -1;
int ts = -1;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
while(!feof(fp) && ts < 1 && !read_atoms) {
readline(line, fp);
if(strncmp(line, "ITEM: ", 6) == 0) {
char *item = &line[6];
if(strncmp(item, "TIMESTEP", 8) == 0) {
readline(line, fp);
ts = atoi(line);
} else if(strncmp(item, "NUMBER OF ATOMS", 15) == 0) {
readline(line, fp);
natoms = atoi(line);
atom->Natoms = natoms;
atom->Nlocal = natoms;
while(atom->Nlocal >= atom->Nmax) {
growAtom(atom);
}
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
readline(line, fp);
param->xlo = atof(strtok(line, " "));
param->xhi = atof(strtok(NULL, " "));
param->xprd = param->xhi - param->xlo;
readline(line, fp);
param->ylo = atof(strtok(line, " "));
param->yhi = atof(strtok(NULL, " "));
param->yprd = param->yhi - param->ylo;
readline(line, fp);
param->zlo = atof(strtok(line, " "));
param->zhi = atof(strtok(NULL, " "));
param->zprd = param->zhi - param->zlo;
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
for(int i = 0; i < natoms; i++) {
readline(line, fp);
atom_id = atoi(strtok(line, " ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = atof(strtok(NULL, " "));
atom_vy(atom_id) = atof(strtok(NULL, " "));
atom_vz(atom_id) = atof(strtok(NULL, " "));
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
read_atoms++;
}
} else {
fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
} else {
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
exit(-1);
return -1;
}
}
if(ts < 0 || !natoms || !read_atoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
return natoms;
}
int readAtom_in(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
int atom_id = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
readline(line, fp);
natoms = atoi(strtok(line, " "));
param->xlo = atof(strtok(NULL, " "));
param->xhi = atof(strtok(NULL, " "));
param->ylo = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, " "));
param->zlo = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, " "));
atom->Natoms = natoms;
atom->Nlocal = natoms;
atom->ntypes = 1;
while(atom->Nlocal >= atom->Nmax) {
growAtom(atom);
}
for(int i = 0; i < natoms; i++) {
readline(line, fp);
// TODO: store mass per atom
char *s_mass = strtok(line, " ");
if(strncmp(s_mass, "inf", 3) == 0) {
// Set atom's mass to INFINITY
} else {
param->mass = atof(s_mass);
}
atom->radius[atom_id] = atof(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = atof(strtok(NULL, " "));
atom_vy(atom_id) = atof(strtok(NULL, " "));
atom_vz(atom_id) = atof(strtok(NULL, " "));
atom->type[atom_id] = 0;
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
atom_id++;
}
if(!natoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
return natoms;
}
void writeAtom(Atom *atom, Parameter *param) {
FILE *fp = fopen(param->write_atom_file, "w");
for(int i = 0; i < atom->Nlocal; i++) {
fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
atom->type[i], 1.0,
atom_x(i), atom_y(i), atom_z(i),
atom_vx(i), atom_vy(i), atom_vz(i));
}
fclose(fp);
fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
param->write_atom_file, param->xprd, param->yprd, param->zprd);
}
void growAtom(Atom *atom) {
DeviceAtom *d_atom = &(atom->d_atom);
int nold = atom->Nmax;
atom->Nmax += DELTA;
#undef REALLOC
#define REALLOC(p,t,ns,os); \
atom->p = (t *) reallocate(atom->p, ALIGNMENT, ns, os); \
atom->d_atom.p = (t *) reallocateGPU(atom->d_atom.p, ns);
#ifdef AOS
REALLOC(x, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
REALLOC(vx, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
REALLOC(fx, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
#else
REALLOC(x, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(y, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(z, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(vx, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(vy, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(vz, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(fx, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(fy, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
REALLOC(fz, MD_FLOAT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
#endif
REALLOC(type, int, atom->Nmax * sizeof(int), nold * sizeof(int));
// DEM
atom->radius = (MD_FLOAT *) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
atom->r = (MD_FLOAT*) reallocate(atom->r, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
}

180
verletlist/cuda/force.cu Normal file
View File

@@ -0,0 +1,180 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
//---
#include <cuda_profiler_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
//---
#include <likwid-marker.h>
extern "C" {
#include <allocate.h>
#include <atom.h>
#include <allocate.h>
#include <device.h>
#include <neighbor.h>
#include <parameter.h>
#include <timing.h>
#include <util.h>
}
// cuda kernel
__global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh, int ntypes) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= Nlocal) {
return;
}
DeviceAtom *atom = &a;
const int numneighs = neigh_numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
for(int k = 0; k < numneighs; k++) {
int j = neigh_neighbors[Nlocal * k + i];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
}
}
atom_fx(i) = fix;
atom_fy(i) = fiy;
atom_fz(i) = fiz;
}
__global__ void kernel_initial_integrate(MD_FLOAT dtforce, MD_FLOAT dt, int Nlocal, DeviceAtom a) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
DeviceAtom *atom = &a;
atom_vx(i) += dtforce * atom_fx(i);
atom_vy(i) += dtforce * atom_fy(i);
atom_vz(i) += dtforce * atom_fz(i);
atom_x(i) = atom_x(i) + dt * atom_vx(i);
atom_y(i) = atom_y(i) + dt * atom_vy(i);
atom_z(i) = atom_z(i) + dt * atom_vz(i);
}
__global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal, DeviceAtom a) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
DeviceAtom *atom = &a;
atom_vx(i) += dtforce * atom_fx(i);
atom_vy(i) += dtforce * atom_fy(i);
atom_vz(i) += dtforce * atom_fz(i);
}
extern "C" {
void finalIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
const int Nlocal = atom->Nlocal;
const int num_threads_per_block = get_cuda_num_threads();
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, atom->d_atom);
cuda_assert("kernel_final_integrate", cudaPeekAtLastError());
cuda_assert("kernel_final_integrate", cudaDeviceSynchronize());
if(reneigh) {
memcpyFromGPU(atom->vx, atom->d_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3);
}
}
void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
const int Nlocal = atom->Nlocal;
const int num_threads_per_block = get_cuda_num_threads();
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, atom->d_atom);
cuda_assert("kernel_initial_integrate", cudaPeekAtLastError());
cuda_assert("kernel_initial_integrate", cudaDeviceSynchronize());
if(reneigh) {
memcpyFromGPU(atom->vx, atom->d_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3);
}
}
double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) {
const int num_threads_per_block = get_cuda_num_threads();
int Nlocal = atom->Nlocal;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
/*
int nDevices;
cudaGetDeviceCount(&nDevices);
size_t free, total;
for(int i = 0; i < nDevices; ++i) {
cudaMemGetInfo( &free, &total );
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, i);
printf("DEVICE %d/%d NAME: %s\r\n with %ld MB/%ld MB memory used", i + 1, nDevices, prop.name, free / 1024 / 1024, total / 1024 / 1024);
}
*/
// HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
// memsetGPU(atom->d_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3);
cudaProfilerStart();
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh, atom->ntypes);
cuda_assert("calc_force", cudaPeekAtLastError());
cuda_assert("calc_force", cudaDeviceSynchronize());
cudaProfilerStop();
LIKWID_MARKER_STOP("force");
double E = getTimeStamp();
return E-S;
}
}

293
verletlist/cuda/neighbor.cu Normal file
View File

@@ -0,0 +1,293 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cuda_profiler_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
//---
extern "C" {
#include <atom.h>
#include <device.h>
#include <parameter.h>
#include <neighbor.h>
#include <util.h>
}
extern MD_FLOAT xprd, yprd, zprd;
extern MD_FLOAT bininvx, bininvy, bininvz;
extern int mbinxlo, mbinylo, mbinzlo;
extern int nbinx, nbiny, nbinz;
extern int mbinx, mbiny, mbinz; // n bins in x, y, z
extern int mbins; //total number of bins
extern int atoms_per_bin; // max atoms per bin
extern MD_FLOAT cutneighsq; // neighbor cutoff squared
extern int nmax;
extern int nstencil; // # of bins in stencil
extern int* stencil; // stencil list of bin offsets
static int* c_stencil = NULL;
static int* c_resize_needed = NULL;
static int* c_new_maxneighs = NULL;
static Binning c_binning {
.bincount = NULL,
.bins = NULL,
.mbins = 0,
.atoms_per_bin = 0
};
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, Neighbor_params np) {
int ix, iy, iz;
if(xin >= np.xprd) {
ix = (int)((xin - np.xprd) * np.bininvx) + np.nbinx - np.mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * np.bininvx) - np.mbinxlo;
} else {
ix = (int)(xin * np.bininvx) - np.mbinxlo - 1;
}
if(yin >= np.yprd) {
iy = (int)((yin - np.yprd) * np.bininvy) + np.nbiny - np.mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * np.bininvy) - np.mbinylo;
} else {
iy = (int)(yin * np.bininvy) - np.mbinylo - 1;
}
if(zin >= np.zprd) {
iz = (int)((zin - np.zprd) * np.bininvz) + np.nbinz - np.mbinzlo;
} else if(zin >= 0.0) {
iz = (int)(zin * np.bininvz) - np.mbinzlo;
} else {
iz = (int)(zin * np.bininvz) - np.mbinzlo - 1;
}
return (iz * np.mbiny * np.mbinx + iy * np.mbinx + ix + 1);
}
/* sorts the contents of a bin to make it comparable to the CPU version */
/* uses bubble sort since atoms per bin should be relatively small and can be done in situ */
__global__ void sort_bin_contents_kernel(int* bincount, int* bins, int mbins, int atoms_per_bin){
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= mbins) {
return;
}
int atoms_in_bin = bincount[i];
int *bin_ptr = &bins[i * atoms_per_bin];
int sorted;
do {
sorted = 1;
int tmp;
for(int index = 0; index < atoms_in_bin - 1; index++){
if (bin_ptr[index] > bin_ptr[index + 1]){
tmp = bin_ptr[index];
bin_ptr[index] = bin_ptr[index + 1];
bin_ptr[index + 1] = tmp;
sorted = 0;
}
}
} while (!sorted);
}
__global__ void binatoms_kernel(DeviceAtom a, int nall, int* bincount, int* bins, int atoms_per_bin, Neighbor_params np, int *resize_needed) {
DeviceAtom* atom = &a;
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= nall) {
return;
}
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
int ibin = coord2bin_device(x, y, z, np);
int ac = atomicAdd(&bincount[ibin], 1);
if(ac < atoms_per_bin){
bins[ibin * atoms_per_bin + ac] = i;
} else {
atomicMax(resize_needed, ac);
}
}
__global__ void compute_neighborhood(
DeviceAtom a, DeviceNeighbor neigh, Neighbor_params np, int nlocal, int maxneighs, int nstencil, int* stencil,
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq, int ntypes) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= nlocal) {
return;
}
DeviceAtom *atom = &a;
DeviceNeighbor *neighbor = &neigh;
int* neighptr = &(neighbor->neighbors[i]);
int n = 0;
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
#ifdef EXPLICIT_TYPES
int type_i = atom->type[i];
#endif
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin];
for(int m = 0; m < bincount[jbin]; m++) {
int j = loc_bin[m];
if ( j == i ){
continue;
}
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
int type_j = atom->type[j];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * ntypes + type_j];
#else
const MD_FLOAT cutoff = cutneighsq;
#endif
if( rsq <= cutoff ) {
int idx = nlocal * n;
neighptr[idx] = j;
n += 1;
}
}
}
neighbor->numneigh[i] = n;
if(n > maxneighs) {
atomicMax(new_maxneighs, n);
}
}
void binatoms_cuda(Atom *atom, Binning *c_binning, int *c_resize_needed, Neighbor_params *np, const int threads_per_block) {
int nall = atom->Nlocal + atom->Nghost;
int resize = 1;
const int num_blocks = ceil((float) nall / (float) threads_per_block);
while(resize > 0) {
resize = 0;
memsetGPU(c_binning->bincount, 0, c_binning->mbins * sizeof(int));
memsetGPU(c_resize_needed, 0, sizeof(int));
binatoms_kernel<<<num_blocks, threads_per_block>>>(atom->d_atom, atom->Nlocal + atom->Nghost, c_binning->bincount, c_binning->bins, c_binning->atoms_per_bin, *np, c_resize_needed);
cuda_assert("binatoms", cudaPeekAtLastError());
cuda_assert("binatoms", cudaDeviceSynchronize());
memcpyFromGPU(&resize, c_resize_needed, sizeof(int));
if(resize) {
c_binning->atoms_per_bin *= 2;
c_binning->bins = (int *) reallocateGPU(c_binning->bins, c_binning->mbins * c_binning->atoms_per_bin * sizeof(int));
}
}
atoms_per_bin = c_binning->atoms_per_bin;
const int sortBlocks = ceil((float) mbins / (float) threads_per_block);
sort_bin_contents_kernel<<<sortBlocks, threads_per_block>>>(c_binning->bincount, c_binning->bins, c_binning->mbins, c_binning->atoms_per_bin);
cuda_assert("sort_bin", cudaPeekAtLastError());
cuda_assert("sort_bin", cudaDeviceSynchronize());
}
void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) {
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
const int num_threads_per_block = get_cuda_num_threads();
int nall = atom->Nlocal + atom->Nghost;
cudaProfilerStart();
// TODO move all of this initialization into its own method
if(c_stencil == NULL) {
c_stencil = (int *) allocateGPU(nstencil * sizeof(int));
memcpyToGPU(c_stencil, stencil, nstencil * sizeof(int));
}
if(c_binning.mbins == 0) {
c_binning.mbins = mbins;
c_binning.atoms_per_bin = atoms_per_bin;
c_binning.bincount = (int *) allocateGPU(c_binning.mbins * sizeof(int));
c_binning.bins = (int *) allocateGPU(c_binning.mbins * c_binning.atoms_per_bin * sizeof(int));
}
Neighbor_params np {
.xprd = xprd,
.yprd = yprd,
.zprd = zprd,
.bininvx = bininvx,
.bininvy = bininvy,
.bininvz = bininvz,
.mbinxlo = mbinxlo,
.mbinylo = mbinylo,
.mbinzlo = mbinzlo,
.nbinx = nbinx,
.nbiny = nbiny,
.nbinz = nbinz,
.mbinx = mbinx,
.mbiny = mbiny,
.mbinz = mbinz
};
if(c_resize_needed == NULL) {
c_resize_needed = (int *) allocateGPU(sizeof(int));
}
/* bin local & ghost atoms */
binatoms_cuda(atom, &c_binning, c_resize_needed, &np, num_threads_per_block);
if(c_new_maxneighs == NULL) {
c_new_maxneighs = (int *) allocateGPU(sizeof(int));
}
int resize = 1;
if(nall > nmax) {
nmax = nall;
d_neighbor->neighbors = (int *) reallocateGPU(d_neighbor->neighbors, nmax * neighbor->maxneighs * sizeof(int));
d_neighbor->numneigh = (int *) reallocateGPU(d_neighbor->numneigh, nmax * sizeof(int));
}
/* loop over each atom, storing neighbors */
while(resize) {
resize = 0;
memsetGPU(c_new_maxneighs, 0, sizeof(int));
const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
compute_neighborhood<<<num_blocks, num_threads_per_block>>>(atom->d_atom, *d_neighbor,
np, atom->Nlocal, neighbor->maxneighs, nstencil, c_stencil,
c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount,
c_new_maxneighs,
cutneighsq, atom->ntypes);
cuda_assert("compute_neighborhood", cudaPeekAtLastError());
cuda_assert("compute_neighborhood", cudaDeviceSynchronize());
int new_maxneighs;
memcpyFromGPU(&new_maxneighs, c_new_maxneighs, sizeof(int));
if(new_maxneighs > neighbor->maxneighs){
resize = 1;
}
if(resize) {
printf("RESIZE %d\n", neighbor->maxneighs);
neighbor->maxneighs = new_maxneighs * 1.2;
printf("NEW SIZE %d\n", neighbor->maxneighs);
neighbor->neighbors = (int *) reallocateGPU(neighbor->neighbors, atom->Nmax * neighbor->maxneighs * sizeof(int));
}
}
cudaProfilerStop();
}

111
verletlist/cuda/pbc.cu Normal file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
//---
extern "C" {
#include <allocate.h>
#include <atom.h>
#include <device.h>
#include <pbc.h>
#include <util.h>
}
extern int NmaxGhost;
extern int *PBCx, *PBCy, *PBCz;
static int c_NmaxGhost = 0;
static int *c_PBCx = NULL, *c_PBCy = NULL, *c_PBCz = NULL;
__global__ void computeAtomsPbcUpdate(DeviceAtom a, int nlocal, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
DeviceAtom *atom = &a;
if(i >= nlocal) {
return;
}
if (atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if (atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if (atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if (atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if (atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if (atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
__global__ void computePbcUpdate(DeviceAtom a, int nlocal, int nghost, int* PBCx, int* PBCy, int* PBCz, MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd){
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= nghost) {
return;
}
DeviceAtom* atom = &a;
int *border_map = atom->border_map;
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void updatePbc_cuda(Atom *atom, Parameter *param, bool reneigh) {
const int num_threads_per_block = get_cuda_num_threads();
if(reneigh) {
memcpyToGPU(atom->d_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(atom->d_atom.type, atom->type, sizeof(int) * atom->Nmax);
if(c_NmaxGhost < NmaxGhost) {
c_NmaxGhost = NmaxGhost;
c_PBCx = (int *) reallocateGPU(c_PBCx, NmaxGhost * sizeof(int));
c_PBCy = (int *) reallocateGPU(c_PBCy, NmaxGhost * sizeof(int));
c_PBCz = (int *) reallocateGPU(c_PBCz, NmaxGhost * sizeof(int));
atom->d_atom.border_map = (int *) reallocateGPU(atom->d_atom.border_map, NmaxGhost * sizeof(int));
}
memcpyToGPU(c_PBCx, PBCx, NmaxGhost * sizeof(int));
memcpyToGPU(c_PBCy, PBCy, NmaxGhost * sizeof(int));
memcpyToGPU(c_PBCz, PBCz, NmaxGhost * sizeof(int));
memcpyToGPU(atom->d_atom.border_map, atom->border_map, NmaxGhost * sizeof(int));
cuda_assert("updatePbc.reneigh", cudaPeekAtLastError());
cuda_assert("updatePbc.reneigh", cudaDeviceSynchronize());
}
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
const int num_blocks = ceil((float)atom->Nghost / (float)num_threads_per_block);
computePbcUpdate<<<num_blocks, num_threads_per_block>>>(atom->d_atom, atom->Nlocal, atom->Nghost, c_PBCx, c_PBCy, c_PBCz, xprd, yprd, zprd);
cuda_assert("updatePbc", cudaPeekAtLastError());
cuda_assert("updatePbc", cudaDeviceSynchronize());
}
void updateAtomsPbc_cuda(Atom* atom, Parameter *param) {
const int num_threads_per_block = get_cuda_num_threads();
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
const int num_blocks = ceil((float)atom->Nlocal / (float)num_threads_per_block);
computeAtomsPbcUpdate<<<num_blocks, num_threads_per_block>>>(atom->d_atom, atom->Nlocal, xprd, yprd, zprd);
cuda_assert("computeAtomsPbcUpdate", cudaPeekAtLastError());
cuda_assert("computeAtomsPbcUpdate", cudaDeviceSynchronize());
memcpyFromGPU(atom->x, atom->d_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3);
}

31
verletlist/device_spec.c Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <device.h>
#ifdef CUDA_TARGET
void initDevice(Atom *atom, Neighbor *neighbor) {
DeviceAtom *d_atom = &(atom->d_atom);
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->cutneighsq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
memcpyToGPU(d_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->cutneighsq, atom->cutneighsq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
}
#endif

123
verletlist/force_dem.c Normal file
View File

@@ -0,0 +1,123 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <math.h>
//---
#include <atom.h>
#include <likwid-marker.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
double computeForceDemFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT k_s = param->k_s;
MD_FLOAT k_dn = param->k_dn;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
for(int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
LIKWID_MARKER_START("force");
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT irad = atom->radius[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT jrad = atom->radius[j];
MD_FLOAT xj = atom_x(j);
MD_FLOAT yj = atom_y(j);
MD_FLOAT zj = atom_z(j);
MD_FLOAT delx = xtmp - xj;
MD_FLOAT dely = ytmp - yj;
MD_FLOAT delz = ztmp - zj;
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT r = sqrt(rsq);
MD_FLOAT p = irad + jrad - r;
if(p > 0) {
MD_FLOAT delvx = atom_vx(i) - atom_vx(j);
MD_FLOAT delvy = atom_vy(i) - atom_vy(j);
MD_FLOAT delvz = atom_vz(i) - atom_vz(j);
MD_FLOAT vr = sqrt(delvx * delvx + delvy * delvy + delvz * delvz);
// normal distance
MD_FLOAT nx = delx / r;
MD_FLOAT ny = dely / r;
MD_FLOAT nz = delz / r;
// normal contact velocity
MD_FLOAT nvx = delvx / vr;
MD_FLOAT nvy = delvy / vr;
MD_FLOAT nvz = delvz / vr;
// forces
atom_fx(i) += k_s * p * nx - k_dn * nvx;
atom_fy(i) += k_s * p * ny - k_dn * nvy;
atom_fz(i) += k_s * p * nz - k_dn * nvz;
atom_fx(j) += -k_s * p * nx - k_dn * nvx;
atom_fy(j) += -k_s * p * ny - k_dn * nvy;
atom_fz(j) += -k_s * p * nz - k_dn * nvz;
// contact position
//MD_FLOAT cterm = jrad / (irad + jrad);
//MD_FLOAT cx = xj + cterm * delx;
//MD_FLOAT cy = yj + cterm * dely;
//MD_FLOAT cz = zj + cterm * delz;
// delta contact and particle position
//MD_FLOAT delcx = cx - xtmp;
//MD_FLOAT delcy = cy - ytmp;
//MD_FLOAT delcz = cz - ztmp;
// contact velocity
//MD_FLOAT cvx = (atom_vx(i) + atom_avx(i) * delcx) - (atom_vx(j) + atom_avx(j) * (cx - xj));
//MD_FLOAT cvy = (atom_vy(i) + atom_avy(i) * delcy) - (atom_vy(j) + atom_avy(j) * (cy - yj));
//MD_FLOAT cvz = (atom_vz(i) + atom_avz(i) * delcz) - (atom_vz(j) + atom_avz(j) * (cz - zj));
// tangential force
//fix += MIN(kdt * vtsq, kf * fnx) * tx;
//fiy += MIN(kdt * vtsq, kf * fny) * ty;
//fiz += MIN(kdt * vtsq, kf * fnz) * tz;
// torque
//MD_FLOAT taux = delcx * ftx;
//MD_FLOAT tauy = delcy * fty;
//MD_FLOAT tauz = delcz * ftz;
}
#ifdef USE_REFERENCE_VERSION
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
#endif
}
}
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("force");
double E = getTimeStamp();
return E-S;
}

209
verletlist/force_eam.c Normal file
View File

@@ -0,0 +1,209 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <likwid-marker.h>
#include <math.h>
#include <allocate.h>
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <eam.h>
#include <util.h>
double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(eam->nmax < atom->Nmax) {
eam->nmax = atom->Nmax;
if(eam->fp != NULL) { free(eam->fp); }
eam->fp = (MD_FLOAT *) allocate(ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT));
}
int Nlocal = atom->Nlocal;
int* neighs;
int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp;
MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline;
MD_FLOAT rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; MD_FLOAT rdrho = eam->rdrho;
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force_eam_fp");
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT rhoi = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
#pragma ivdep
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
#else
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
#endif
if(rsq < cutforcesq) {
MD_FLOAT p = sqrt(rsq) * rdr + 1.0;
int m = (int)(p);
m = m < nr - 1 ? m : nr - 1;
p -= m;
p = p < 1.0 ? p : 1.0;
#ifdef EXPLICIT_TYPES
rhoi += ((rhor_spline[type_ij * nr_tot + m * 7 + 3] * p +
rhor_spline[type_ij * nr_tot + m * 7 + 4]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 5]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 6];
#else
rhoi += ((rhor_spline[m * 7 + 3] * p +
rhor_spline[m * 7 + 4]) * p +
rhor_spline[m * 7 + 5]) * p +
rhor_spline[m * 7 + 6];
#endif
}
}
#ifdef EXPLICIT_TYPES
const int type_ii = type_i * type_i;
#endif
MD_FLOAT p = 1.0 * rhoi * rdrho + 1.0;
int m = (int)(p);
m = MAX(1, MIN(m, nrho - 1));
p -= m;
p = MIN(p, 1.0);
#ifdef EXPLICIT_TYPES
fp[i] = (frho_spline[type_ii * nrho_tot + m * 7 + 0] * p +
frho_spline[type_ii * nrho_tot + m * 7 + 1]) * p +
frho_spline[type_ii * nrho_tot + m * 7 + 2];
#else
fp[i] = (frho_spline[m * 7 + 0] * p + frho_spline[m * 7 + 1]) * p + frho_spline[m * 7 + 2];
#endif
}
LIKWID_MARKER_STOP("force_eam_fp");
}
// We still need to update fp for PBC atoms
for(int i = 0; i < atom->Nghost; i++) {
fp[Nlocal + i] = fp[atom->border_map[i]];
}
#pragma omp parallel
{
LIKWID_MARKER_START("force_eam");
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
#pragma ivdep
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
#else
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
#endif
if(rsq < cutforcesq) {
MD_FLOAT r = sqrt(rsq);
MD_FLOAT p = r * rdr + 1.0;
int m = (int)(p);
m = m < nr - 1 ? m : nr - 1;
p -= m;
p = p < 1.0 ? p : 1.0;
// rhoip = derivative of (density at atom j due to atom i)
// rhojp = derivative of (density at atom i due to atom j)
// phi = pair potential energy
// phip = phi'
// z2 = phi * r
// z2p = (phi * r)' = (phi' r) + phi
// psip needs both fp[i] and fp[j] terms since r_ij appears in two
// terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
// hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
#ifdef EXPLICIT_TYPES
MD_FLOAT rhoip = (rhor_spline[type_ij * nr_tot + m * 7 + 0] * p +
rhor_spline[type_ij * nr_tot + m * 7 + 1]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 2];
MD_FLOAT z2p = (z2r_spline[type_ij * nr_tot + m * 7 + 0] * p +
z2r_spline[type_ij * nr_tot + m * 7 + 1]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 2];
MD_FLOAT z2 = ((z2r_spline[type_ij * nr_tot + m * 7 + 3] * p +
z2r_spline[type_ij * nr_tot + m * 7 + 4]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 5]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 6];
#else
MD_FLOAT rhoip = (rhor_spline[m * 7 + 0] * p + rhor_spline[m * 7 + 1]) * p + rhor_spline[m * 7 + 2];
MD_FLOAT z2p = (z2r_spline[m * 7 + 0] * p + z2r_spline[m * 7 + 1]) * p + z2r_spline[m * 7 + 2];
MD_FLOAT z2 = ((z2r_spline[m * 7 + 3] * p +
z2r_spline[m * 7 + 4]) * p +
z2r_spline[m * 7 + 5]) * p +
z2r_spline[m * 7 + 6];
#endif
MD_FLOAT recip = 1.0 / r;
MD_FLOAT phi = z2 * recip;
MD_FLOAT phip = z2p * recip - phi * recip;
MD_FLOAT psip = fp[i] * rhoip + fp[j] * rhoip + phip;
MD_FLOAT fpair = -psip * recip;
fix += delx * fpair;
fiy += dely * fpair;
fiz += delz * fpair;
//fpair *= 0.5;
}
}
atom_fx(i) = fix;
atom_fy(i) = fiy;
atom_fz(i) = fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("force_eam");
}
double E = getTimeStamp();
return E-S;
}

298
verletlist/force_lj.c Normal file
View File

@@ -0,0 +1,298 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
//---
#include <atom.h>
#include <likwid-marker.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
#ifdef __SIMD_KERNEL__
#include <simd.h>
#endif
double computeForceLJFullNeigh(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int nLocal = atom->Nlocal;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for (int i = 0; i < nLocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double timeStart = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
for (int i = 0; i < nLocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
for (int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if (rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
#endif
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
#ifdef USE_REFERENCE_VERSION
if (numneighs % VECTOR_WIDTH > 0) {
addStat(stats->atoms_outside_cutoff,
VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
}
#endif
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters,
(numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("force");
}
double timeStop = getTimeStamp();
return timeStop - timeStart;
}
double computeForceLJHalfNeigh(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int nlocal = atom->Nlocal;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for (int i = 0; i < nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double timeStart = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("forceLJ-halfneigh");
#pragma omp for schedule(runtime)
for (int i = 0; i < nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
// Pragma required to vectorize the inner loop
#ifdef ENABLE_OMP_SIMD
#pragma omp simd reduction(+ : fix, fiy, fiz)
#endif
for (int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if (rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
// We do not need to update forces for ghost atoms
if (j < nlocal) {
atom_fx(j) -= delx * force;
atom_fy(j) -= dely * force;
atom_fz(j) -= delz * force;
}
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters,
(numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("forceLJ-halfneigh");
}
double timeStop = getTimeStamp();
return timeStop - timeStart;
}
double computeForceLJFullNeigh_simd(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for (int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#ifndef __SIMD_KERNEL__
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
exit(-1);
#else
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
for (int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
MD_SIMD_FLOAT fix = simd_zero();
MD_SIMD_FLOAT fiy = simd_zero();
MD_SIMD_FLOAT fiz = simd_zero();
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
// If the last iteration of this loop is separated from the rest, this
// mask can be set only there
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
numneighs_vec);
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
#ifdef AOS
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
MD_SIMD_FLOAT delx = xtmp -
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp -
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp -
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
#else
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
#endif
MD_SIMD_FLOAT rsq = simd_fma(delx,
delx,
simd_fma(dely, dely, simd_mul(delz, delz)));
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
simd_mask_cond_lt(rsq, cutforcesq_vec));
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
MD_SIMD_FLOAT force = simd_mul(c48_vec,
simd_mul(sr6,
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
}
atom_fx(i) += simd_h_reduce_sum(fix);
atom_fy(i) += simd_h_reduce_sum(fiy);
atom_fz(i) += simd_h_reduce_sum(fiz);
}
LIKWID_MARKER_STOP("force");
}
#endif
double E = getTimeStamp();
return E - S;
}

103
verletlist/includes/atom.h Normal file
View File

@@ -0,0 +1,103 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
#ifdef CUDA_TARGET
# define KERNEL_NAME "CUDA"
# define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
# define initialIntegrate initialIntegrate_cuda
# define finalIntegrate finalIntegrate_cuda
# define buildNeighbor buildNeighbor_cuda
# define updatePbc updatePbc_cuda
# define updateAtomsPbc updateAtomsPbc_cuda
#else
# ifdef USE_SIMD_KERNEL
# define KERNEL_NAME "SIMD"
# define computeForceLJFullNeigh computeForceLJFullNeigh_simd
# else
# define KERNEL_NAME "plain-C"
# define computeForceLJFullNeigh computeForceLJFullNeigh_plain_c
# endif
# define initialIntegrate initialIntegrate_cpu
# define finalIntegrate finalIntegrate_cpu
# define buildNeighbor buildNeighbor_cpu
# define updatePbc updatePbc_cpu
# define updateAtomsPbc updateAtomsPbc_cpu
#endif
typedef struct {
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int *border_map;
int *type;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
} DeviceAtom;
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int *border_map;
int *type;
int ntypes;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
// DEM
MD_FLOAT *radius;
MD_FLOAT *av;
MD_FLOAT *r;
// Device data
DeviceAtom d_atom;
} Atom;
extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern int readAtom_in(Atom*, Parameter*);
extern void writeAtom(Atom*, Parameter*);
extern void growAtom(Atom*);
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]
# define atom_y(i) atom->x[(i) * 3 + 1]
# define atom_z(i) atom->x[(i) * 3 + 2]
# define atom_vx(i) atom->vx[(i) * 3 + 0]
# define atom_vy(i) atom->vx[(i) * 3 + 1]
# define atom_vz(i) atom->vx[(i) * 3 + 2]
# define atom_fx(i) atom->fx[(i) * 3 + 0]
# define atom_fy(i) atom->fx[(i) * 3 + 1]
# define atom_fz(i) atom->fx[(i) * 3 + 2]
#else
# define POS_DATA_LAYOUT "SoA"
# define atom_x(i) atom->x[i]
# define atom_y(i) atom->y[i]
# define atom_z(i) atom->z[i]
# define atom_vx(i) atom->vx[i]
# define atom_vy(i) atom->vy[i]
# define atom_vz(i) atom->vz[i]
# define atom_fx(i) atom->fx[i]
# define atom_fy(i) atom->fy[i]
# define atom_fz(i) atom->fz[i]
#endif
#endif

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
//---
#include <parameter.h>
#include <atom.h>
void initialIntegrate_cpu(bool reneigh, Parameter *param, Atom *atom) {
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) += param->dtforce * atom_fx(i);
atom_vy(i) += param->dtforce * atom_fy(i);
atom_vz(i) += param->dtforce * atom_fz(i);
atom_x(i) = atom_x(i) + param->dt * atom_vx(i);
atom_y(i) = atom_y(i) + param->dt * atom_vy(i);
atom_z(i) = atom_z(i) + param->dt * atom_vz(i);
}
}
void finalIntegrate_cpu(bool reneigh, Parameter *param, Atom *atom) {
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) += param->dtforce * atom_fx(i);
atom_vy(i) += param->dtforce * atom_fy(i);
atom_vz(i) += param->dtforce * atom_fz(i);
}
}
#ifdef CUDA_TARGET
void initialIntegrate_cuda(bool, Parameter*, Atom*);
void finalIntegrate_cuda(bool, Parameter*, Atom*);
#endif

View File

@@ -0,0 +1,55 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __NEIGHBOR_H_
#define __NEIGHBOR_H_
typedef struct {
int *neighbors;
int *numneigh;
} DeviceNeighbor;
typedef struct {
int every;
int ncalls;
int maxneighs;
int half_neigh;
int *neighbors;
int *numneigh;
// Device data
DeviceNeighbor d_neighbor;
} Neighbor;
typedef struct {
MD_FLOAT xprd; MD_FLOAT yprd; MD_FLOAT zprd;
MD_FLOAT bininvx; MD_FLOAT bininvy; MD_FLOAT bininvz;
int mbinxlo; int mbinylo; int mbinzlo;
int nbinx; int nbiny; int nbinz;
int mbinx; int mbiny; int mbinz;
} Neighbor_params;
typedef struct {
int* bincount;
int* bins;
int mbins;
int atoms_per_bin;
} Binning;
extern void initNeighbor(Neighbor*, Parameter*);
extern void setupNeighbor(Parameter*);
extern void binatoms(Atom*);
extern void buildNeighbor_cpu(Atom*, Neighbor*);
extern void sortAtom(Atom*);
#ifdef CUDA_TARGET
extern void buildNeighbor_cuda(Atom*, Neighbor*);
#endif
#endif

24
verletlist/includes/pbc.h Normal file
View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
//---
#include <atom.h>
#include <parameter.h>
#ifndef __PBC_H_
#define __PBC_H_
extern void initPbc(Atom*);
extern void updatePbc_cpu(Atom*, Parameter*, bool);
extern void updateAtomsPbc_cpu(Atom*, Parameter*);
extern void setupPbc(Atom*, Parameter*);
#ifdef CUDA_TARGET
extern void updatePbc_cuda(Atom*, Parameter*, bool);
extern void updateAtomsPbc_cuda(Atom*, Parameter*);
#endif
#endif

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __STATS_H_
#define __STATS_H_
typedef struct {
long long int total_force_neighs;
long long int total_force_iters;
long long int atoms_within_cutoff;
long long int atoms_outside_cutoff;
} Stats;
void initStats(Stats *s);
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
#ifdef COMPUTE_STATS
# define addStat(stat, value) stat += value;
# define beginStatTimer() double Si = getTimeStamp();
# define endStatTimer(stat) stat += getTimeStamp() - Si;
#else
# define addStat(stat, value)
# define beginStatTimer()
# define endStatTimer(stat)
#endif
#endif

View File

@@ -0,0 +1,102 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
#include <stdio.h>
#include <stdlib.h>
#endif
#ifndef VECTOR_WIDTH
# define VECTOR_WIDTH 8
#endif
#ifndef TRACER_CONDITION
# define TRACER_CONDITION (!(timestep % param->every))
#endif
#ifdef MEM_TRACER
# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
if(TRACER_CONDITION) { \
char mem_tracer_fn[128]; \
snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
mem_tracer_fp = fopen(mem_tracer_fn, "w");
}
# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
#else
# define MEM_TRACER_INIT
# define MEM_TRACER_END
# define MEM_TRACE(addr, op)
#endif
#ifdef INDEX_TRACER
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
if(TRACER_CONDITION) { \
char index_tracer_fn[128]; \
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
index_tracer_fp = fopen(index_tracer_fn, "w"); \
}
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
fprintf(index_tracer_fp, "I: "); \
for(int __j = 0; __j < __e; ++__j) { \
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
}
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
if(l[__k] > l[__k + 1]) { \
int __t = l[__k]; \
l[__k] = l[__k + 1]; \
l[__k + 1] = __t; \
} \
} \
} \
} \
} \
}
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
fprintf(index_tracer_fp, "D: "); \
for(int __j = 0; __j < __e - 1; ++__j) { \
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
fprintf(index_tracer_fp, "%d ", __dist); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
} \
}
#else
# define INDEX_TRACER_INIT
# define INDEX_TRACER_END
# define INDEX_TRACE_NATOMS(nl, ng, mn)
# define INDEX_TRACE_ATOM(a)
# define INDEX_TRACE(l, e)
# define DIST_TRACE_SORT(l, e)
# define DIST_TRACE(l, e)
#endif
extern void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep);

12
verletlist/includes/vtk.h Normal file
View File

@@ -0,0 +1,12 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#ifndef __VTK_H_
#define __VTK_H_
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
#endif

299
verletlist/main-stub.c Normal file
View File

@@ -0,0 +1,299 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <string.h>
//---
#include <likwid-marker.h>
//---
#include <timing.h>
#include <allocate.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <thermo.h>
#include <eam.h>
#include <pbc.h>
#include <timers.h>
#include <util.h>
#define HLINE "----------------------------------------------------------------------------\n"
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
// Patterns
#define P_SEQ 0
#define P_FIX 1
#define P_RAND 2
void init(Parameter *param) {
param->input_file = NULL;
param->force_field = FF_LJ;
param->epsilon = 1.0;
param->sigma6 = 1.0;
param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200;
param->nx = 1;
param->ny = 1;
param->nz = 1;
param->lattice = 1.0;
param->cutforce = 1000000.0;
param->cutneigh = param->cutforce;
param->mass = 1.0;
param->half_neigh = 0;
// Unused
param->dt = 0.005;
param->dtforce = 0.5 * param->dt;
param->nstat = 100;
param->temp = 1.44;
param->reneigh_every = 20;
param->proc_freq = 2.4;
param->eam_file = NULL;
}
void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, int nreps) {
const int maxneighs = nneighs * nreps;
neighbor->numneigh = (int*) malloc(atom->Nmax * sizeof(int));
neighbor->neighbors = (int*) malloc(atom->Nmax * maxneighs * sizeof(int));
if(pattern == P_RAND && atom->Nlocal <= nneighs) {
fprintf(stderr, "Error: When using random pattern, number of atoms should be higher than number of neighbors per atom!\n");
exit(-1);
}
for(int i = 0; i < atom->Nlocal; i++) {
int *neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
int j = (pattern == P_SEQ) ? (i + 1) : 0;
int m = (pattern == P_SEQ) ? atom->Nlocal : nneighs;
for(int k = 0; k < nneighs; k++) {
if(pattern == P_RAND) {
int found = 0;
do {
j = rand() % atom->Nlocal;
neighptr[k] = j;
found = (int)(i == j);
for(int l = 0; l < k; l++) {
if(neighptr[l] == j) {
found = 1;
}
}
} while(found == 1);
} else {
neighptr[k] = j;
j = (j + 1) % m;
}
}
for(int r = 1; r < nreps; r++) {
for(int k = 0; k < nneighs; k++) {
neighptr[r * nneighs + k] = neighptr[k];
}
}
neighbor->numneigh[i] = nneighs * nreps;
}
}
int main(int argc, const char *argv[]) {
Eam eam;
Atom atom_data;
Atom *atom = (Atom *)(&atom_data);
Neighbor neighbor;
Stats stats;
Parameter param;
char *pattern_str = NULL;
int pattern = P_SEQ;
int natoms = 256;
int nneighs = 76;
int nreps = 1;
int csv = 0;
LIKWID_MARKER_INIT;
LIKWID_MARKER_REGISTER("force");
DEBUG_MESSAGE("Initializing parameters...\n");
init(&param);
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-f") == 0)) {
if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-p") == 0)) {
pattern_str = strdup(argv[++i]);
if(strncmp(pattern_str, "seq", 3) == 0) { pattern = P_SEQ; }
else if(strncmp(pattern_str, "fix", 3) == 0) { pattern = P_FIX; }
else if(strncmp(pattern_str, "rand", 3) == 0) { pattern = P_RAND; }
else {
fprintf(stderr, "Invalid pattern!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-e") == 0)) {
param.eam_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
param.ntimes = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-na") == 0)) {
natoms = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nn") == 0)) {
nneighs = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nr") == 0)) {
nreps = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "--freq") == 0)) {
param.proc_freq = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--csv") == 0)) {
csv = 1;
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-f <string>: force field (lj or eam), default lj\n");
printf("-p <string>: pattern for data accesses (seq, fix or rand)\n");
printf("-n / --nsteps <int>: number of timesteps for simulation\n");
printf("-na <int>: number of atoms (default 256)\n");
printf("-nn <int>: number of neighbors per atom (default 76)\n");
printf("-nr <int>: number of times neighbor lists should be replicated (default 1)\n");
printf("--freq <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
printf("--csv: set output as CSV style\n");
printf(HLINE);
exit(EXIT_SUCCESS);
}
}
if(pattern_str == NULL) {
pattern_str = strdup("seq\0");
}
if(param.force_field == FF_EAM) {
DEBUG_MESSAGE("Initializing EAM parameters...\n");
initEam(&eam, &param);
}
DEBUG_MESSAGE("Initializing atoms...\n");
initAtom(atom);
initStats(&stats);
atom->ntypes = param.ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param.epsilon;
atom->sigma6[i] = param.sigma6;
atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
atom->cutforcesq[i] = param.cutforce * param.cutforce;
}
DEBUG_MESSAGE("Creating atoms...\n");
for(int i = 0; i < natoms; ++i) {
while(atom->Nlocal > atom->Nmax - natoms) {
growAtom(atom);
}
atom->type[atom->Nlocal] = rand() % atom->ntypes;
atom_x(atom->Nlocal) = (MD_FLOAT)(i) * 0.00001;
atom_y(atom->Nlocal) = (MD_FLOAT)(i) * 0.00001;
atom_z(atom->Nlocal) = (MD_FLOAT)(i) * 0.00001;
atom_vx(atom->Nlocal) = 0.0;
atom_vy(atom->Nlocal) = 0.0;
atom_vz(atom->Nlocal) = 0.0;
atom->Nlocal++;
}
const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
const double estim_neighbors_volume = (double)(atom->Nlocal * (nneighs + 2) * sizeof(int));
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
if(!csv) {
printf("Pattern: %s\n", pattern_str);
printf("Number of timesteps: %d\n", param.ntimes);
printf("Number of atoms: %d\n", natoms);
printf("Number of neighbors per atom: %d\n", nneighs);
printf("Number of times to replicate neighbor lists: %d\n", nreps);
printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
}
DEBUG_MESSAGE("Initializing neighbor lists...\n");
initNeighbor(&neighbor, &param);
DEBUG_MESSAGE("Creating neighbor lists...\n");
createNeighbors(atom, &neighbor, pattern, nneighs, nreps);
DEBUG_MESSAGE("Computing forces...\n");
double T_accum = 0.0;
for(int i = 0; i < param.ntimes; i++) {
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, atom, &neighbor, i + 1);
#endif
if(param.force_field == FF_EAM) {
computeForceEam(&eam, &param, atom, &neighbor, &stats);
} else {
if(param.half_neigh) {
T_accum += computeForceLJHalfNeigh(&param, atom, &neighbor, &stats);
} else {
T_accum += computeForceLJFullNeigh(&param, atom, &neighbor, &stats);
}
}
}
double freq_hz = param.proc_freq * 1.e9;
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes);
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes) * freq_hz;
const double cycles_per_neigh = cycles_per_atom / (double)(nneighs);
if(!csv) {
printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6);
if(param.proc_freq > 0.0) {
printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
}
} else {
printf("steps,pattern,natoms,nneighs,nreps,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
if(param.proc_freq > 0.0) {
printf(",cy/atom,cy/neigh");
}
printf("\n");
printf("%d,%s,%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
param.ntimes, pattern_str, natoms, nneighs, nreps,
estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6);
if(param.proc_freq > 0.0) {
printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
}
printf("\n");
}
double timer[NUMTIMER];
timer[FORCE] = T_accum;
displayStatistics(atom, &param, &stats, timer);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

365
verletlist/main.c Normal file
View File

@@ -0,0 +1,365 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// #include <omp.h>
#include <likwid-marker.h>
#include <allocate.h>
#include <atom.h>
#include <device.h>
#include <eam.h>
#include <integrate.h>
#include <neighbor.h>
#include <parameter.h>
#include <pbc.h>
#include <stats.h>
#include <thermo.h>
#include <timers.h>
#include <timing.h>
#include <util.h>
#include <vtk.h>
#define HLINE "------------------------------------------------------------------\n"
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
#ifdef CUDA_TARGET
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
#endif
double setup(Parameter* param, Eam* eam, Atom* atom, Neighbor* neighbor, Stats* stats)
{
if (param->force_field == FF_EAM) {
initEam(eam, param);
}
double timeStart, timeStop;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
timeStart = getTimeStamp();
initAtom(atom);
initPbc(atom);
initStats(stats);
initNeighbor(neighbor, param);
if (param->input_file == NULL) {
createAtom(atom, param);
} else {
readAtom(atom, param);
}
setupNeighbor(param);
setupThermo(param, atom->Natoms);
if (param->input_file == NULL) {
adjustThermo(param, atom);
}
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
#endif
setupPbc(atom, param);
initDevice(atom, neighbor);
updatePbc(atom, param, true);
buildNeighbor(atom, neighbor);
timeStop = getTimeStamp();
return timeStop - timeStart;
}
double reneighbour(Parameter* param, Atom* atom, Neighbor* neighbor)
{
double timeStart, timeStop;
timeStart = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
updateAtomsPbc(atom, param);
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
#endif
setupPbc(atom, param);
updatePbc(atom, param, true);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
timeStop = getTimeStamp();
return timeStop - timeStart;
}
void printAtomState(Atom* atom)
{
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
atom->Natoms,
atom->Nlocal,
atom->Nghost,
atom->Nmax);
// int nall = atom->Nlocal + atom->Nghost;
// for (int i=0; i<nall; i++) {
// printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]);
// }
}
double computeForce(
Eam* eam, Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
if (param->force_field == FF_EAM) {
return computeForceEam(eam, param, atom, neighbor, stats);
} else if (param->force_field == FF_DEM) {
if (param->half_neigh) {
fprintf(stderr, "Error: DEM cannot use half neighbor-lists!\n");
return 0.0;
} else {
return computeForceDemFullNeigh(param, atom, neighbor, stats);
}
}
if (param->half_neigh) {
return computeForceLJHalfNeigh(param, atom, neighbor, stats);
}
#ifdef CUDA_TARGET
return computeForceLJFullNeigh(param, atom, neighbor);
#else
return computeForceLJFullNeigh(param, atom, neighbor, stats);
#endif
}
void writeInput(Parameter* param, Atom* atom)
{
FILE* fpin = fopen("input.in", "w");
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
for (int i = 0; i < atom->Nlocal; i++) {
fprintf(fpin,
"1,%f,%f,%f,%f,%f,%f\n",
atom_x(i),
atom_y(i),
atom_z(i),
atom_vx(i),
atom_vy(i),
atom_vz(i));
}
fclose(fpin);
}
int main(int argc, char** argv)
{
double timer[NUMTIMER];
Eam eam;
Atom atom;
Neighbor neighbor;
Stats stats;
Parameter param;
LIKWID_MARKER_INIT;
#pragma omp parallel
{
LIKWID_MARKER_REGISTER("force");
// LIKWID_MARKER_REGISTER("reneighbour");
// LIKWID_MARKER_REGISTER("pbc");
}
initParameter(&param);
for (int i = 0; i < argc; i++) {
if ((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
readParameter(&param, argv[++i]);
continue;
}
if ((strcmp(argv[i], "-f") == 0)) {
if ((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n");
exit(-1);
}
continue;
}
if ((strcmp(argv[i], "-i") == 0)) {
param.input_file = strdup(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-e") == 0)) {
param.eam_file = strdup(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
param.ntimes = atoi(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-nx") == 0)) {
param.nx = atoi(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-ny") == 0)) {
param.ny = atoi(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-nz") == 0)) {
param.nz = atoi(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-half") == 0)) {
param.half_neigh = atoi(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
param.cutforce = atof(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
param.skin = atof(argv[++i]);
continue;
}
if ((strcmp(argv[i], "--freq") == 0)) {
param.proc_freq = atof(argv[++i]);
continue;
}
if ((strcmp(argv[i], "--vtk") == 0)) {
param.vtk_file = strdup(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-w") == 0)) {
param.write_atom_file = strdup(argv[++i]);
continue;
}
if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-p / --params <string>: file to read parameters from (can be "
"specified more than once)\n");
printf("-f <string>: force field (lj, eam or dem), "
"default lj\n");
printf("-i <string>: input file with atom positions "
"(dump)\n");
printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for "
"simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in "
"x/y/z direction\n");
printf("-half <int>: use half (1) or full (0) neighbor "
"lists\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("-w <file>: write input atoms to file\n");
printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n");
printf(HLINE);
exit(EXIT_SUCCESS);
}
}
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param);
printf(HLINE);
printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
if (param.write_atom_file != NULL) {
writeAtom(&atom, &param);
}
// writeInput(&param, &atom);
timer[FORCE] = computeForce(&eam, &param, &atom, &neighbor, &stats);
timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp();
if (param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
}
for (int n = 0; n < param.ntimes; n++) {
bool reneigh = (n + 1) % param.reneigh_every == 0;
initialIntegrate(reneigh, &param, &atom);
if ((n + 1) % param.reneigh_every) {
updatePbc(&atom, &param, false);
} else {
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
}
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
timer[FORCE] += computeForce(&eam, &param, &atom, &neighbor, &stats);
finalIntegrate(reneigh, &param, &atom);
if (!((n + 1) % param.nstat) && (n + 1) < param.ntimes) {
#ifdef CUDA_TARGET
memcpyFromGPU(atom.x, atom.d_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3);
#endif
computeThermo(n + 1, &param, &atom);
}
if (param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
}
}
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
computeThermo(-1, &param, &atom);
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n",
atom.Natoms,
atom.Nghost,
param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL],
timer[FORCE],
timer[NEIGH],
timer[TOTAL] - timer[FORCE] - timer[NEIGH]);
printf(HLINE);
// int nthreads = 0;
// int chunkSize = 0;
// omp_sched_t schedKind;
// char schedType[10];
// #pragma omp parallel
// #pragma omp master
// {
// omp_get_schedule(&schedKind, &chunkSize);
//
// switch (schedKind) {
// case omp_sched_static:
// strcpy(schedType, "static");
// break;
// case omp_sched_dynamic:
// strcpy(schedType, "dynamic");
// break;
// case omp_sched_guided:
// strcpy(schedType, "guided");
// break;
// case omp_sched_auto:
// strcpy(schedType, "auto");
// break;
// }
//
// nthreads = omp_get_max_threads();
// }
//
// printf("Num threads: %d\n", nthreads);
// printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
displayStatistics(&atom, &param, &stats, timer);
#endif
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

385
verletlist/neighbor.c Normal file
View File

@@ -0,0 +1,385 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#define SMALL 1.0e-6
#define FACTOR 0.999
MD_FLOAT xprd, yprd, zprd;
MD_FLOAT bininvx, bininvy, bininvz;
int mbinxlo, mbinylo, mbinzlo;
int nbinx, nbiny, nbinz;
int mbinx, mbiny, mbinz; // n bins in x, y, z
int *bincount;
int *bins;
int mbins; //total number of bins
int atoms_per_bin; // max atoms per bin
MD_FLOAT cutneigh;
MD_FLOAT cutneighsq; // neighbor cutoff squared
int nmax;
int nstencil; // # of bins in stencil
int* stencil; // stencil list of bin offsets
MD_FLOAT binsizex, binsizey, binsizez;
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
static MD_FLOAT bindist(int, int, int);
/* exported subroutines */
void initNeighbor(Neighbor *neighbor, Parameter *param) {
MD_FLOAT neighscale = 5.0 / 6.0;
xprd = param->nx * param->lattice;
yprd = param->ny * param->lattice;
zprd = param->nz * param->lattice;
cutneigh = param->cutneigh;
nbinx = neighscale * param->nx;
nbiny = neighscale * param->ny;
nbinz = neighscale * param->nz;
nmax = 0;
atoms_per_bin = 8;
stencil = NULL;
bins = NULL;
bincount = NULL;
neighbor->maxneighs = 100;
neighbor->numneigh = NULL;
neighbor->neighbors = NULL;
neighbor->half_neigh = param->half_neigh;
}
void setupNeighbor(Parameter* param) {
MD_FLOAT coord;
int mbinxhi, mbinyhi, mbinzhi;
int nextx, nexty, nextz;
if(param->input_file != NULL) {
xprd = param->xprd;
yprd = param->yprd;
zprd = param->zprd;
}
// TODO: update lo and hi for standard case and use them here instead
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
cutneighsq = cutneigh * cutneigh;
if(param->input_file != NULL) {
binsizex = cutneigh * 0.5;
binsizey = cutneigh * 0.5;
binsizez = cutneigh * 0.5;
nbinx = (int)((param->xhi - param->xlo) / binsizex);
nbiny = (int)((param->yhi - param->ylo) / binsizey);
nbinz = (int)((param->zhi - param->zlo) / binsizez);
if(nbinx == 0) { nbinx = 1; }
if(nbiny == 0) { nbiny = 1; }
if(nbinz == 0) { nbinz = 1; }
bininvx = nbinx / (param->xhi - param->xlo);
bininvy = nbiny / (param->yhi - param->ylo);
bininvz = nbinz / (param->zhi - param->zlo);
} else {
binsizex = xprd / nbinx;
binsizey = yprd / nbiny;
binsizez = zprd / nbinz;
bininvx = 1.0 / binsizex;
bininvy = 1.0 / binsizey;
bininvz = 1.0 / binsizez;
}
coord = xlo - cutneigh - SMALL * xprd;
mbinxlo = (int) (coord * bininvx);
if (coord < 0.0) { mbinxlo = mbinxlo - 1; }
coord = xhi + cutneigh + SMALL * xprd;
mbinxhi = (int) (coord * bininvx);
coord = ylo - cutneigh - SMALL * yprd;
mbinylo = (int) (coord * bininvy);
if (coord < 0.0) { mbinylo = mbinylo - 1; }
coord = yhi + cutneigh + SMALL * yprd;
mbinyhi = (int) (coord * bininvy);
coord = zlo - cutneigh - SMALL * zprd;
mbinzlo = (int) (coord * bininvz);
if (coord < 0.0) { mbinzlo = mbinzlo - 1; }
coord = zhi + cutneigh + SMALL * zprd;
mbinzhi = (int) (coord * bininvz);
mbinxlo = mbinxlo - 1;
mbinxhi = mbinxhi + 1;
mbinx = mbinxhi - mbinxlo + 1;
mbinylo = mbinylo - 1;
mbinyhi = mbinyhi + 1;
mbiny = mbinyhi - mbinylo + 1;
mbinzlo = mbinzlo - 1;
mbinzhi = mbinzhi + 1;
mbinz = mbinzhi - mbinzlo + 1;
nextx = (int) (cutneigh * bininvx);
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
nexty = (int) (cutneigh * bininvy);
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
nextz = (int) (cutneigh * bininvz);
if(nextz * binsizez < FACTOR * cutneigh) nextz++;
if (stencil) { free(stencil); }
stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
nstencil = 0;
int kstart = -nextz;
for(int k = kstart; k <= nextz; k++) {
for(int j = -nexty; j <= nexty; j++) {
for(int i = -nextx; i <= nextx; i++) {
if(bindist(i, j, k) < cutneighsq) {
stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
}
}
}
}
mbins = mbinx * mbiny * mbinz;
if (bincount) { free(bincount); }
bincount = (int*) malloc(mbins * sizeof(int));
if (bins) { free(bins); }
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
}
void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
int nall = atom->Nlocal + atom->Nghost;
/* extend atom arrays if necessary */
if(nall > nmax) {
nmax = nall;
if(neighbor->numneigh) free(neighbor->numneigh);
if(neighbor->neighbors) free(neighbor->neighbors);
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
}
/* bin local & ghost atoms */
binatoms(atom);
int resize = 1;
/* loop over each atom, storing neighbors */
while(resize) {
int new_maxneighs = neighbor->maxneighs;
resize = 0;
for(int i = 0; i < atom->Nlocal; i++) {
int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
int n = 0;
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
int ibin = coord2bin(xtmp, ytmp, ztmp);
#ifdef EXPLICIT_TYPES
int type_i = atom->type[i];
#endif
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin];
for(int m = 0; m < bincount[jbin]; m++) {
int j = loc_bin[m];
if((j == i) || (neighbor->half_neigh && (j < i))) {
continue;
}
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
int type_j = atom->type[j];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
#else
const MD_FLOAT cutoff = cutneighsq;
#endif
if(rsq <= cutoff) {
neighptr[n++] = j;
}
}
}
neighbor->numneigh[i] = n;
if(n >= neighbor->maxneighs) {
resize = 1;
if(n >= new_maxneighs) {
new_maxneighs = n;
}
}
}
if(resize) {
printf("RESIZE %d\n", neighbor->maxneighs);
neighbor->maxneighs = new_maxneighs * 1.2;
free(neighbor->neighbors);
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
}
}
}
/* internal subroutines */
MD_FLOAT bindist(int i, int j, int k) {
MD_FLOAT delx, dely, delz;
if(i > 0) {
delx = (i - 1) * binsizex;
} else if(i == 0) {
delx = 0.0;
} else {
delx = (i + 1) * binsizex;
}
if(j > 0) {
dely = (j - 1) * binsizey;
} else if(j == 0) {
dely = 0.0;
} else {
dely = (j + 1) * binsizey;
}
if(k > 0) {
delz = (k - 1) * binsizez;
} else if(k == 0) {
delz = 0.0;
} else {
delz = (k + 1) * binsizez;
}
return (delx * delx + dely * dely + delz * delz);
}
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
int ix, iy, iz;
if(xin >= xprd) {
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * bininvx) - mbinxlo;
} else {
ix = (int)(xin * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * bininvy) - mbinylo;
} else {
iy = (int)(yin * bininvy) - mbinylo - 1;
}
if(zin >= zprd) {
iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
} else if(zin >= 0.0) {
iz = (int)(zin * bininvz) - mbinzlo;
} else {
iz = (int)(zin * bininvz) - mbinzlo - 1;
}
return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
}
void binatoms(Atom *atom) {
int nall = atom->Nlocal + atom->Nghost;
int resize = 1;
while(resize > 0) {
resize = 0;
for(int i = 0; i < mbins; i++) {
bincount[i] = 0;
}
for(int i = 0; i < nall; i++) {
int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
if(bincount[ibin] < atoms_per_bin) {
int ac = bincount[ibin]++;
bins[ibin * atoms_per_bin + ac] = i;
} else {
resize = 1;
}
}
if(resize) {
free(bins);
atoms_per_bin *= 2;
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
}
}
}
void sortAtom(Atom* atom) {
binatoms(atom);
int Nmax = atom->Nmax;
int* binpos = bincount;
for(int i = 1; i < mbins; i++) {
binpos[i] += binpos[i - 1];
}
#ifdef AOS
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
#else
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_y = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_z = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_vy = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
MD_FLOAT* new_vz = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT));
#endif
MD_FLOAT* old_x = atom->x; MD_FLOAT* old_y = atom->y; MD_FLOAT* old_z = atom->z;
MD_FLOAT* old_vx = atom->vx; MD_FLOAT* old_vy = atom->vy; MD_FLOAT* old_vz = atom->vz;
for(int mybin = 0; mybin < mbins; mybin++) {
int start = mybin > 0 ? binpos[mybin - 1] : 0;
int count = binpos[mybin] - start;
for(int k = 0; k < count; k++) {
int new_i = start + k;
int old_i = bins[mybin * atoms_per_bin + k];
#ifdef AOS
new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
new_vx[new_i * 3 + 1] = old_vx[old_i * 3 + 1];
new_vx[new_i * 3 + 2] = old_vx[old_i * 3 + 2];
#else
new_x[new_i] = old_x[old_i];
new_y[new_i] = old_y[old_i];
new_z[new_i] = old_z[old_i];
new_vx[new_i] = old_vx[old_i];
new_vy[new_i] = old_vy[old_i];
new_vz[new_i] = old_vz[old_i];
#endif
}
}
free(atom->x);
free(atom->vx);
atom->x = new_x;
atom->vx = new_vx;
#ifndef AOS
free(atom->y);
free(atom->z);
free(atom->vy);
free(atom->vz);
atom->y = new_y;
atom->z = new_z;
atom->vy = new_vy;
atom->vz = new_vz;
#endif
}

234
verletlist/pbc.c Normal file
View File

@@ -0,0 +1,234 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
//---
#include <allocate.h>
#include <atom.h>
#include <pbc.h>
#define DELTA 20000
int nmaxGhost;
int *PBCx, *PBCy, *PBCz;
static void growPbc(Atom*);
/* exported subroutines */
void initPbc(Atom* atom)
{
nmaxGhost = 0;
atom->border_map = NULL;
PBCx = NULL;
PBCy = NULL;
PBCz = NULL;
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void updatePbc_cpu(Atom* atom, Parameter* param, bool doReneighbor)
{
int* borderMap = atom->border_map;
int nlocal = atom->Nlocal;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for (int i = 0; i < atom->Nghost; i++) {
atom_x(nlocal + i) = atom_x(borderMap[i]) + PBCx[i] * xprd;
atom_y(nlocal + i) = atom_y(borderMap[i]) + PBCy[i] * yprd;
atom_z(nlocal + i) = atom_z(borderMap[i]) + PBCz[i] * zprd;
}
}
/* relocate atoms that have left domain according
* to periodic boundary conditions */
void updateAtomsPbc_cpu(Atom* atom, Parameter* param)
{
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for (int i = 0; i < atom->Nlocal; i++) {
if (atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if (atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if (atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if (atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if (atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if (atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
}
/* setup periodic boundary conditions by
* defining ghost atoms around domain
* only creates mapping and coordinate corrections
* that are then enforced in updatePbc */
#define ADDGHOST(dx, dy, dz) \
Nghost++; \
border_map[Nghost] = i; \
PBCx[Nghost] = dx; \
PBCy[Nghost] = dy; \
PBCz[Nghost] = dz; \
atom->type[atom->Nlocal + Nghost] = atom->type[i]
void setupPbc(Atom* atom, Parameter* param)
{
int* border_map = atom->border_map;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
MD_FLOAT cutneigh = param->cutneigh;
int Nghost = -1;
for (int i = 0; i < atom->Nlocal; i++) {
if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
growAtom(atom);
}
if (Nghost + 7 >= nmaxGhost) {
growPbc(atom);
border_map = atom->border_map;
}
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
/* Setup ghost atoms */
/* 6 planes */
if (param->pbc_x != 0) {
if (x < cutneigh) {
ADDGHOST(+1, 0, 0);
}
if (x >= (xprd - cutneigh)) {
ADDGHOST(-1, 0, 0);
}
}
if (param->pbc_y != 0) {
if (y < cutneigh) {
ADDGHOST(0, +1, 0);
}
if (y >= (yprd - cutneigh)) {
ADDGHOST(0, -1, 0);
}
}
if (param->pbc_z != 0) {
if (z < cutneigh) {
ADDGHOST(0, 0, +1);
}
if (z >= (zprd - cutneigh)) {
ADDGHOST(0, 0, -1);
}
}
/* 8 corners */
if (param->pbc_x != 0 && param->pbc_y != 0 && param->pbc_z != 0) {
if (x < cutneigh && y < cutneigh && z < cutneigh) {
ADDGHOST(+1, +1, +1);
}
if (x < cutneigh && y >= (yprd - cutneigh) && z < cutneigh) {
ADDGHOST(+1, -1, +1);
}
if (x < cutneigh && y < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(+1, +1, -1);
}
if (x < cutneigh && y >= (yprd - cutneigh) && z >= (zprd - cutneigh)) {
ADDGHOST(+1, -1, -1);
}
if (x >= (xprd - cutneigh) && y < cutneigh && z < cutneigh) {
ADDGHOST(-1, +1, +1);
}
if (x >= (xprd - cutneigh) && y >= (yprd - cutneigh) && z < cutneigh) {
ADDGHOST(-1, -1, +1);
}
if (x >= (xprd - cutneigh) && y < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(-1, +1, -1);
}
if (x >= (xprd - cutneigh) && y >= (yprd - cutneigh) &&
z >= (zprd - cutneigh)) {
ADDGHOST(-1, -1, -1);
}
}
/* 12 edges */
if (param->pbc_x != 0 && param->pbc_z != 0) {
if (x < cutneigh && z < cutneigh) {
ADDGHOST(+1, 0, +1);
}
if (x < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(+1, 0, -1);
}
if (x >= (xprd - cutneigh) && z < cutneigh) {
ADDGHOST(-1, 0, +1);
}
if (x >= (xprd - cutneigh) && z >= (zprd - cutneigh)) {
ADDGHOST(-1, 0, -1);
}
}
if (param->pbc_y != 0 && param->pbc_z != 0) {
if (y < cutneigh && z < cutneigh) {
ADDGHOST(0, +1, +1);
}
if (y < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(0, +1, -1);
}
if (y >= (yprd - cutneigh) && z < cutneigh) {
ADDGHOST(0, -1, +1);
}
if (y >= (yprd - cutneigh) && z >= (zprd - cutneigh)) {
ADDGHOST(0, -1, -1);
}
}
if (param->pbc_x != 0 && param->pbc_y != 0) {
if (y < cutneigh && x < cutneigh) {
ADDGHOST(+1, +1, 0);
}
if (y < cutneigh && x >= (xprd - cutneigh)) {
ADDGHOST(-1, +1, 0);
}
if (y >= (yprd - cutneigh) && x < cutneigh) {
ADDGHOST(+1, -1, 0);
}
if (y >= (yprd - cutneigh) && x >= (xprd - cutneigh)) {
ADDGHOST(-1, -1, 0);
}
}
}
// increase by one to make it the ghost atom count
atom->Nghost = Nghost + 1;
}
/* internal subroutines */
void growPbc(Atom* atom)
{
int nold = nmaxGhost;
nmaxGhost += DELTA;
atom->border_map = (int*)reallocate(atom->border_map,
ALIGNMENT,
nmaxGhost * sizeof(int),
nold * sizeof(int));
PBCx = (int*)reallocate(PBCx, ALIGNMENT, nmaxGhost * sizeof(int), nold * sizeof(int));
PBCy = (int*)reallocate(PBCy, ALIGNMENT, nmaxGhost * sizeof(int), nold * sizeof(int));
PBCz = (int*)reallocate(PBCz, ALIGNMENT, nmaxGhost * sizeof(int), nold * sizeof(int));
}

48
verletlist/stats.c Normal file
View File

@@ -0,0 +1,48 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <atom.h>
#include <parameter.h>
#include <stats.h>
#include <timers.h>
void initStats(Stats *s) {
s->total_force_neighs = 0;
s->total_force_iters = 0;
s->atoms_within_cutoff = 0;
s->atoms_outside_cutoff = 0;
}
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
#ifdef COMPUTE_STATS
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
(double)(stats->total_force_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
double avg_neigh = stats->total_force_neighs / (double)(atom->Nlocal * (param->ntimes + 1));
double avg_simd = stats->total_force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
#ifdef EXPLICIT_TYPES
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->total_force_neighs) * sizeof(int);
#endif
printf("Statistics:\n");
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh);
printf("\tAverage SIMD iterations per atom: %.4f\n", avg_simd);
printf("\tTotal number of computed pair interactions: %lld\n", stats->total_force_neighs);
printf("\tTotal number of SIMD iterations: %lld\n", stats->total_force_iters);
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->total_force_iters);
#ifdef USE_REFERENCE_VERSION
const double eff_pct = (double)stats->atoms_within_cutoff / (double)(stats->atoms_within_cutoff + stats->atoms_outside_cutoff) * 100.0;
printf("\tAtoms within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->atoms_within_cutoff, stats->atoms_outside_cutoff, eff_pct);
#endif
#endif
}

56
verletlist/tracing.c Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <tracing.h>
void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep) {
MEM_TRACER_INIT;
INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal;
int* neighs;
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MEM_TRACE(atom_x(i), 'R');
MEM_TRACE(atom_y(i), 'R');
MEM_TRACE(atom_z(i), 'R');
INDEX_TRACE_ATOM(i);
#ifdef EXPLICIT_TYPES
MEM_TRACE(atom->type[i], 'R');
#endif
DIST_TRACE_SORT(neighs, numneighs);
INDEX_TRACE(neighs, numneighs);
DIST_TRACE(neighs, numneighs);
for(int k = 0; k < numneighs; k++) {
MEM_TRACE(neighs[k], 'R');
MEM_TRACE(atom_x(j), 'R');
MEM_TRACE(atom_y(j), 'R');
MEM_TRACE(atom_z(j), 'R');
#ifdef EXPLICIT_TYPES
MEM_TRACE(atom->type[j], 'R');
#endif
}
MEM_TRACE(atom_fx(i), 'R');
MEM_TRACE(atom_fx(i), 'W');
MEM_TRACE(atom_fy(i), 'R');
MEM_TRACE(atom_fy(i), 'W');
MEM_TRACE(atom_fz(i), 'R');
MEM_TRACE(atom_fz(i), 'W');
}
INDEX_TRACER_END;
MEM_TRACER_END;
}

50
verletlist/vtk.c Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <atom.h>
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
FILE* fp = fopen(timestep_filename, "wb");
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
}
fprintf(fp, "\n\n");
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "1 %d\n", i);
}
fprintf(fp, "\n\n");
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "1\n");
}
fprintf(fp, "\n\n");
fprintf(fp, "POINT_DATA %d\n", atom->Nlocal);
fprintf(fp, "SCALARS mass double\n");
fprintf(fp, "LOOKUP_TABLE default\n");
for(int i = 0; i < atom->Nlocal; i++) {
fprintf(fp, "1.0\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}