Fix explicit types for CUDA and provide option to write initial state of system
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
da3b1dd53f
commit
02629612a9
@ -21,6 +21,7 @@ typedef struct {
|
|||||||
char* input_file;
|
char* input_file;
|
||||||
char* vtk_file;
|
char* vtk_file;
|
||||||
char* xtc_file;
|
char* xtc_file;
|
||||||
|
char* write_atom_file;
|
||||||
MD_FLOAT epsilon;
|
MD_FLOAT epsilon;
|
||||||
MD_FLOAT sigma;
|
MD_FLOAT sigma;
|
||||||
MD_FLOAT sigma6;
|
MD_FLOAT sigma6;
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
#ifndef __TIMING_H_
|
#ifndef __TIMING_H_
|
||||||
#define __TIMING_H_
|
#define __TIMING_H_
|
||||||
|
|
||||||
extern double getTimeStamp();
|
extern double getTimeStamp(void);
|
||||||
extern double getTimeResolution();
|
extern double getTimeResolution(void);
|
||||||
extern double getTimeStamp_();
|
extern double getTimeStamp_(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -17,6 +17,7 @@ void initParameter(Parameter *param) {
|
|||||||
param->vtk_file = NULL;
|
param->vtk_file = NULL;
|
||||||
param->xtc_file = NULL;
|
param->xtc_file = NULL;
|
||||||
param->eam_file = NULL;
|
param->eam_file = NULL;
|
||||||
|
param->write_atom_file = NULL;
|
||||||
param->force_field = FF_LJ;
|
param->force_field = FF_LJ;
|
||||||
param->epsilon = 1.0;
|
param->epsilon = 1.0;
|
||||||
param->sigma = 1.0;
|
param->sigma = 1.0;
|
||||||
|
@ -502,6 +502,21 @@ int readAtom_in(Atom* atom, Parameter* param) {
|
|||||||
return natoms;
|
return natoms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void writeAtom(Atom *atom, Parameter *param) {
|
||||||
|
FILE *fp = fopen(param->write_atom_file, "w");
|
||||||
|
|
||||||
|
for(int i = 0; i < atom->Nlocal; i++) {
|
||||||
|
fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
|
||||||
|
atom->type[i], 1.0,
|
||||||
|
atom_x(i), atom_y(i), atom_z(i),
|
||||||
|
atom_vx(i), atom_vy(i), atom_vz(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
|
||||||
|
param->write_atom_file, param->xprd, param->yprd, param->zprd);
|
||||||
|
}
|
||||||
|
|
||||||
void growAtom(Atom *atom) {
|
void growAtom(Atom *atom) {
|
||||||
DeviceAtom *d_atom = &(atom->d_atom);
|
DeviceAtom *d_atom = &(atom->d_atom);
|
||||||
int nold = atom->Nmax;
|
int nold = atom->Nmax;
|
||||||
|
@ -29,7 +29,7 @@ extern "C" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// cuda kernel
|
// cuda kernel
|
||||||
__global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh) {
|
__global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh, int ntypes) {
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if(i >= Nlocal) {
|
if(i >= Nlocal) {
|
||||||
return;
|
return;
|
||||||
@ -46,6 +46,10 @@ __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, M
|
|||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
|
#ifdef EXPLICIT_TYPES
|
||||||
|
const int type_i = atom->type[i];
|
||||||
|
#endif
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int j = neigh_neighbors[Nlocal * k + i];
|
int j = neigh_neighbors[Nlocal * k + i];
|
||||||
MD_FLOAT delx = xtmp - atom_x(j);
|
MD_FLOAT delx = xtmp - atom_x(j);
|
||||||
@ -55,7 +59,7 @@ __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, M
|
|||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
const int type_j = atom->type[j];
|
const int type_j = atom->type[j];
|
||||||
const int type_ij = type_i * atom->ntypes + type_j;
|
const int type_ij = type_i * ntypes + type_j;
|
||||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||||
@ -138,11 +142,9 @@ void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
|||||||
double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||||
const int num_threads_per_block = get_cuda_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
#ifndef EXPLICIT_TYPES
|
|
||||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
MD_FLOAT sigma6 = param->sigma6;
|
MD_FLOAT sigma6 = param->sigma6;
|
||||||
MD_FLOAT epsilon = param->epsilon;
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
int nDevices;
|
int nDevices;
|
||||||
@ -165,7 +167,7 @@ double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neig
|
|||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh);
|
calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh, atom->ntypes);
|
||||||
cuda_assert("calc_force", cudaPeekAtLastError());
|
cuda_assert("calc_force", cudaPeekAtLastError());
|
||||||
cuda_assert("calc_force", cudaDeviceSynchronize());
|
cuda_assert("calc_force", cudaDeviceSynchronize());
|
||||||
cudaProfilerStop();
|
cudaProfilerStop();
|
||||||
|
@ -120,7 +120,7 @@ __global__ void binatoms_kernel(DeviceAtom a, int nall, int* bincount, int* bins
|
|||||||
|
|
||||||
__global__ void compute_neighborhood(
|
__global__ void compute_neighborhood(
|
||||||
DeviceAtom a, DeviceNeighbor neigh, Neighbor_params np, int nlocal, int maxneighs, int nstencil, int* stencil,
|
DeviceAtom a, DeviceNeighbor neigh, Neighbor_params np, int nlocal, int maxneighs, int nstencil, int* stencil,
|
||||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) {
|
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq, int ntypes) {
|
||||||
|
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if(i >= nlocal) {
|
if(i >= nlocal) {
|
||||||
@ -157,7 +157,7 @@ __global__ void compute_neighborhood(
|
|||||||
|
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
int type_j = atom->type[j];
|
int type_j = atom->type[j];
|
||||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
const MD_FLOAT cutoff = atom->cutneighsq[type_i * ntypes + type_j];
|
||||||
#else
|
#else
|
||||||
const MD_FLOAT cutoff = cutneighsq;
|
const MD_FLOAT cutoff = cutneighsq;
|
||||||
#endif
|
#endif
|
||||||
@ -269,7 +269,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) {
|
|||||||
np, atom->Nlocal, neighbor->maxneighs, nstencil, c_stencil,
|
np, atom->Nlocal, neighbor->maxneighs, nstencil, c_stencil,
|
||||||
c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount,
|
c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount,
|
||||||
c_new_maxneighs,
|
c_new_maxneighs,
|
||||||
cutneighsq);
|
cutneighsq, atom->ntypes);
|
||||||
|
|
||||||
cuda_assert("compute_neighborhood", cudaPeekAtLastError());
|
cuda_assert("compute_neighborhood", cudaPeekAtLastError());
|
||||||
cuda_assert("compute_neighborhood", cudaDeviceSynchronize());
|
cuda_assert("compute_neighborhood", cudaDeviceSynchronize());
|
||||||
|
@ -14,6 +14,7 @@ void initDevice(Atom *atom, Neighbor *neighbor) {
|
|||||||
|
|
||||||
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
|
d_atom->cutneighsq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
|
d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs);
|
||||||
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
|
d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax);
|
||||||
@ -22,6 +23,7 @@ void initDevice(Atom *atom, Neighbor *neighbor) {
|
|||||||
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
||||||
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
|
memcpyToGPU(d_atom->cutneighsq, atom->cutneighsq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes);
|
||||||
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
|
memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax);
|
||||||
}
|
}
|
||||||
|
@ -73,6 +73,7 @@ extern int readAtom_pdb(Atom*, Parameter*);
|
|||||||
extern int readAtom_gro(Atom*, Parameter*);
|
extern int readAtom_gro(Atom*, Parameter*);
|
||||||
extern int readAtom_dmp(Atom*, Parameter*);
|
extern int readAtom_dmp(Atom*, Parameter*);
|
||||||
extern int readAtom_in(Atom*, Parameter*);
|
extern int readAtom_in(Atom*, Parameter*);
|
||||||
|
extern void writeAtom(Atom*, Parameter*);
|
||||||
extern void growAtom(Atom*);
|
extern void growAtom(Atom*);
|
||||||
|
|
||||||
#ifdef AOS
|
#ifdef AOS
|
||||||
|
@ -207,6 +207,10 @@ int main(int argc, char** argv) {
|
|||||||
param.vtk_file = strdup(argv[++i]);
|
param.vtk_file = strdup(argv[++i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if((strcmp(argv[i], "-w") == 0)) {
|
||||||
|
param.write_atom_file = strdup(argv[++i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
@ -219,6 +223,7 @@ int main(int argc, char** argv) {
|
|||||||
printf("-half <int>: use half (1) or full (0) neighbor lists\n");
|
printf("-half <int>: use half (1) or full (0) neighbor lists\n");
|
||||||
printf("-r / --radius <real>: set cutoff radius\n");
|
printf("-r / --radius <real>: set cutoff radius\n");
|
||||||
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
||||||
|
printf("-w <file>: write input atoms to file\n");
|
||||||
printf("--freq <real>: processor frequency (GHz)\n");
|
printf("--freq <real>: processor frequency (GHz)\n");
|
||||||
printf("--vtk <string>: VTK file for visualization\n");
|
printf("--vtk <string>: VTK file for visualization\n");
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
@ -237,6 +242,10 @@ int main(int argc, char** argv) {
|
|||||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if(param.write_atom_file != NULL) {
|
||||||
|
writeAtom(&atom, ¶m);
|
||||||
|
}
|
||||||
|
|
||||||
//writeInput(¶m, &atom);
|
//writeInput(¶m, &atom);
|
||||||
|
|
||||||
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||||
|
Loading…
Reference in New Issue
Block a user