diff --git a/common/includes/parameter.h b/common/includes/parameter.h index f4ff676..614be9f 100644 --- a/common/includes/parameter.h +++ b/common/includes/parameter.h @@ -21,6 +21,7 @@ typedef struct { char* input_file; char* vtk_file; char* xtc_file; + char* write_atom_file; MD_FLOAT epsilon; MD_FLOAT sigma; MD_FLOAT sigma6; diff --git a/common/includes/timing.h b/common/includes/timing.h index 484659f..7792085 100644 --- a/common/includes/timing.h +++ b/common/includes/timing.h @@ -7,8 +7,8 @@ #ifndef __TIMING_H_ #define __TIMING_H_ -extern double getTimeStamp(); -extern double getTimeResolution(); -extern double getTimeStamp_(); +extern double getTimeStamp(void); +extern double getTimeResolution(void); +extern double getTimeStamp_(void); #endif diff --git a/common/parameter.c b/common/parameter.c index 1d6d593..8aab646 100644 --- a/common/parameter.c +++ b/common/parameter.c @@ -17,6 +17,7 @@ void initParameter(Parameter *param) { param->vtk_file = NULL; param->xtc_file = NULL; param->eam_file = NULL; + param->write_atom_file = NULL; param->force_field = FF_LJ; param->epsilon = 1.0; param->sigma = 1.0; diff --git a/lammps/atom.c b/lammps/atom.c index a96eeb7..5dd0cdf 100644 --- a/lammps/atom.c +++ b/lammps/atom.c @@ -502,6 +502,21 @@ int readAtom_in(Atom* atom, Parameter* param) { return natoms; } +void writeAtom(Atom *atom, Parameter *param) { + FILE *fp = fopen(param->write_atom_file, "w"); + + for(int i = 0; i < atom->Nlocal; i++) { + fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n", + atom->type[i], 1.0, + atom_x(i), atom_y(i), atom_z(i), + atom_vx(i), atom_vy(i), atom_vz(i)); + } + + fclose(fp); + fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n", + param->write_atom_file, param->xprd, param->yprd, param->zprd); +} + void growAtom(Atom *atom) { DeviceAtom *d_atom = &(atom->d_atom); int nold = atom->Nmax; diff --git a/lammps/cuda/force.cu b/lammps/cuda/force.cu index a6ec4d8..f96c826 100644 --- a/lammps/cuda/force.cu +++ b/lammps/cuda/force.cu @@ -29,7 +29,7 @@ extern "C" { } // cuda kernel -__global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh) { +__global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh, int ntypes) { const int i = blockIdx.x * blockDim.x + threadIdx.x; if(i >= Nlocal) { return; @@ -46,6 +46,10 @@ __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, M MD_FLOAT fiy = 0; MD_FLOAT fiz = 0; +#ifdef EXPLICIT_TYPES + const int type_i = atom->type[i]; +#endif + for(int k = 0; k < numneighs; k++) { int j = neigh_neighbors[Nlocal * k + i]; MD_FLOAT delx = xtmp - atom_x(j); @@ -55,7 +59,7 @@ __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, M #ifdef EXPLICIT_TYPES const int type_j = atom->type[j]; - const int type_ij = type_i * atom->ntypes + type_j; + const int type_ij = type_i * ntypes + type_j; const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; const MD_FLOAT sigma6 = atom->sigma6[type_ij]; const MD_FLOAT epsilon = atom->epsilon[type_ij]; @@ -138,11 +142,9 @@ void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) { double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) { const int num_threads_per_block = get_cuda_num_threads(); int Nlocal = atom->Nlocal; -#ifndef EXPLICIT_TYPES MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; MD_FLOAT epsilon = param->epsilon; -#endif /* int nDevices; @@ -165,7 +167,7 @@ double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neig double S = getTimeStamp(); LIKWID_MARKER_START("force"); - calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh); + calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh, atom->ntypes); cuda_assert("calc_force", cudaPeekAtLastError()); cuda_assert("calc_force", cudaDeviceSynchronize()); cudaProfilerStop(); diff --git a/lammps/cuda/neighbor.cu b/lammps/cuda/neighbor.cu index b83b8ae..1e40e56 100644 --- a/lammps/cuda/neighbor.cu +++ b/lammps/cuda/neighbor.cu @@ -120,7 +120,7 @@ __global__ void binatoms_kernel(DeviceAtom a, int nall, int* bincount, int* bins __global__ void compute_neighborhood( DeviceAtom a, DeviceNeighbor neigh, Neighbor_params np, int nlocal, int maxneighs, int nstencil, int* stencil, - int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) { + int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq, int ntypes) { const int i = blockIdx.x * blockDim.x + threadIdx.x; if(i >= nlocal) { @@ -157,7 +157,7 @@ __global__ void compute_neighborhood( #ifdef EXPLICIT_TYPES int type_j = atom->type[j]; - const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j]; + const MD_FLOAT cutoff = atom->cutneighsq[type_i * ntypes + type_j]; #else const MD_FLOAT cutoff = cutneighsq; #endif @@ -269,7 +269,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) { np, atom->Nlocal, neighbor->maxneighs, nstencil, c_stencil, c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount, c_new_maxneighs, - cutneighsq); + cutneighsq, atom->ntypes); cuda_assert("compute_neighborhood", cudaPeekAtLastError()); cuda_assert("compute_neighborhood", cudaDeviceSynchronize()); diff --git a/lammps/device_spec.c b/lammps/device_spec.c index b6fec89..42d6002 100644 --- a/lammps/device_spec.c +++ b/lammps/device_spec.c @@ -14,6 +14,7 @@ void initDevice(Atom *atom, Neighbor *neighbor) { d_atom->epsilon = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); d_atom->sigma6 = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + d_atom->cutneighsq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); d_atom->cutforcesq = (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); d_neighbor->neighbors = (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs); d_neighbor->numneigh = (int *) allocateGPU(sizeof(int) * atom->Nmax); @@ -22,6 +23,7 @@ void initDevice(Atom *atom, Neighbor *neighbor) { memcpyToGPU(d_atom->vx, atom->vx, sizeof(MD_FLOAT) * atom->Nmax * 3); memcpyToGPU(d_atom->sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); memcpyToGPU(d_atom->epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); + memcpyToGPU(d_atom->cutneighsq, atom->cutneighsq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); memcpyToGPU(d_atom->cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); memcpyToGPU(d_atom->type, atom->type, sizeof(int) * atom->Nmax); } diff --git a/lammps/includes/atom.h b/lammps/includes/atom.h index ea00284..0fe3b51 100644 --- a/lammps/includes/atom.h +++ b/lammps/includes/atom.h @@ -73,6 +73,7 @@ extern int readAtom_pdb(Atom*, Parameter*); extern int readAtom_gro(Atom*, Parameter*); extern int readAtom_dmp(Atom*, Parameter*); extern int readAtom_in(Atom*, Parameter*); +extern void writeAtom(Atom*, Parameter*); extern void growAtom(Atom*); #ifdef AOS diff --git a/lammps/main.c b/lammps/main.c index ec43857..c623827 100644 --- a/lammps/main.c +++ b/lammps/main.c @@ -207,6 +207,10 @@ int main(int argc, char** argv) { param.vtk_file = strdup(argv[++i]); continue; } + if((strcmp(argv[i], "-w") == 0)) { + param.write_atom_file = strdup(argv[++i]); + continue; + } if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { printf("MD Bench: A minimalistic re-implementation of miniMD\n"); printf(HLINE); @@ -219,6 +223,7 @@ int main(int argc, char** argv) { printf("-half : use half (1) or full (0) neighbor lists\n"); printf("-r / --radius : set cutoff radius\n"); printf("-s / --skin : set skin (verlet buffer)\n"); + printf("-w : write input atoms to file\n"); printf("--freq : processor frequency (GHz)\n"); printf("--vtk : VTK file for visualization\n"); printf(HLINE); @@ -237,6 +242,10 @@ int main(int argc, char** argv) { traceAddresses(¶m, &atom, &neighbor, n + 1); #endif + if(param.write_atom_file != NULL) { + writeAtom(&atom, ¶m); + } + //writeInput(¶m, &atom); timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);