Fix explicit types for CUDA and provide option to write initial state of system
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		| @@ -21,6 +21,7 @@ typedef struct { | |||||||
|     char* input_file; |     char* input_file; | ||||||
|     char* vtk_file; |     char* vtk_file; | ||||||
|     char* xtc_file; |     char* xtc_file; | ||||||
|  |     char* write_atom_file; | ||||||
|     MD_FLOAT epsilon; |     MD_FLOAT epsilon; | ||||||
|     MD_FLOAT sigma; |     MD_FLOAT sigma; | ||||||
|     MD_FLOAT sigma6; |     MD_FLOAT sigma6; | ||||||
|   | |||||||
| @@ -7,8 +7,8 @@ | |||||||
| #ifndef __TIMING_H_ | #ifndef __TIMING_H_ | ||||||
| #define __TIMING_H_ | #define __TIMING_H_ | ||||||
|  |  | ||||||
| extern double getTimeStamp(); | extern double getTimeStamp(void); | ||||||
| extern double getTimeResolution(); | extern double getTimeResolution(void); | ||||||
| extern double getTimeStamp_(); | extern double getTimeStamp_(void); | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ void initParameter(Parameter *param) { | |||||||
|     param->vtk_file = NULL; |     param->vtk_file = NULL; | ||||||
|     param->xtc_file = NULL; |     param->xtc_file = NULL; | ||||||
|     param->eam_file = NULL; |     param->eam_file = NULL; | ||||||
|  |     param->write_atom_file = NULL; | ||||||
|     param->force_field = FF_LJ; |     param->force_field = FF_LJ; | ||||||
|     param->epsilon = 1.0; |     param->epsilon = 1.0; | ||||||
|     param->sigma = 1.0; |     param->sigma = 1.0; | ||||||
|   | |||||||
| @@ -502,6 +502,21 @@ int readAtom_in(Atom* atom, Parameter* param) { | |||||||
|     return natoms; |     return natoms; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void writeAtom(Atom *atom, Parameter *param) { | ||||||
|  |     FILE *fp = fopen(param->write_atom_file, "w"); | ||||||
|  |  | ||||||
|  |     for(int i = 0; i < atom->Nlocal; i++) { | ||||||
|  |         fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n", | ||||||
|  |             atom->type[i], 1.0, | ||||||
|  |             atom_x(i), atom_y(i), atom_z(i), | ||||||
|  |             atom_vx(i), atom_vy(i), atom_vz(i)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fclose(fp); | ||||||
|  |     fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n", | ||||||
|  |         param->write_atom_file, param->xprd, param->yprd, param->zprd); | ||||||
|  | } | ||||||
|  |  | ||||||
| void growAtom(Atom *atom) { | void growAtom(Atom *atom) { | ||||||
|     DeviceAtom *d_atom = &(atom->d_atom); |     DeviceAtom *d_atom = &(atom->d_atom); | ||||||
|     int nold = atom->Nmax; |     int nold = atom->Nmax; | ||||||
|   | |||||||
| @@ -29,7 +29,7 @@ extern "C" { | |||||||
| } | } | ||||||
|  |  | ||||||
| // cuda kernel | // cuda kernel | ||||||
| __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh) { | __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon, int Nlocal, int neigh_maxneighs, int *neigh_neighbors, int *neigh_numneigh, int ntypes) { | ||||||
|     const int i = blockIdx.x * blockDim.x + threadIdx.x; |     const int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||||
|     if(i >= Nlocal) { |     if(i >= Nlocal) { | ||||||
|         return; |         return; | ||||||
| @@ -46,6 +46,10 @@ __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, M | |||||||
|     MD_FLOAT fiy = 0; |     MD_FLOAT fiy = 0; | ||||||
|     MD_FLOAT fiz = 0; |     MD_FLOAT fiz = 0; | ||||||
|  |  | ||||||
|  | #ifdef EXPLICIT_TYPES | ||||||
|  |     const int type_i = atom->type[i]; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|     for(int k = 0; k < numneighs; k++) { |     for(int k = 0; k < numneighs; k++) { | ||||||
|         int j = neigh_neighbors[Nlocal * k + i]; |         int j = neigh_neighbors[Nlocal * k + i]; | ||||||
|         MD_FLOAT delx = xtmp - atom_x(j); |         MD_FLOAT delx = xtmp - atom_x(j); | ||||||
| @@ -55,7 +59,7 @@ __global__ void calc_force(DeviceAtom a, MD_FLOAT cutforcesq, MD_FLOAT sigma6, M | |||||||
|  |  | ||||||
| #ifdef EXPLICIT_TYPES | #ifdef EXPLICIT_TYPES | ||||||
|         const int type_j = atom->type[j]; |         const int type_j = atom->type[j]; | ||||||
|         const int type_ij = type_i * atom->ntypes + type_j; |         const int type_ij = type_i * ntypes + type_j; | ||||||
|         const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; |         const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij]; | ||||||
|         const MD_FLOAT sigma6 = atom->sigma6[type_ij]; |         const MD_FLOAT sigma6 = atom->sigma6[type_ij]; | ||||||
|         const MD_FLOAT epsilon = atom->epsilon[type_ij]; |         const MD_FLOAT epsilon = atom->epsilon[type_ij]; | ||||||
| @@ -138,11 +142,9 @@ void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) { | |||||||
| double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) { | double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) { | ||||||
|     const int num_threads_per_block = get_cuda_num_threads(); |     const int num_threads_per_block = get_cuda_num_threads(); | ||||||
|     int Nlocal = atom->Nlocal; |     int Nlocal = atom->Nlocal; | ||||||
| #ifndef EXPLICIT_TYPES |  | ||||||
|     MD_FLOAT cutforcesq = param->cutforce * param->cutforce; |     MD_FLOAT cutforcesq = param->cutforce * param->cutforce; | ||||||
|     MD_FLOAT sigma6 = param->sigma6; |     MD_FLOAT sigma6 = param->sigma6; | ||||||
|     MD_FLOAT epsilon = param->epsilon; |     MD_FLOAT epsilon = param->epsilon; | ||||||
| #endif |  | ||||||
|  |  | ||||||
|     /* |     /* | ||||||
|     int nDevices; |     int nDevices; | ||||||
| @@ -165,7 +167,7 @@ double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neig | |||||||
|     double S = getTimeStamp(); |     double S = getTimeStamp(); | ||||||
|     LIKWID_MARKER_START("force"); |     LIKWID_MARKER_START("force"); | ||||||
|  |  | ||||||
|     calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh); |     calc_force <<< num_blocks, num_threads_per_block >>> (atom->d_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, neighbor->d_neighbor.neighbors, neighbor->d_neighbor.numneigh, atom->ntypes); | ||||||
|     cuda_assert("calc_force", cudaPeekAtLastError()); |     cuda_assert("calc_force", cudaPeekAtLastError()); | ||||||
|     cuda_assert("calc_force", cudaDeviceSynchronize()); |     cuda_assert("calc_force", cudaDeviceSynchronize()); | ||||||
|     cudaProfilerStop(); |     cudaProfilerStop(); | ||||||
|   | |||||||
| @@ -120,7 +120,7 @@ __global__ void binatoms_kernel(DeviceAtom a, int nall, int* bincount, int* bins | |||||||
|  |  | ||||||
| __global__ void compute_neighborhood( | __global__ void compute_neighborhood( | ||||||
|     DeviceAtom a, DeviceNeighbor neigh, Neighbor_params np, int nlocal, int maxneighs, int nstencil, int* stencil, |     DeviceAtom a, DeviceNeighbor neigh, Neighbor_params np, int nlocal, int maxneighs, int nstencil, int* stencil, | ||||||
|     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq) { |     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq, int ntypes) { | ||||||
|  |  | ||||||
|     const int i = blockIdx.x * blockDim.x + threadIdx.x; |     const int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||||
|     if(i >= nlocal) { |     if(i >= nlocal) { | ||||||
| @@ -157,7 +157,7 @@ __global__ void compute_neighborhood( | |||||||
|  |  | ||||||
| #ifdef EXPLICIT_TYPES | #ifdef EXPLICIT_TYPES | ||||||
|             int type_j = atom->type[j]; |             int type_j = atom->type[j]; | ||||||
|             const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j]; |             const MD_FLOAT cutoff = atom->cutneighsq[type_i * ntypes + type_j]; | ||||||
| #else | #else | ||||||
|             const MD_FLOAT cutoff = cutneighsq; |             const MD_FLOAT cutoff = cutneighsq; | ||||||
| #endif | #endif | ||||||
| @@ -269,7 +269,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) { | |||||||
|                                                                     np, atom->Nlocal, neighbor->maxneighs, nstencil, c_stencil, |                                                                     np, atom->Nlocal, neighbor->maxneighs, nstencil, c_stencil, | ||||||
|                                                                     c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount, |                                                                     c_binning.bins, c_binning.atoms_per_bin, c_binning.bincount, | ||||||
|                                                                     c_new_maxneighs, |                                                                     c_new_maxneighs, | ||||||
| 								                                    cutneighsq); | 								                                    cutneighsq, atom->ntypes); | ||||||
|  |  | ||||||
|         cuda_assert("compute_neighborhood", cudaPeekAtLastError()); |         cuda_assert("compute_neighborhood", cudaPeekAtLastError()); | ||||||
|         cuda_assert("compute_neighborhood", cudaDeviceSynchronize()); |         cuda_assert("compute_neighborhood", cudaDeviceSynchronize()); | ||||||
|   | |||||||
| @@ -14,6 +14,7 @@ void initDevice(Atom *atom, Neighbor *neighbor) { | |||||||
|  |  | ||||||
|     d_atom->epsilon         =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); |     d_atom->epsilon         =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|     d_atom->sigma6          =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); |     d_atom->sigma6          =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|  |     d_atom->cutneighsq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|     d_atom->cutforcesq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); |     d_atom->cutforcesq      =   (MD_FLOAT *) allocateGPU(sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|     d_neighbor->neighbors   =   (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs); |     d_neighbor->neighbors   =   (int *) allocateGPU(sizeof(int) * atom->Nmax * neighbor->maxneighs); | ||||||
|     d_neighbor->numneigh    =   (int *) allocateGPU(sizeof(int) * atom->Nmax); |     d_neighbor->numneigh    =   (int *) allocateGPU(sizeof(int) * atom->Nmax); | ||||||
| @@ -22,6 +23,7 @@ void initDevice(Atom *atom, Neighbor *neighbor) { | |||||||
|     memcpyToGPU(d_atom->vx,             atom->vx,         sizeof(MD_FLOAT) * atom->Nmax * 3); |     memcpyToGPU(d_atom->vx,             atom->vx,         sizeof(MD_FLOAT) * atom->Nmax * 3); | ||||||
|     memcpyToGPU(d_atom->sigma6,         atom->sigma6,     sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); |     memcpyToGPU(d_atom->sigma6,         atom->sigma6,     sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|     memcpyToGPU(d_atom->epsilon,        atom->epsilon,    sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); |     memcpyToGPU(d_atom->epsilon,        atom->epsilon,    sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|  |     memcpyToGPU(d_atom->cutneighsq,     atom->cutneighsq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|     memcpyToGPU(d_atom->cutforcesq,     atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); |     memcpyToGPU(d_atom->cutforcesq,     atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes); | ||||||
|     memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax); |     memcpyToGPU(d_atom->type,           atom->type,       sizeof(int) * atom->Nmax); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -73,6 +73,7 @@ extern int readAtom_pdb(Atom*, Parameter*); | |||||||
| extern int readAtom_gro(Atom*, Parameter*); | extern int readAtom_gro(Atom*, Parameter*); | ||||||
| extern int readAtom_dmp(Atom*, Parameter*); | extern int readAtom_dmp(Atom*, Parameter*); | ||||||
| extern int readAtom_in(Atom*, Parameter*); | extern int readAtom_in(Atom*, Parameter*); | ||||||
|  | extern void writeAtom(Atom*, Parameter*); | ||||||
| extern void growAtom(Atom*); | extern void growAtom(Atom*); | ||||||
|  |  | ||||||
| #ifdef AOS | #ifdef AOS | ||||||
|   | |||||||
| @@ -207,6 +207,10 @@ int main(int argc, char** argv) { | |||||||
|             param.vtk_file = strdup(argv[++i]); |             param.vtk_file = strdup(argv[++i]); | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |         if((strcmp(argv[i], "-w") == 0)) { | ||||||
|  |             param.write_atom_file = strdup(argv[++i]); | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|         if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { |         if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { | ||||||
|             printf("MD Bench: A minimalistic re-implementation of miniMD\n"); |             printf("MD Bench: A minimalistic re-implementation of miniMD\n"); | ||||||
|             printf(HLINE); |             printf(HLINE); | ||||||
| @@ -219,6 +223,7 @@ int main(int argc, char** argv) { | |||||||
|             printf("-half <int>:                use half (1) or full (0) neighbor lists\n"); |             printf("-half <int>:                use half (1) or full (0) neighbor lists\n"); | ||||||
|             printf("-r / --radius <real>:       set cutoff radius\n"); |             printf("-r / --radius <real>:       set cutoff radius\n"); | ||||||
|             printf("-s / --skin <real>:         set skin (verlet buffer)\n"); |             printf("-s / --skin <real>:         set skin (verlet buffer)\n"); | ||||||
|  |             printf("-w <file>:                  write input atoms to file\n"); | ||||||
|             printf("--freq <real>:              processor frequency (GHz)\n"); |             printf("--freq <real>:              processor frequency (GHz)\n"); | ||||||
|             printf("--vtk <string>:             VTK file for visualization\n"); |             printf("--vtk <string>:             VTK file for visualization\n"); | ||||||
|             printf(HLINE); |             printf(HLINE); | ||||||
| @@ -237,6 +242,10 @@ int main(int argc, char** argv) { | |||||||
|     traceAddresses(¶m, &atom, &neighbor, n + 1); |     traceAddresses(¶m, &atom, &neighbor, n + 1); | ||||||
|     #endif |     #endif | ||||||
|  |  | ||||||
|  |     if(param.write_atom_file != NULL) { | ||||||
|  |         writeAtom(&atom, ¶m); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     //writeInput(¶m, &atom); |     //writeInput(¶m, &atom); | ||||||
|  |  | ||||||
|     timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats); |     timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user