diff --git a/common/includes/util.h b/common/includes/util.h index 5ff9c2b..00de105 100644 --- a/common/includes/util.h +++ b/common/includes/util.h @@ -39,8 +39,8 @@ extern double myrandom(int*); extern void random_reset(int *seed, int ibase, double *coord); extern int str2ff(const char *string); extern const char* ff2str(int ff); -extern int get_num_threads(); extern void readline(char *line, FILE *fp); extern void debug_printf(const char *format, ...); +extern int get_cuda_num_threads(); #endif diff --git a/common/util.c b/common/util.c index 2508b7d..350c96f 100644 --- a/common/util.c +++ b/common/util.c @@ -79,7 +79,7 @@ const char* ff2str(int ff) { return "invalid"; } -int get_num_threads() { +int get_cuda_num_threads() { const char *num_threads_env = getenv("NUM_THREADS"); return (num_threads_env == NULL) ? 32 : atoi(num_threads_env); } diff --git a/lammps/cuda/force.cu b/lammps/cuda/force.cu index a05ce7f..a6ec4d8 100644 --- a/lammps/cuda/force.cu +++ b/lammps/cuda/force.cu @@ -109,7 +109,7 @@ extern "C" { void finalIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) { const int Nlocal = atom->Nlocal; - const int num_threads_per_block = get_num_threads(); + const int num_threads_per_block = get_cuda_num_threads(); const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block); kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, atom->d_atom); @@ -123,7 +123,7 @@ void finalIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) { void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) { const int Nlocal = atom->Nlocal; - const int num_threads_per_block = get_num_threads(); + const int num_threads_per_block = get_cuda_num_threads(); const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block); kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, atom->d_atom); @@ -136,7 +136,7 @@ void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) { } double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) { - const int num_threads_per_block = get_num_threads(); + const int num_threads_per_block = get_cuda_num_threads(); int Nlocal = atom->Nlocal; #ifndef EXPLICIT_TYPES MD_FLOAT cutforcesq = param->cutforce * param->cutforce; diff --git a/lammps/cuda/neighbor.cu b/lammps/cuda/neighbor.cu index 3fb74c2..b83b8ae 100644 --- a/lammps/cuda/neighbor.cu +++ b/lammps/cuda/neighbor.cu @@ -206,7 +206,7 @@ void binatoms_cuda(Atom *atom, Binning *c_binning, int *c_resize_needed, Neighbo void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) { DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor); - const int num_threads_per_block = get_num_threads(); + const int num_threads_per_block = get_cuda_num_threads(); int nall = atom->Nlocal + atom->Nghost; cudaProfilerStart(); diff --git a/lammps/cuda/pbc.cu b/lammps/cuda/pbc.cu index b80383c..0c7df50 100644 --- a/lammps/cuda/pbc.cu +++ b/lammps/cuda/pbc.cu @@ -65,7 +65,7 @@ __global__ void computePbcUpdate(DeviceAtom a, int nlocal, int nghost, int* PBCx /* update coordinates of ghost atoms */ /* uses mapping created in setupPbc */ void updatePbc_cuda(Atom *atom, Parameter *param, bool reneigh) { - const int num_threads_per_block = get_num_threads(); + const int num_threads_per_block = get_cuda_num_threads(); if(reneigh) { memcpyToGPU(atom->d_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3); @@ -98,7 +98,7 @@ void updatePbc_cuda(Atom *atom, Parameter *param, bool reneigh) { } void updateAtomsPbc_cuda(Atom* atom, Parameter *param) { - const int num_threads_per_block = get_num_threads(); + const int num_threads_per_block = get_cuda_num_threads(); MD_FLOAT xprd = param->xprd; MD_FLOAT yprd = param->yprd; MD_FLOAT zprd = param->zprd;