Rename function get_num_threads to get_cuda_num_threads
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
a460fffa19
commit
2f13291817
@ -39,8 +39,8 @@ extern double myrandom(int*);
|
|||||||
extern void random_reset(int *seed, int ibase, double *coord);
|
extern void random_reset(int *seed, int ibase, double *coord);
|
||||||
extern int str2ff(const char *string);
|
extern int str2ff(const char *string);
|
||||||
extern const char* ff2str(int ff);
|
extern const char* ff2str(int ff);
|
||||||
extern int get_num_threads();
|
|
||||||
extern void readline(char *line, FILE *fp);
|
extern void readline(char *line, FILE *fp);
|
||||||
extern void debug_printf(const char *format, ...);
|
extern void debug_printf(const char *format, ...);
|
||||||
|
extern int get_cuda_num_threads();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -79,7 +79,7 @@ const char* ff2str(int ff) {
|
|||||||
return "invalid";
|
return "invalid";
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Threads-per-block lookup, shown here on both sides of the diff: the old
 * name get_num_threads and the new name get_cuda_num_threads. The two bodies
 * are identical; only the function name differs.
 *
 * Reads the NUM_THREADS environment variable with getenv().
 * Returns 32 when the variable is not set; otherwise returns atoi() of its
 * value.
 *
 * NOTE(review): the atoi() result is returned without any validation, so a
 * non-numeric, zero or negative NUM_THREADS is passed straight through. The
 * callers visible in this diff use the result as a divisor when computing
 * num_blocks and as the block dimension of a kernel launch -- confirm whether
 * such values should be rejected here.
 */
int get_num_threads() {
|
int get_cuda_num_threads() {
|
||||||
const char *num_threads_env = getenv("NUM_THREADS");
|
const char *num_threads_env = getenv("NUM_THREADS");
|
||||||
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
|
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
|
||||||
}
|
}
|
||||||
|
@ -109,7 +109,7 @@ extern "C" {
|
|||||||
|
|
||||||
void finalIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
void finalIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
||||||
const int Nlocal = atom->Nlocal;
|
const int Nlocal = atom->Nlocal;
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
||||||
|
|
||||||
kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, atom->d_atom);
|
kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, atom->d_atom);
|
||||||
@ -123,7 +123,7 @@ void finalIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
|||||||
|
|
||||||
void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
||||||
const int Nlocal = atom->Nlocal;
|
const int Nlocal = atom->Nlocal;
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
||||||
|
|
||||||
kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, atom->d_atom);
|
kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, atom->d_atom);
|
||||||
@ -136,7 +136,7 @@ void initialIntegrate_cuda(bool reneigh, Parameter *param, Atom *atom) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
double computeForceLJFullNeigh_cuda(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
#ifndef EXPLICIT_TYPES
|
#ifndef EXPLICIT_TYPES
|
||||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
|
@ -206,7 +206,7 @@ void binatoms_cuda(Atom *atom, Binning *c_binning, int *c_resize_needed, Neighbo
|
|||||||
|
|
||||||
void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) {
|
void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor) {
|
||||||
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
|
DeviceNeighbor *d_neighbor = &(neighbor->d_neighbor);
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
int nall = atom->Nlocal + atom->Nghost;
|
int nall = atom->Nlocal + atom->Nghost;
|
||||||
|
|
||||||
cudaProfilerStart();
|
cudaProfilerStart();
|
||||||
|
@ -65,7 +65,7 @@ __global__ void computePbcUpdate(DeviceAtom a, int nlocal, int nghost, int* PBCx
|
|||||||
/* update coordinates of ghost atoms */
|
/* update coordinates of ghost atoms */
|
||||||
/* uses mapping created in setupPbc */
|
/* uses mapping created in setupPbc */
|
||||||
void updatePbc_cuda(Atom *atom, Parameter *param, bool reneigh) {
|
void updatePbc_cuda(Atom *atom, Parameter *param, bool reneigh) {
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
|
|
||||||
if(reneigh) {
|
if(reneigh) {
|
||||||
memcpyToGPU(atom->d_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
memcpyToGPU(atom->d_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3);
|
||||||
@ -98,7 +98,7 @@ void updatePbc_cuda(Atom *atom, Parameter *param, bool reneigh) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void updateAtomsPbc_cuda(Atom* atom, Parameter *param) {
|
void updateAtomsPbc_cuda(Atom* atom, Parameter *param) {
|
||||||
const int num_threads_per_block = get_num_threads();
|
const int num_threads_per_block = get_cuda_num_threads();
|
||||||
MD_FLOAT xprd = param->xprd;
|
MD_FLOAT xprd = param->xprd;
|
||||||
MD_FLOAT yprd = param->yprd;
|
MD_FLOAT yprd = param->yprd;
|
||||||
MD_FLOAT zprd = param->zprd;
|
MD_FLOAT zprd = param->zprd;
|
||||||
|
Loading…
Reference in New Issue
Block a user