diff --git a/include_NVCC.mk b/include_NVCC.mk index 39ae113..e8cabf0 100644 --- a/include_NVCC.mk +++ b/include_NVCC.mk @@ -7,7 +7,7 @@ ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra # CFLAGS = -O0 -g -std=c99 -fargument-noalias -CFLAGS = -O3 -arch=sm_61 # -fopenmp +CFLAGS = -O3 -g -arch=sm_61 # -fopenmp ASFLAGS = -masm=intel LFLAGS = DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON diff --git a/src/force.cu b/src/force.cu index 6e9f41a..fc8ec8b 100644 --- a/src/force.cu +++ b/src/force.cu @@ -51,8 +51,7 @@ __global__ void calc_force( Atom *atom = &a; - int *neighs = &neigh_neighbors[i * neigh_maxneighs]; - int numneighs = neigh_numneigh[i]; + const int numneighs = neigh_numneigh[i]; MD_FLOAT xtmp = atom_x(i); MD_FLOAT ytmp = atom_y(i); @@ -63,7 +62,7 @@ __global__ void calc_force( MD_FLOAT fiz = 0; for(int k = 0; k < numneighs; k++) { - int j = neighs[k]; + int j = neigh_neighbors[atom->Nlocal * k + i]; MD_FLOAT delx = xtmp - atom_x(j); MD_FLOAT dely = ytmp - atom_y(j); MD_FLOAT delz = ztmp - atom_z(j); diff --git a/src/neighbor.c b/src/neighbor.c index cce3756..7b45b12 100644 --- a/src/neighbor.c +++ b/src/neighbor.c @@ -178,7 +178,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh); if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors); checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) ); - checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int*)) ); + checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) ); // neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*)); } @@ -193,7 +193,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) resize = 0; for(int i = 0; i < atom->Nlocal; i++) { - int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]); + int* neighptr = &(neighbor->neighbors[i]); int n = 0; MD_FLOAT xtmp = atom_x(i); MD_FLOAT ytmp = atom_y(i); @@ -226,7 +226,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) #endif if( rsq <= cutoff ) { - neighptr[n++] = j; + int idx = atom->Nlocal * n; + neighptr[idx] = j; + n += 1; } } }