Implement Neighbour list AoS memory layout + performance measurement
This commit is contained in:
		| @@ -7,7 +7,7 @@ ANSI_CFLAGS += -pedantic | ||||
| ANSI_CFLAGS += -Wextra | ||||
|  | ||||
| # CFLAGS   = -O0 -g  -std=c99 -fargument-noalias | ||||
|-CFLAGS   = -O3 -arch=sm_61 # -fopenmp | ||||
|+CFLAGS   = -O3 -g -arch=sm_61 # -fopenmp | ||||
| ASFLAGS  =  -masm=intel | ||||
| LFLAGS   = | ||||
| DEFINES  = -D_GNU_SOURCE -DLIKWID_PERFMON | ||||
|   | ||||
| @@ -51,8 +51,7 @@ __global__ void calc_force( | ||||
|  | ||||
|     Atom *atom = &a; | ||||
|  | ||||
|-    int *neighs = &neigh_neighbors[i * neigh_maxneighs]; | ||||
|-    int numneighs = neigh_numneigh[i]; | ||||
|+    const int numneighs = neigh_numneigh[i]; | ||||
|  | ||||
|     MD_FLOAT xtmp = atom_x(i); | ||||
|     MD_FLOAT ytmp = atom_y(i); | ||||
| @@ -63,7 +62,7 @@ __global__ void calc_force( | ||||
|     MD_FLOAT fiz = 0; | ||||
|  | ||||
|     for(int k = 0; k < numneighs; k++) { | ||||
|-        int j = neighs[k]; | ||||
|+        int j = neigh_neighbors[atom->Nlocal * k + i]; | ||||
|         MD_FLOAT delx = xtmp - atom_x(j); | ||||
|         MD_FLOAT dely = ytmp - atom_y(j); | ||||
|         MD_FLOAT delz = ztmp - atom_z(j); | ||||
|   | ||||
| @@ -178,7 +178,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) | ||||
|         if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh); | ||||
|         if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors); | ||||
|         checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) ); | ||||
|-        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int*)) ); | ||||
|+        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) ); | ||||
|         // neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); | ||||
|         // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*)); | ||||
|     } | ||||
| @@ -193,7 +193,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) | ||||
|         resize = 0; | ||||
|  | ||||
|         for(int i = 0; i < atom->Nlocal; i++) { | ||||
|-            int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]); | ||||
|+            int* neighptr = &(neighbor->neighbors[i]); | ||||
|             int n = 0; | ||||
|             MD_FLOAT xtmp = atom_x(i); | ||||
|             MD_FLOAT ytmp = atom_y(i); | ||||
| @@ -226,7 +226,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) | ||||
|                     #endif | ||||
|  | ||||
|                     if( rsq <= cutoff ) { | ||||
|-                        neighptr[n++] = j; | ||||
|+                        int idx = atom->Nlocal * n; | ||||
|+                        neighptr[idx] = j; | ||||
|+                        n += 1; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user