Implement Neighbour list AoS memory layout + performance measurement
This commit is contained in:
		| @@ -7,7 +7,7 @@ ANSI_CFLAGS += -pedantic | |||||||
| ANSI_CFLAGS += -Wextra | ANSI_CFLAGS += -Wextra | ||||||
|  |  | ||||||
| # CFLAGS   = -O0 -g  -std=c99 -fargument-noalias | # CFLAGS   = -O0 -g  -std=c99 -fargument-noalias | ||||||
| CFLAGS   = -O3 -arch=sm_61 # -fopenmp | CFLAGS   = -O3 -g -arch=sm_61 # -fopenmp | ||||||
| ASFLAGS  =  -masm=intel | ASFLAGS  =  -masm=intel | ||||||
| LFLAGS   = | LFLAGS   = | ||||||
| DEFINES  = -D_GNU_SOURCE -DLIKWID_PERFMON | DEFINES  = -D_GNU_SOURCE -DLIKWID_PERFMON | ||||||
|   | |||||||
| @@ -51,8 +51,7 @@ __global__ void calc_force( | |||||||
|  |  | ||||||
|     Atom *atom = &a; |     Atom *atom = &a; | ||||||
|  |  | ||||||
|     int *neighs = &neigh_neighbors[i * neigh_maxneighs]; |     const int numneighs = neigh_numneigh[i]; | ||||||
|     int numneighs = neigh_numneigh[i]; |  | ||||||
|  |  | ||||||
|     MD_FLOAT xtmp = atom_x(i); |     MD_FLOAT xtmp = atom_x(i); | ||||||
|     MD_FLOAT ytmp = atom_y(i); |     MD_FLOAT ytmp = atom_y(i); | ||||||
| @@ -63,7 +62,7 @@ __global__ void calc_force( | |||||||
|     MD_FLOAT fiz = 0; |     MD_FLOAT fiz = 0; | ||||||
|  |  | ||||||
|     for(int k = 0; k < numneighs; k++) { |     for(int k = 0; k < numneighs; k++) { | ||||||
|         int j = neighs[k]; |         int j = neigh_neighbors[atom->Nlocal * k + i]; | ||||||
|         MD_FLOAT delx = xtmp - atom_x(j); |         MD_FLOAT delx = xtmp - atom_x(j); | ||||||
|         MD_FLOAT dely = ytmp - atom_y(j); |         MD_FLOAT dely = ytmp - atom_y(j); | ||||||
|         MD_FLOAT delz = ztmp - atom_z(j); |         MD_FLOAT delz = ztmp - atom_z(j); | ||||||
|   | |||||||
| @@ -178,7 +178,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) | |||||||
|         if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh); |         if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh); | ||||||
|         if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors); |         if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors); | ||||||
|         checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) ); |         checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) ); | ||||||
|         checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int*)) ); |         checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) ); | ||||||
|         // neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); |         // neighbor->numneigh = (int*) malloc(nmax * sizeof(int)); | ||||||
|         // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*)); |         // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*)); | ||||||
|     } |     } | ||||||
| @@ -193,7 +193,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) | |||||||
|         resize = 0; |         resize = 0; | ||||||
|  |  | ||||||
|         for(int i = 0; i < atom->Nlocal; i++) { |         for(int i = 0; i < atom->Nlocal; i++) { | ||||||
|             int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]); |             int* neighptr = &(neighbor->neighbors[i]); | ||||||
|             int n = 0; |             int n = 0; | ||||||
|             MD_FLOAT xtmp = atom_x(i); |             MD_FLOAT xtmp = atom_x(i); | ||||||
|             MD_FLOAT ytmp = atom_y(i); |             MD_FLOAT ytmp = atom_y(i); | ||||||
| @@ -226,7 +226,9 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) | |||||||
|                     #endif |                     #endif | ||||||
|  |  | ||||||
|                     if( rsq <= cutoff ) { |                     if( rsq <= cutoff ) { | ||||||
|                         neighptr[n++] = j; |                         int idx = atom->Nlocal * n; | ||||||
|  |                         neighptr[idx] = j; | ||||||
|  |                         n += 1; | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user