Added a rough sketch for the next steps of porting neighborhood computation to cuda
This commit is contained in:
parent
67f9c769ef
commit
757d4329f3
@ -33,6 +33,14 @@ typedef struct {
|
||||
int* numneigh;
|
||||
} Neighbor;
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd,
|
||||
MD_FLOAT bininvx, MD_FLOAT bininvy, MD_FLOAT bininvz,
|
||||
int mbinxlo, int mbinylo, int mbinzlo,
|
||||
int nbinx, int nbiny, int nbinz,
|
||||
int mbinx, int mbiny, int mbinz
|
||||
} Neighbor_params;
|
||||
|
||||
extern void initNeighbor(Neighbor*, Parameter*);
|
||||
extern void setupNeighbor();
|
||||
extern void binatoms(Atom*);
|
||||
|
@ -24,6 +24,8 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <allocate.h>
|
||||
@ -31,7 +33,100 @@
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
}
|
||||
|
||||
__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin,
|
||||
Neighbor_params np)
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
if(xin >= np.xprd) {
|
||||
ix = (int)((xin - np.xprd) * np.bininvx) + np.nbinx - np.mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * np.bininvx) - np.mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * np.bininvx) - np.mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= np.yprd) {
|
||||
iy = (int)((yin - np.yprd) * np.bininvy) + np.nbiny - np.mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * np.bininvy) - np.mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * np.bininvy) - np.mbinylo - 1;
|
||||
}
|
||||
|
||||
if(zin >= np.zprd) {
|
||||
iz = (int)((zin - np.zprd) * np.bininvz) + np.nbinz - np.mbinzlo;
|
||||
} else if(zin >= 0.0) {
|
||||
iz = (int)(zin * np.bininvz) - np.mbinzlo;
|
||||
} else {
|
||||
iz = (int)(zin * np.bininvz) - np.mbinzlo - 1;
|
||||
}
|
||||
|
||||
return (iz * np.mbiny * np.mbinx + iy * np.mbinx + ix + 1);
|
||||
}
|
||||
|
||||
__global__ void compute_neighborhood(Atom a, Neighbor neigh, int Nlocal, Neighbor_params np, int nstencil, int* stencil,
|
||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs){
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if( i >= Nlocal ) {
|
||||
return;
|
||||
}
|
||||
|
||||
Atom *atom = &a;
|
||||
Neighbor *neighbor = &neigh;
|
||||
|
||||
int* neighptr = &(neighbor->neighbors[i]);
|
||||
int n = 0;
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
int ibin = coord2bin_device(xtmp, ytmp, ztmp, Neighbor_params np);
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[i];
|
||||
#endif
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int j = loc_bin[m];
|
||||
|
||||
if ( j == i ){
|
||||
continue;
|
||||
}
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[j];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
#else
|
||||
const MD_FLOAT cutoff = cutneighsq;
|
||||
#endif
|
||||
|
||||
if( rsq <= cutoff ) {
|
||||
int idx = atom->Nlocal * n;
|
||||
neighptr[idx] = j;
|
||||
n += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numneigh[i] = n;
|
||||
|
||||
if(n >= neighbor->maxneighs) {
|
||||
atomicMax(new_maxneighs, n);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
static MD_FLOAT xprd, yprd, zprd;
|
||||
static MD_FLOAT bininvx, bininvy, bininvz;
|
||||
static int mbinxlo, mbinylo, mbinzlo;
|
||||
@ -417,3 +512,42 @@ void sortAtom(Atom* atom) {
|
||||
atom->vy = new_vy; atom->vz = new_vz;
|
||||
#endif
|
||||
}
|
||||
|
||||
void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor)
|
||||
{
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
|
||||
/* extend atom arrays if necessary */
|
||||
if(nall > nmax) {
|
||||
nmax = nall;
|
||||
if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
|
||||
if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
|
||||
checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
|
||||
checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
|
||||
// neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||
// neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
|
||||
}
|
||||
|
||||
/* bin local & ghost atoms */
|
||||
binatoms(atom);
|
||||
int resize = 1;
|
||||
|
||||
/* loop over each atom, storing neighbors */
|
||||
while(resize) {
|
||||
int new_maxneighs = neighbor->maxneighs;
|
||||
resize = 0;
|
||||
|
||||
// TODO allocate space for and then copy all necessary components
|
||||
// TODO dont forget to copy the atom positions over
|
||||
|
||||
// TODO call compute_neigborhood kernel here
|
||||
|
||||
if(resize) {
|
||||
printf("RESIZE %d\n", neighbor->maxneighs);
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
free(neighbor->neighbors);
|
||||
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user