Added a rough sketch for the next steps of porting neighborhood computation to cuda

2022-06-23 23:58:15 +02:00
parent 67f9c769ef
commit 757d4329f3
2 changed files with 142 additions and 0 deletions
--- a/src/includes/neighbor.h
+++ b/src/includes/neighbor.h
@@ -33,6 +33,14 @@ typedef struct {
    int* numneigh;
 } Neighbor;
 typedef struct {
    MD_FLOAT xprd, MD_FLOAT yprd, MD_FLOAT zprd,
    MD_FLOAT bininvx, MD_FLOAT bininvy, MD_FLOAT bininvz,
    int mbinxlo, int mbinylo, int mbinzlo,
    int nbinx, int nbiny, int nbinz,
    int mbinx, int mbiny, int mbinz
 } Neighbor_params;
 extern void initNeighbor(Neighbor*, Parameter*);
 extern void setupNeighbor();
 extern void binatoms(Atom*);
--- a/src/neighbor.cu
+++ b/src/neighbor.cu
@@ -24,6 +24,8 @@
 #include <stdio.h>
 #include <math.h>
 extern "C" {
 #include <neighbor.h>
 #include <parameter.h>
 #include <allocate.h>
@@ -31,7 +33,100 @@
 #define SMALL 1.0e-6
 #define FACTOR 0.999
 }
 __device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin, 
                                Neighbor_params np)
 {
    int ix, iy, iz;
    if(xin >= np.xprd) {
        ix = (int)((xin - np.xprd) * np.bininvx) + np.nbinx - np.mbinxlo;
    } else if(xin >= 0.0) {
        ix = (int)(xin * np.bininvx) - np.mbinxlo;
    } else {
        ix = (int)(xin * np.bininvx) - np.mbinxlo - 1;
    }
    if(yin >= np.yprd) {
        iy = (int)((yin - np.yprd) * np.bininvy) + np.nbiny - np.mbinylo;
    } else if(yin >= 0.0) {
        iy = (int)(yin * np.bininvy) - np.mbinylo;
    } else {
        iy = (int)(yin * np.bininvy) - np.mbinylo - 1;
    }
    if(zin >= np.zprd) {
        iz = (int)((zin - np.zprd) * np.bininvz) + np.nbinz - np.mbinzlo;
    } else if(zin >= 0.0) {
        iz = (int)(zin * np.bininvz) - np.mbinzlo;
    } else {
        iz = (int)(zin * np.bininvz) - np.mbinzlo - 1;
    }
    return (iz * np.mbiny * np.mbinx + iy * np.mbinx + ix + 1);
 }
 __global__ void compute_neighborhood(Atom a, Neighbor neigh, int Nlocal, Neighbor_params np, int nstencil, int* stencil,
                                     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs){
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if( i >= Nlocal ) {
        return;
    }
    Atom *atom = &a;
    Neighbor *neighbor = &neigh;
    int* neighptr = &(neighbor->neighbors[i]);
    int n = 0;
    MD_FLOAT xtmp = atom_x(i);
    MD_FLOAT ytmp = atom_y(i);
    MD_FLOAT ztmp = atom_z(i);
    int ibin = coord2bin_device(xtmp, ytmp, ztmp, Neighbor_params np);
 #ifdef EXPLICIT_TYPES
    int type_i = atom->type[i];
 #endif
    for(int k = 0; k < nstencil; k++) {
        int jbin = ibin + stencil[k];
        int* loc_bin = &bins[jbin * atoms_per_bin];
        for(int m = 0; m < bincount[jbin]; m++) {
            int j = loc_bin[m];
            if ( j == i ){
                continue;
            }
            MD_FLOAT delx = xtmp - atom_x(j);
            MD_FLOAT dely = ytmp - atom_y(j);
            MD_FLOAT delz = ztmp - atom_z(j);
            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 #ifdef EXPLICIT_TYPES
            int type_j = atom->type[j];
                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 #else
            const MD_FLOAT cutoff = cutneighsq;
 #endif
            if( rsq <= cutoff ) {
                int idx = atom->Nlocal * n;
                neighptr[idx] = j;
                n += 1;
            }
        }
    }
    neighbor->numneigh[i] = n;
    if(n >= neighbor->maxneighs) {
        atomicMax(new_maxneighs, n);
    }
 }
 extern "C" {
 static MD_FLOAT xprd, yprd, zprd;
 static MD_FLOAT bininvx, bininvy, bininvz;
 static int mbinxlo, mbinylo, mbinzlo;
@@ -417,3 +512,42 @@ void sortAtom(Atom* atom) {
    atom->vy = new_vy; atom->vz = new_vz;
 #endif
 }
 void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor)
 {
    int nall = atom->Nlocal + atom->Nghost;
    /* extend atom arrays if necessary */
    if(nall > nmax) {
        nmax = nall;
        if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
        if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
        checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
        // neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
        // neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
    }
    /* bin local & ghost atoms */
    binatoms(atom);
    int resize = 1;
    /* loop over each atom, storing neighbors */
    while(resize) {
        int new_maxneighs = neighbor->maxneighs;
        resize = 0;
        // TODO allocate space for and then copy all necessary components
        // TODO dont forget to copy the atom positions over
        // TODO call compute_neigborhood kernel here
        if(resize) {
            printf("RESIZE %d\n", neighbor->maxneighs);
            neighbor->maxneighs = new_maxneighs * 1.2;
            free(neighbor->neighbors);
            neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
        }
    }
 }
 }