Added a rough sketch for the next steps of porting neighborhood computation to cuda

2022-06-23 23:58:15 +02:00
parent 67f9c769ef
commit 757d4329f3
2 changed files with 142 additions and 0 deletions
--- a/src/includes/neighbor.h
+++ b/src/includes/neighbor.h
@@ -33,6 +33,14 @@ typedef struct {
    int* numneigh;
 } Neighbor;

+typedef struct { /* binning parameters bundled so they can be passed by value to CUDA device code */
+    MD_FLOAT xprd, yprd, zprd;          /* per-axis upper bound tested by coord2bin_device */
+    MD_FLOAT bininvx, bininvy, bininvz; /* per-axis factor scaling a coordinate to a bin index */
+    int mbinxlo, mbinylo, mbinzlo;      /* per-axis offset subtracted from the raw bin index */
+    int nbinx, nbiny, nbinz;            /* per-axis count added for coordinates >= the upper bound */
+    int mbinx, mbiny, mbinz;            /* mbinx/mbiny form the flat-index strides in coord2bin_device; mbinz is not read there */
+} Neighbor_params;
+
 extern void initNeighbor(Neighbor*, Parameter*);
 extern void setupNeighbor();
 extern void binatoms(Atom*);
--- a/src/neighbor.cu
+++ b/src/neighbor.cu
@@ -24,6 +24,8 @@
 #include <stdio.h>
 #include <math.h>

+extern "C" {
+
 #include <neighbor.h>
 #include <parameter.h>
 #include <allocate.h>
@@ -31,7 +33,100 @@

 #define SMALL 1.0e-6
 #define FACTOR 0.999
+}

+/* One-axis bin index: a coordinate at or above prd is measured from prd and
+ * shifted by nbin; a negative coordinate gets one extra bin subtracted. */
+static __device__ int axis2bin_device(MD_FLOAT pos, MD_FLOAT prd, MD_FLOAT bininv,
+                                      int nbin, int mbinlo)
+{
+    if(pos >= prd) {
+        return (int)((pos - prd) * bininv) + nbin - mbinlo;
+    }
+
+    if(pos >= 0.0) {
+        return (int)(pos * bininv) - mbinlo;
+    }
+
+    return (int)(pos * bininv) - mbinlo - 1;
+}
+
+/*
+ * Converts the position (xin, yin, zin) into a flat bin index using the
+ * binning parameters in np. Each axis is binned independently with the same
+ * three-way rule, then the three per-axis indices are combined into one int.
+ */
+__device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin,
+                                Neighbor_params np)
+{
+    const int bx = axis2bin_device(xin, np.xprd, np.bininvx, np.nbinx, np.mbinxlo);
+    const int by = axis2bin_device(yin, np.yprd, np.bininvy, np.nbiny, np.mbinylo);
+    const int bz = axis2bin_device(zin, np.zprd, np.bininvz, np.nbinz, np.mbinzlo);
+
+    // flatten: x index varies fastest, then y, then z, plus a constant offset of 1
+    return (bz * np.mbiny * np.mbinx + by * np.mbinx + bx + 1);
+}
+
+/*
+ * Kernel: thread i (blockIdx.x * blockDim.x + threadIdx.x) handles atom i; threads with i >= Nlocal exit.
+ * It scans the bins at offsets stencil[k] from atom i's bin and keeps every j != i with rsq <= cutoff.
+ * Neighbor number n of atom i is written to neighbors[n * atom->Nlocal + i]; numneigh[i] gets the
+ * full count n, and a count >= maxneighs is reported via atomicMax on *new_maxneighs.
+ * NOTE(review): all pointers are dereferenced in device code, so they presumably must be device-accessible.
+ */
+__global__ void compute_neighborhood(Atom a, Neighbor neigh, int Nlocal, Neighbor_params np, int nstencil, int* stencil,
+                                     int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs){
+    const int i = blockIdx.x * blockDim.x + threadIdx.x;
+    if( i >= Nlocal ) { return; }
+
+    Atom *atom = &a;             // pointer views of the by-value copies so the
+    Neighbor *neighbor = &neigh; // body can keep the atom-> / neighbor-> access style
+    int* neighptr = &(neighbor->neighbors[i]);
+    int n = 0;
+    MD_FLOAT xtmp = atom_x(i);
+    MD_FLOAT ytmp = atom_y(i);
+    MD_FLOAT ztmp = atom_z(i);
+    int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
+#ifdef EXPLICIT_TYPES
+    int type_i = atom->type[i];
+#endif
+    for(int k = 0; k < nstencil; k++) {
+        int jbin = ibin + stencil[k];
+        int* loc_bin = &bins[jbin * atoms_per_bin];
+        for(int m = 0; m < bincount[jbin]; m++) {
+            int j = loc_bin[m];
+            if ( j == i ){ continue; }
+            MD_FLOAT delx = xtmp - atom_x(j);
+            MD_FLOAT dely = ytmp - atom_y(j);
+            MD_FLOAT delz = ztmp - atom_z(j);
+            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+#ifdef EXPLICIT_TYPES
+            int type_j = atom->type[j];
+            const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
+#else
+            // NOTE(review): cutneighsq is neither a parameter nor a local of this kernel; it has to
+            // resolve to a device-visible symbol (a host-side static would not work) - TODO confirm.
+            const MD_FLOAT cutoff = cutneighsq;
+#endif
+            if( rsq <= cutoff ) {
+                // store only while a slot exists; n keeps counting so the resize sees the full count
+                if( n < neighbor->maxneighs ) {
+                    neighptr[atom->Nlocal * n] = j;
+                }
+                n += 1;
+            }
+        }
+    }
+
+    neighbor->numneigh[i] = n;
+    if(n >= neighbor->maxneighs) {
+        atomicMax(new_maxneighs, n);
+    }
+}
+
+extern "C" {
+    
+    
 static MD_FLOAT xprd, yprd, zprd;
 static MD_FLOAT bininvx, bininvy, bininvz;
 static int mbinxlo, mbinylo, mbinzlo;
@@ -417,3 +512,42 @@ void sortAtom(Atom* atom) {
    atom->vy = new_vy; atom->vz = new_vz;
 #endif
 }
+
+/*
+ * Host-side sketch of the CUDA neighbor-list build: regrows the cudaMallocHost'ed numneigh/neighbors
+ * buffers when Nlocal + Nghost exceeds nmax, bins the atoms, then loops until no resize is requested.
+ * The kernel launch is still a TODO, so resize is never set here; c_atom and c_neighbor are unused so far.
+ */
+void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *c_neighbor)
+{
+    int nall = atom->Nlocal + atom->Nghost;
+    /* extend atom arrays if necessary */
+    if(nall > nmax) {
+        nmax = nall;
+        if(neighbor->numneigh) cudaFreeHost(neighbor->numneigh);
+        if(neighbor->neighbors) cudaFreeHost(neighbor->neighbors);
+        checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
+        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
+    }
+
+    /* bin local & ghost atoms */
+    binatoms(atom);
+    int resize = 1;
+
+    /* loop over each atom, storing neighbors */
+    while(resize) {
+        int new_maxneighs = neighbor->maxneighs;
+        resize = 0;
+        // TODO allocate space for and then copy all necessary components
+        // TODO don't forget to copy the atom positions over
+        // TODO call compute_neighborhood kernel here
+
+        if(resize) {
+            printf("RESIZE %d\n", neighbor->maxneighs);
+            neighbor->maxneighs = new_maxneighs * 1.2;
+            cudaFreeHost(neighbor->neighbors); /* allocated with cudaMallocHost above, so free() would be wrong */
+            checkCUDAError( "buildNeighbor resize neighbors", cudaMallocHost((void**)&(neighbor->neighbors), atom->Nmax * neighbor->maxneighs * sizeof(int)) );
+        }
+    }
+}
+}