Final MPI version
This commit is contained in:
		
							
								
								
									
										97
									
								
								common/box.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								common/box.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,97 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
			
		||||
 * All rights reserved. This file is part of MD-Bench.
 | 
			
		||||
 * Use of this source code is governed by a LGPL-3.0
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <box.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
/*
 * Computes the part of mybox that overlaps `other` for the swap (dim, dir)
 * and stores its bounds in cut (cut is always written, even without overlap).
 *
 * Returns    0  overlap found without shifting `other` by the period xprd
 *            1  overlap found with `other` shifted by -xprd (dir == 0)
 *           -1  overlap found with `other` shifted by +xprd (dir != 0)
 *         -100  no overlap
 */
int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
{
  const int axes[3] = {_x, _y, _z};
  MD_FLOAT lower[3], upper[3];
  int shift = -100;

  // Plain intersection of both boxes on every axis
  for (int a = 0; a < 3; a++) {
    const int ax = axes[a];
    lower[ax] = MAX(mybox->lo[ax], other->lo[ax]);
    upper[ax] = MIN(mybox->hi[ax], other->hi[ax]);
  }

  // Non-periodic case: only tested against a different box
  if (mybox->id != other->id) {
    if (dir == 0) {
      upper[dim] = MIN(mybox->hi[dim], other->hi[dim] + cutneigh);
    } else if (dir == 1) {
      lower[dim] = MAX(mybox->lo[dim], other->lo[dim] - cutneigh);
    }

    if (lower[_x] < upper[_x] && lower[_y] < upper[_y] && lower[_z] < upper[_z]) {
      shift = 0;
    }
  }

  // Periodic case: retry with `other` moved by one period along dim
  if (shift < 0) {
    if (dir == 0) {
      lower[dim] = MAX(mybox->lo[dim], other->lo[dim] - xprd);
      upper[dim] = MIN(mybox->hi[dim], other->hi[dim] - xprd + cutneigh);
    } else {
      lower[dim] = MAX(mybox->lo[dim], other->lo[dim] + xprd - cutneigh);
      upper[dim] = MIN(mybox->hi[dim], other->hi[dim] + xprd);
    }

    if (lower[_x] < upper[_x] && lower[_y] < upper[_y] && lower[_z] < upper[_z]) {
      shift = (dir == 0) ? 1 : -1;
    }
  }

  // Hand the resulting bounds back to the caller
  for (int a = 0; a < 3; a++) {
    const int ax = axes[a];
    cut->lo[ax] = lower[ax];
    cut->hi[ax] = upper[ax];
  }

  return shift;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Returns 1 when mybox overlaps `other` after `other` has been widened by
 * cutneigh[] on every side, for any of the 27 shifts of `other` by
 * -1, 0 or +1 period (param->xprd/yprd/zprd) per axis. Returns 0 otherwise.
 */
int overlapFullBox(Parameter* param, MD_FLOAT *cutneigh ,const Box* mybox, const Box* other)
{
  const MD_FLOAT xprd = param->xprd;
  const MD_FLOAT yprd = param->yprd;
  const MD_FLOAT zprd = param->zprd;

  for (int kz = -1; kz <= 1; kz++) {
    const MD_FLOAT zmin = MAX(mybox->lo[_z], other->lo[_z]-cutneigh[_z] + kz*zprd);
    const MD_FLOAT zmax = MIN(mybox->hi[_z], other->hi[_z]+cutneigh[_z] + kz*zprd);
    // No overlap in z for this image: no (i, j) combination can succeed
    if (!(zmin < zmax)) continue;

    for (int jy = -1; jy <= 1; jy++) {
      const MD_FLOAT ymin = MAX(mybox->lo[_y], other->lo[_y]-cutneigh[_y] + jy*yprd);
      const MD_FLOAT ymax = MIN(mybox->hi[_y], other->hi[_y]+cutneigh[_y] + jy*yprd);
      if (!(ymin < ymax)) continue;

      for (int ix = -1; ix <= 1; ix++) {
        const MD_FLOAT xmin = MAX(mybox->lo[_x], other->lo[_x]-cutneigh[_x] + ix*xprd);
        const MD_FLOAT xmax = MIN(mybox->hi[_x], other->hi[_x]+cutneigh[_x] + ix*xprd);
        if (xmin < xmax) {
          return 1;
        }
      }
    }
  }

  return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Widens cut by cutneigh on the sides where `me` reaches at least as far as
 * `other`: along x for swaps 2..5, and additionally along y for swaps 4 and 5.
 * Other swap indices leave cut untouched.
 */
void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
{
  const int isSwap23 = (iswap == 2 || iswap == 3);
  const int isSwap45 = (iswap == 4 || iswap == 5);

  if (!isSwap23 && !isSwap45) {
    return;
  }

  // x is extended for both swap groups
  if (me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
  if (me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;

  // y is extended only for swaps 4 and 5
  if (isSwap45) {
    if (me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
    if (me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
  }
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										556
									
								
								common/comm.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										556
									
								
								common/comm.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,556 @@
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <comm.h>   
 | 
			
		||||
#include <allocate.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
 | 
			
		||||
#define NEIGHMIN  6       
 | 
			
		||||
#define BUFFACTOR 2
 | 
			
		||||
#define BUFMIN    1000
 | 
			
		||||
#define BUFEXTRA  100
 | 
			
		||||
#define world MPI_COMM_WORLD
 | 
			
		||||
 | 
			
		||||
MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE; 
 | 
			
		||||
static inline void allocDynamicBuffers(Comm*);
 | 
			
		||||
static inline void freeDynamicBuffers(Comm*);
 | 
			
		||||
static inline void freeBuffers(Comm*);
 | 
			
		||||
 | 
			
		||||
/*
 * Builds the receive lists from the send lists.
 *
 * For every swap the ranks received from are the ranks sent to in the
 * opposite swap of the same dimension; they are appended to comm->nrecv and
 * delimited by comm->recvfrom / comm->recvtill.
 *
 * comm->othersend[iswap] is set to 0 only when the swap has exactly one send
 * neighbour and that neighbour is this rank, and to 1 otherwise.
 */
void defineReverseList(Comm* comm){
  int nfilled = 0;

  // Set the inverse list
  for (int iswap = 0; iswap < 6; iswap++) {
    const int dim     = comm->swapdim[iswap];
    const int dir     = comm->swapdir[iswap];
    const int invswap = comm->swap[dim][(dir+1)%2];

    comm->recvfrom[iswap] = nfilled;
    for (int ineigh = comm->sendfrom[invswap]; ineigh < comm->sendtill[invswap]; ineigh++) {
      comm->nrecv[nfilled++] = comm->nsend[ineigh];
    }
    comm->recvtill[iswap] = nfilled;
  }

  // Set if myproc is the unique neighbour in the swap
  for (int iswap = 0; iswap < 6; iswap++) {
    const int first    = comm->sendfrom[iswap];
    const int sizeswap = comm->sendtill[iswap] - first;

    // nsend[first] is only read when the swap really owns that entry; for an
    // empty swap `first` may point past the filled part of nsend.
    const int selfOnly = (sizeswap == 1) && (comm->nsend[first] == comm->myproc);
    comm->othersend[iswap] = selfOnly ? 0 : 1;
  }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Appends rank newneigh to comm->nexch, doubling the capacity of the list
 * first when it is already full.
 */
void addNeighToExchangeList(Comm* comm, int newneigh){
    const int slot = comm->numneighexch;

    if (slot >= comm->maxneighexch) {
      const size_t previousBytes = comm->maxneighexch*sizeof(int);
      comm->maxneighexch *= 2;
      comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT, comm->maxneighexch * sizeof(int), previousBytes);
    }

    // Store the new rank in the first free slot
    comm->nexch[slot] = newneigh;
    comm->numneighexch = slot + 1;
}
 | 
			
		||||
 | 
			
		||||
//Exported functions
 | 
			
		||||
void neighComm(Comm *comm, Parameter* param, Grid *grid)
 | 
			
		||||
{
 | 
			
		||||
  int me = comm->myproc; 
 | 
			
		||||
  int numproc = comm ->numproc;
 | 
			
		||||
  int PAD = 6;   //number of elements for processor in the map
 | 
			
		||||
  int ineigh = 0;
 | 
			
		||||
  int sneigh = 0;
 | 
			
		||||
  MD_FLOAT *map = grid->map;
 | 
			
		||||
  MD_FLOAT cutneigh = param->cutneigh;
 | 
			
		||||
  MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
 | 
			
		||||
  Box mybox, other, cut;
 | 
			
		||||
 
 | 
			
		||||
  //needed for rebalancing
 | 
			
		||||
  freeDynamicBuffers(comm);
 | 
			
		||||
 | 
			
		||||
  //Local box
 | 
			
		||||
  mybox.id = me;
 | 
			
		||||
  mybox.lo[_x] = map[me*PAD+0];  mybox.hi[_x] = map[me*PAD+3];
 | 
			
		||||
  mybox.lo[_y] = map[me*PAD+1];  mybox.hi[_y] = map[me*PAD+4];
 | 
			
		||||
  mybox.lo[_z] = map[me*PAD+2];  mybox.hi[_z] = map[me*PAD+5];
 | 
			
		||||
 | 
			
		||||
  //Check for all possible neighbours only for exchange atoms
 | 
			
		||||
  comm->numneighexch = 0;
 | 
			
		||||
  for(int proc = 0; proc <numproc; proc++){
 | 
			
		||||
      other.id = proc;
 | 
			
		||||
      other.lo[_x] = map[proc*PAD+0];  other.hi[_x] = map[proc*PAD+3];
 | 
			
		||||
      other.lo[_y] = map[proc*PAD+1];  other.hi[_y] = map[proc*PAD+4];
 | 
			
		||||
      other.lo[_z] = map[proc*PAD+2];  other.hi[_z] = map[proc*PAD+5];
 | 
			
		||||
    
 | 
			
		||||
    if(proc != me){
 | 
			
		||||
      int intersection = overlapFullBox(param,grid->cutneigh,&mybox,&other);
 | 
			
		||||
      if(intersection) addNeighToExchangeList(comm,proc);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  //MAP is stored as follows: xlo,ylo,zlo,xhi,yhi,zhi
 | 
			
		||||
  for(int iswap = 0; iswap <6; iswap++)
 | 
			
		||||
  {
 | 
			
		||||
    int dir = comm->swapdir[iswap]; 
 | 
			
		||||
    int dim = comm->swapdim[iswap]; 
 | 
			
		||||
 | 
			
		||||
    for(int proc = 0; proc < numproc; proc++)
 | 
			
		||||
    {      
 | 
			
		||||
      //Check for neighbours along dimmensions, for forwardComm, backwardComm  and ghostComm
 | 
			
		||||
      other.id = proc;
 | 
			
		||||
      other.lo[_x] = map[proc*PAD+0];  other.hi[_x] = map[proc*PAD+3];
 | 
			
		||||
      other.lo[_y] = map[proc*PAD+1];  other.hi[_y] = map[proc*PAD+4];
 | 
			
		||||
      other.lo[_z] = map[proc*PAD+2];  other.hi[_z] = map[proc*PAD+5]; 
 | 
			
		||||
          
 | 
			
		||||
      //return if two boxes intersect: -100 not intersection, 0, 1 and -1 intersection for each different pbc.  
 | 
			
		||||
      int pbc = overlapBox(dim,dir,&mybox,&other,&cut,prd[dim],cutneigh);
 | 
			
		||||
      if(pbc == -100) continue;   
 | 
			
		||||
      
 | 
			
		||||
      expandBox(iswap, &mybox, &other, &cut, cutneigh);
 | 
			
		||||
 
 | 
			
		||||
      if(ineigh >= comm->maxneigh) {
 | 
			
		||||
          size_t oldByteSize = comm->maxneigh*sizeof(int);
 | 
			
		||||
          size_t oldBoxSize = comm->maxneigh*sizeof(Box); 
 | 
			
		||||
          comm->maxneigh  = 2*ineigh;  
 | 
			
		||||
          comm->nsend     = (int*) reallocate(comm->nsend, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
			
		||||
          comm->nrecv     = (int*) reallocate(comm->nrecv, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
			
		||||
          comm->pbc_x     = (int*) reallocate(comm->pbc_x, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
			
		||||
          comm->pbc_y     = (int*) reallocate(comm->pbc_y, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
			
		||||
          comm->pbc_z     = (int*) reallocate(comm->pbc_z, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
			
		||||
          comm->boxes     = (Box*) reallocate(comm->boxes, ALIGNMENT,  comm->maxneigh * sizeof(Box), oldBoxSize);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      comm->boxes[ineigh] = cut;  
 | 
			
		||||
      comm->nsend[ineigh] = proc;
 | 
			
		||||
      comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
 | 
			
		||||
      comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0; 
 | 
			
		||||
      comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0; 
 | 
			
		||||
      ineigh++; 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    comm->sendfrom[iswap] = (iswap == 0) ? 0:comm->sendtill[iswap-1];
 | 
			
		||||
    comm->sendtill[iswap] = ineigh;
 | 
			
		||||
    comm->numneigh = ineigh; 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  allocDynamicBuffers(comm);
 | 
			
		||||
  defineReverseList(comm);
 | 
			
		||||
}
 | 
			
		||||
    
 | 
			
		||||
/*
 * Initializes MPI, stores the number of ranks and this rank's id in comm,
 * and resets the neighbour counters and every buffer pointer of comm.
 */
void initComm(int* argc, char*** argv, Comm* comm)
{
  // MPI start-up and rank information
  MPI_Init(argc, argv);
  MPI_Comm_size(MPI_COMM_WORLD, &comm->numproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &comm->myproc);

  // No neighbours are known yet
  comm->numneigh     = 0;
  comm->numneighexch = 0;

  // Neighbour lists
  comm->nrecv = NULL;
  comm->nsend = NULL;
  comm->nexch = NULL;
  comm->pbc_x = NULL;
  comm->pbc_y = NULL;
  comm->pbc_z = NULL;
  comm->boxes = NULL;

  // Per-neighbour bookkeeping
  comm->atom_send     = NULL;
  comm->atom_recv     = NULL;
  comm->off_atom_send = NULL;
  comm->off_atom_recv = NULL;
  comm->maxsendlist   = NULL;
  comm->sendlist      = NULL;

  // Message buffers
  comm->buf_send = NULL;
  comm->buf_recv = NULL;
}
 | 
			
		||||
 
 | 
			
		||||
/*
 * Resets the capacity counters of comm, releases all of its buffers and
 * finalizes MPI.
 */
void endComm(Comm* comm)
{
  comm->maxneigh     = 0;
  comm->maxneighexch = 0;
  comm->maxsend      = 0;
  comm->maxrecv      = 0;

  freeBuffers(comm);
  MPI_Finalize();
}
 | 
			
		||||
 | 
			
		||||
/*
 * Sets up the swap tables, the per-atom message sizes and the initial
 * buffers of comm, then builds the neighbour lists with neighComm.
 *
 * Swaps are numbered 0..5: two per dimension in the order x, y, z, the even
 * one with direction 0 and the odd one with direction 1.
 */
void setupComm(Comm* comm, Parameter* param, Grid* grid){
  const int dims[3] = {_x, _y, _z};

  for (int d = 0; d < 3; d++) {
    const int dirSwap0 = 2*d;
    const int dirSwap1 = 2*d + 1;

    comm->swap[dims[d]][0] = dirSwap0;
    comm->swap[dims[d]][1] = dirSwap1;

    comm->swapdim[dirSwap0] = dims[d];
    comm->swapdim[dirSwap1] = dims[d];

    comm->swapdir[dirSwap0] = 0;
    comm->swapdir[dirSwap1] = 1;
  }

  // All send/recv ranges start out empty
  for (int iswap = 0; iswap < 6; iswap++) {
    comm->sendfrom[iswap] = 0;
    comm->sendtill[iswap] = 0;
    comm->recvfrom[iswap] = 0;
    comm->recvtill[iswap] = 0;
  }

  comm->forwardSize  = FORWARD_SIZE;    // send coordinates x,y,z
  comm->reverseSize  = REVERSE_SIZE;    // return forces fx,fy,fz
  comm->ghostSize    = GHOST_SIZE;      // send x,y,z,type
  comm->exchangeSize = EXCHANGE_SIZE;   // send x,y,z,vx,vy,vz,type

  // Allocate memory for the send buffer and the recv buffer
  comm->maxsend  = BUFMIN;
  comm->maxrecv  = BUFMIN;
  comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
  comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));

  // List of neighbours used for exchanging atoms
  comm->maxneighexch = NEIGHMIN;
  comm->nexch = (int*) allocate(ALIGNMENT, comm->maxneighexch * sizeof(int));

  // Lists of neighbours used by the swaps
  comm->maxneigh = NEIGHMIN;
  comm->nsend = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
  comm->nrecv = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
  comm->pbc_x = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
  comm->pbc_y = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
  comm->pbc_z = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
  comm->boxes = (Box*) allocate(ALIGNMENT, comm->maxneigh * sizeof(Box));

  neighComm(comm, param, grid);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Forward communication for swap iswap.
 *
 * Packs the atoms of every send list of the swap with packForward, transfers
 * them to the send neighbours and unpacks what arrives from the receive
 * neighbours with unpackForward. The per-neighbour counts and offsets used
 * here are the ones stored in comm by ghostComm.
 *
 * When comm->othersend[iswap] is 0 (the only neighbour of the swap is this
 * rank) no MPI call is made and the send buffer is unpacked directly.
 */
void forwardComm(Comm* comm, Atom* atom, int iswap)
{
  const int size     = comm->forwardSize;
  const int exchange = comm->othersend[iswap];
  // A variable length array must have at least one element
  const int maxrqst  = (comm->numneigh > 0) ? comm->numneigh : 1;
  int nrqst = 0;
  int pbc[3];
  MD_FLOAT* buf;
  MPI_Request requests[maxrqst];

  // Pack the send buffer, one section per send neighbour
  for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
    const int offset = comm->off_atom_send[ineigh];
    pbc[_x] = comm->pbc_x[ineigh];
    pbc[_y] = comm->pbc_y[ineigh];
    pbc[_z] = comm->pbc_z[ineigh];
    packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset*size], pbc);
  }

  if (exchange) {
    // Post all receives before the blocking sends
    for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
      const int offset = comm->off_atom_recv[ineigh]*size;
      const int nrecv  = comm->atom_recv[ineigh]*size;
      MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh], 0, world, &requests[nrqst++]);
    }

    // Send elements
    for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
      const int offset = comm->off_atom_send[ineigh]*size;
      const int nsend  = comm->atom_send[ineigh]*size;
      MPI_Send(&comm->buf_send[offset], nsend, type, comm->nsend[ineigh], 0, world);
    }

    // MPI_Waitall takes an array of statuses, hence MPI_STATUSES_IGNORE
    MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);
  }

  buf = exchange ? comm->buf_recv : comm->buf_send;

  /* unpack buffer */
  for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
    const int offset = comm->off_atom_recv[ineigh];
    unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset*size]);
  }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Reverse communication for swap iswap: the data flows opposite to
 * forwardComm.
 *
 * Packs the entries received in the forward direction with packReverse,
 * sends them back to the receive neighbours and unpacks what arrives from
 * the send neighbours into the atoms of the send lists with unpackReverse.
 *
 * When comm->othersend[iswap] is 0 (the only neighbour of the swap is this
 * rank) no MPI call is made and the send buffer is unpacked directly.
 */
void reverseComm(Comm* comm, Atom* atom, int iswap)
{
  const int size     = comm->reverseSize;
  const int exchange = comm->othersend[iswap];
  // A variable length array must have at least one element
  const int maxrqst  = (comm->numneigh > 0) ? comm->numneigh : 1;
  int nrqst = 0;
  MD_FLOAT* buf;
  MPI_Request requests[maxrqst];

  // Pack the send buffer, one section per receive neighbour
  for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
    const int offset = comm->off_atom_recv[ineigh];
    packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset*size]);
  }

  if (exchange) {
    // Post all receives before the blocking sends
    for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
      const int offset = comm->off_atom_send[ineigh]*size;
      const int nrecv  = comm->atom_send[ineigh]*size;
      MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh], 0, world, &requests[nrqst++]);
    }

    // Send elements
    for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
      const int offset = comm->off_atom_recv[ineigh]*size;
      const int nsend  = comm->atom_recv[ineigh]*size;
      MPI_Send(&comm->buf_send[offset], nsend, type, comm->nrecv[ineigh], 0, world);
    }

    // MPI_Waitall takes an array of statuses, hence MPI_STATUSES_IGNORE
    MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);
  }

  buf = exchange ? comm->buf_recv : comm->buf_send;

  /* unpack buffer */
  for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
    const int offset = comm->off_atom_send[ineigh];
    unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset*size]);
  }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Builds the send lists of swap iswap and transfers the selected atoms to
 * the neighbours, where they are unpacked with unpackGhost.
 *
 * Steps:
 *  1. for every send neighbour, select the atoms for which IsinRegionToSend
 *     holds, record them in comm->sendlist and pack them with packGhost;
 *  2. exchange the per-neighbour atom counts;
 *  3. exchange the packed atoms;
 *  4. unpack them and make sure the buffers are large enough for the later
 *     forward/reverse communication of this swap.
 *
 * When comm->othersend[iswap] is 0 (the only neighbour of the swap is this
 * rank) no MPI call is made and the send buffer is unpacked directly.
 */
void ghostComm(Comm* comm, Atom* atom,int iswap){

  // NOTE(review): xlo..zhi (and probably atom) appear to be read by the
  // IsinRegionToSend macro; keep these local names - confirm in comm.h.
  MD_FLOAT xlo=0, xhi=0, ylo=0, yhi=0, zlo=0, zhi=0;
  MD_FLOAT* buf;
  int nrqst=0, nsend=0, nrecv=0, offset=0, ineigh=0, pbc[3];
  int all_recv=0, all_send=0, currentSend=0;
  int size = comm->ghostSize;
  int maxrqrst = comm->numneigh;
  // A variable length array must have at least one element
  MPI_Request requests[maxrqrst > 0 ? maxrqrst : 1];

  // Every slot is reset (the original loop wrote one past the end instead)
  for(int i = 0; i<maxrqrst; i++)
    requests[i]=MPI_REQUEST_NULL;

  // Even swaps restart the number of atoms that are candidates for sending
  if(iswap%2==0) comm->iterAtom = LOCAL+GHOST;

  for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
  {
    Box* tile = &comm->boxes[ineigh];

    xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z];
    xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];
    pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh];  pbc[_z]=comm->pbc_z[ineigh];
    nsend = 0;

    for(int i = 0; i < comm->iterAtom ; i++)
    {
      if(IsinRegionToSend(i)){
        if(nsend >= comm->maxsendlist[ineigh]) growList(comm,ineigh,nsend);
        if(currentSend + size >= comm->maxsend) growSend(comm,currentSend);
        comm->sendlist[ineigh][nsend++] = i;
        currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);
      }
    }
    comm->atom_send[ineigh]     = nsend;          //#atoms sent per neigh
    comm->off_atom_send[ineigh] = all_send;       //offset of this neigh's atoms within the swap
    all_send += nsend;                            //all atoms sent
  }

  if(comm->othersend[iswap]) {
    //Receives how many elements are to be received.
    for(nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++)
      MPI_Irecv(&comm->atom_recv[ineigh],1,MPI_INT,comm->nrecv[ineigh],0,world,&requests[nrqst++]);

    //Communicates how many elements are to be sent.
    for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
      MPI_Send(&comm->atom_send[ineigh],1,MPI_INT,comm->nsend[ineigh],0,world);

    // MPI_Waitall takes an array of statuses, hence MPI_STATUSES_IGNORE
    MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);
  } else {
    // Single neighbour that is this rank: it receives what was just packed
    comm->atom_recv[comm->recvfrom[iswap]] = nsend;
  }

  //Define offset to store in the recv_buff
  for(int ineigh = comm->recvfrom[iswap]; ineigh<comm->recvtill[iswap]; ineigh++){
    comm->off_atom_recv[ineigh] = all_recv;
    all_recv += comm->atom_recv[ineigh];
  }

  if(all_recv*size>=comm->maxrecv) growRecv(comm,all_recv*size);

  if(comm->othersend[iswap]) {
    //Receives elements
    for (nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
      offset = comm->off_atom_recv[ineigh]*size;
      nrecv = comm->atom_recv[ineigh]*size;
      MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
    }

    //Send elements
    for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
      offset = comm->off_atom_send[ineigh]*size;
      nsend  = comm->atom_send[ineigh]*size;
      MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
    }

    MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);
  }

  if(comm->othersend[iswap]) buf = comm->buf_recv;
  else buf = comm->buf_send;

  //unpack elements
  comm->firstrecv[iswap] = LOCAL+GHOST;
  for(int i = 0; i < all_recv; i++)
    unpackGhost(atom, LOCAL+GHOST, &buf[i*size]);

  //Increases the buffers if needed
  int max_size = MAX(comm->forwardSize,comm->reverseSize);
  int max_buf = max_size * MAX(all_recv, all_send);
  if(max_buf>=comm->maxrecv) growRecv(comm,max_buf);
  if(max_buf>=comm->maxsend) growSend(comm,max_buf);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Moves atoms that left the local box (atom->mybox) to the ranks in
 * comm->nexch.
 *
 * After pbc(atom), every local atom outside [lo, hi) is packed with
 * packExchange and removed from the local list. The same packed buffer is
 * sent to every exchange neighbour; each receiver keeps only the atoms whose
 * coordinates fall inside its own box. Finally the global atom count is
 * checked on rank 0.
 *
 * Returns early, right after pbc(atom), when comm->numneigh is 0.
 */
void exchangeComm(Comm* comm, Atom* atom){

  MD_FLOAT x,y,z;
  MD_FLOAT *lo = atom->mybox.lo;
  MD_FLOAT *hi = atom->mybox.hi;
  int size = comm->exchangeSize;
  int numneigh = comm->numneighexch;
  // numneigh is 0 when no other rank overlaps this one; a variable length
  // array must still have at least one element
  int nslots = (numneigh > 0) ? numneigh : 1;
  int offset_recv[nslots];
  int size_recv[nslots];
  MPI_Request requests[nslots];
  int i =0,  nsend = 0, nrecv = 0;
  int nrqst = 0;
  int nlocal, offset,m;

  /* enforce PBC */
  pbc(atom);

  if(comm->numneigh == 0) return;

  /* pack and remove the atoms that are outside of the local box */
  nlocal = atom->Nlocal;
  while(i < nlocal) {
    if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
       atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
       atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
      if(nsend+size >= comm->maxsend) growSend(comm, nsend);
      nsend += packExchange(atom, i, &comm->buf_send[nsend]);
      // index i is checked again on the next pass: no i++ on this branch
      copy(atom, i, nlocal-1);
      nlocal--;
    } else i++;
  }
  atom->Nlocal = nlocal;

  /* send/recv the number of elements to share with neighbouring procs */
  for(int ineigh = 0; ineigh < numneigh; ineigh++)
    MPI_Irecv(&size_recv[ineigh],1,MPI_INT,comm->nexch[ineigh],0,world,&requests[nrqst++]);

  for (int ineigh = 0; ineigh < numneigh; ineigh++)
    MPI_Send(&nsend,1,MPI_INT,comm->nexch[ineigh],0,world);

  // MPI_Waitall takes an array of statuses, hence MPI_STATUSES_IGNORE
  MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);

  //Define offset to store in the recv_buff
  for(int ineigh = 0; ineigh<numneigh; ineigh++){
    offset_recv[ineigh] = nrecv;
    nrecv += size_recv[ineigh];
  }

  if(nrecv >= comm->maxrecv) growRecv(comm,nrecv);

  //Receives elements
  nrqst=0;
  for (int ineigh = 0; ineigh< numneigh; ineigh++){
    offset = offset_recv[ineigh];
    MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh],0,world,&requests[nrqst++]);
  }

  //Send elements: every neighbour gets the full buffer
  for (int ineigh = 0; ineigh< numneigh; ineigh++)
    MPI_Send(comm->buf_send,nsend,type,comm->nexch[ineigh],0,world);

  MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);

  /* keep only the received atoms that lie inside of the local box */
  nlocal = atom->Nlocal;
  m = 0;
  while(m < nrecv) {
    x = comm->buf_recv[m + _x];
    y = comm->buf_recv[m + _y];
    z = comm->buf_recv[m + _z];

    if(x >= lo[_x] && x < hi[_x] &&
       y >= lo[_y] && y < hi[_y] &&
       z >= lo[_z] && z < hi[_z]){
      m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
    } else {
      m += size;
    }
  }
  atom->Nlocal = nlocal;

  /* sanity check: the number of atoms over all ranks must not change */
  int all_atoms=0;
  MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
  if(atom->Natoms!=all_atoms && comm->myproc ==0){
    printf("Losing atoms! current atoms:%d expected atoms:%d\n",all_atoms,atom->Natoms);
  }
}
 | 
			
		||||
 | 
			
		||||
//Internal functions
 | 
			
		||||
 | 
			
		||||
/*
 * Replaces the receive buffer by a new one with room for BUFFACTOR * n
 * elements. The old buffer is freed first, so its content is not kept.
 */
inline void growRecv(Comm* comm, int n)
{
  comm->maxrecv = BUFFACTOR * n;

  // free(NULL) is a no-op, so no guard is required
  free(comm->buf_recv);
  comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
}
 | 
			
		||||
 | 
			
		||||
/*
 * Resizes the send buffer through reallocate to BUFFACTOR * n elements plus
 * BUFEXTRA; the previous byte size is passed along to reallocate.
 */
inline void growSend(Comm* comm, int n)
{
  const size_t previousBytes = (comm->maxsend+BUFEXTRA)*sizeof(MD_FLOAT);

  comm->maxsend  = BUFFACTOR * n;
  comm->buf_send = (MD_FLOAT*) reallocate(comm->buf_send,
                                          ALIGNMENT,
                                          (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT),
                                          previousBytes);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Resizes the send list of neighbour ineigh through reallocate to
 * BUFFACTOR * n entries; the previous byte size is passed along to reallocate.
 */
inline void growList(Comm* comm, int ineigh, int n)
{
  const size_t previousBytes = comm->maxsendlist[ineigh]*sizeof(int);

  comm->maxsendlist[ineigh] = BUFFACTOR * n;
  comm->sendlist[ineigh]    = (int*) reallocate(comm->sendlist[ineigh],
                                                ALIGNMENT,
                                                comm->maxsendlist[ineigh] * sizeof(int),
                                                previousBytes);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Allocates the arrays whose length is the current number of neighbours
 * (comm->numneigh): atom counts, offsets, send list capacities and one send
 * list of BUFMIN entries per neighbour.
 */
static inline void  allocDynamicBuffers(Comm* comm)
{
  const int nneigh = comm->numneigh;
  const size_t intArrayBytes = nneigh * sizeof(int);

  comm->atom_send     = (int*) allocate(ALIGNMENT, intArrayBytes);
  comm->atom_recv     = (int*) allocate(ALIGNMENT, intArrayBytes);
  comm->off_atom_send = (int*) allocate(ALIGNMENT, intArrayBytes);
  comm->off_atom_recv = (int*) allocate(ALIGNMENT, intArrayBytes);
  comm->maxsendlist   = (int*) allocate(ALIGNMENT, intArrayBytes);
  comm->sendlist      = (int**) allocate(ALIGNMENT, nneigh * sizeof(int*));

  // Every neighbour starts with a send list of BUFMIN entries
  for (int ineigh = 0; ineigh < nneigh; ineigh++) {
    comm->maxsendlist[ineigh] = BUFMIN;
    comm->sendlist[ineigh]    = (int*) allocate(ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int));
  }
}
 | 
			
		||||
 | 
			
		||||
//Release every buffer sized by the number of neighbours (the counterpart of
//allocDynamicBuffers). Each pointer is reset to NULL after being freed so
//that a later release of the same fields (freeBuffers frees them as well)
//cannot free the same memory twice.
static inline void freeDynamicBuffers(Comm* comm)
{
  const int numneigh = comm->numneigh;

  //free(NULL) is a no-op, so no guards are needed on the plain buffers
  free(comm->atom_send);      comm->atom_send = NULL;
  free(comm->atom_recv);      comm->atom_recv = NULL;
  free(comm->off_atom_send);  comm->off_atom_send = NULL;
  free(comm->off_atom_recv);  comm->off_atom_recv = NULL;
  free(comm->maxsendlist);    comm->maxsendlist = NULL;

  //The list of lists must exist before its entries can be visited
  if(comm->sendlist != NULL) {
    for(int i = 0; i < numneigh; i++)
      free(comm->sendlist[i]);
    free(comm->sendlist);
    comm->sendlist = NULL;
  }
}
 | 
			
		||||
 | 
			
		||||
//Release every buffer owned by the communicator structure. All pointers are
//reset to NULL afterwards, so calling this function again (or after
//freeDynamicBuffers has reset the same fields) is harmless.
static inline void freeBuffers(Comm* comm)
{
  //Neighbour tables and periodic-boundary flags
  free(comm->nrecv);  comm->nrecv = NULL;
  free(comm->nsend);  comm->nsend = NULL;
  free(comm->nexch);  comm->nexch = NULL;
  free(comm->pbc_x);  comm->pbc_x = NULL;
  free(comm->pbc_y);  comm->pbc_y = NULL;
  free(comm->pbc_z);  comm->pbc_z = NULL;
  free(comm->boxes);  comm->boxes = NULL;

  //Per-neighbour counters, offsets and capacities
  free(comm->atom_send);      comm->atom_send = NULL;
  free(comm->atom_recv);      comm->atom_recv = NULL;
  free(comm->off_atom_send);  comm->off_atom_send = NULL;
  free(comm->off_atom_recv);  comm->off_atom_recv = NULL;
  free(comm->maxsendlist);    comm->maxsendlist = NULL;

  //Per-neighbour send lists, then the table that holds them
  if(comm->sendlist != NULL) {
    for(int i = 0; i < comm->numneigh; i++)
      free(comm->sendlist[i]);
    free(comm->sendlist);
    comm->sendlist = NULL;
  }

  //Message buffers
  free(comm->buf_send);  comm->buf_send = NULL;
  free(comm->buf_recv);  comm->buf_recv = NULL;
}
 | 
			
		||||
							
								
								
									
										490
									
								
								common/grid.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										490
									
								
								common/grid.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,490 @@
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <grid.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <allocate.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
//MPI datatype used for MD_FLOAT buffers in this file: MPI_FLOAT when MD_FLOAT is 4 bytes wide, MPI_DOUBLE otherwise
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
 | 
			
		||||
 | 
			
		||||
//GROMACS balancing
 | 
			
		||||
//Clamp every entry of x from below at minx, store the result in fx and
//rescale fx so that its entries add up to one.
//  x      : input values (nprocs entries, not modified)
//  fx     : output, clamped and normalised values (nprocs entries)
//  minx   : lower bound applied to each entry before normalising
//  nprocs : number of entries
//Returns the sum of the clamped values before normalisation (the function
//previously fell off its end without returning a value).
MD_FLOAT f_normalization(MD_FLOAT* x,MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {

  MD_FLOAT sum=0;
  for(int n = 0; n<nprocs; n++){
    fx[n] = MAX(minx,x[n]);
    sum+=fx[n];
  }

  for(int n = 0; n<nprocs; n++)
    fx[n] /= sum;

  return sum;
}
 | 
			
		||||
 | 
			
		||||
//Damped fixed-point iteration on x0 (nprocs entries, updated in place).
//Each step evaluates f_normalization(x0) and moves x0 half-way (alpha = 0.5)
//towards it; the loop stops once no entry changes by 1e-3 or more, or after
//100 steps.
//  x0     : in/out vector of values
//  nprocs : number of entries in x0
//  minx   : lower bound forwarded to f_normalization
//The scratch vector is now released on every path (it used to leak).
void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
{
  const MD_FLOAT tolerance = 1e-3;
  const MD_FLOAT alpha = 0.5;
  const int maxIterations = 100;

  //Nothing to iterate on; also avoids a zero or negative sized allocation
  if(nprocs <= 0) return;

  MD_FLOAT *fx = (MD_FLOAT*) malloc(nprocs*sizeof(MD_FLOAT));
  if(fx == NULL) {
    fprintf(stderr, "fixedPointIteration: unable to allocate scratch buffer\n");
    return;
  }

  for (int i = 0; i < maxIterations; i++) {

    int converged = 1;
    f_normalization(x0,fx,minx,nprocs);

    //Relaxation step between the current iterate and its image
    for(int n=0; n<nprocs; n++)
      fx[n]= (1-alpha) * x0[n] + alpha * fx[n];

    for (int n=0; n<nprocs; n++) {
      if (fabs(fx[n] - x0[n]) >= tolerance) {
        converged = 0;
        break;
      }
    }

    //The new iterate is stored even on the converging step
    for (int n=0; n<nprocs; n++)
      x0[n] = fx[n];

    if(converged) break;
  }

  free(fx);
}
 | 
			
		||||
 | 
			
		||||
//Rebalance the per-dimension box limits from the time elapsed since the last
//rebalance.
//  grid    : process grid (coord, nprocs); Timer, map and cutneigh are updated
//  atom    : atom->mybox.lo/hi receive the new limits of this process
//  param   : supplies the domain extents (xprd, yprd, zprd) and cutneigh
//  newTime : current timer value; the load is newTime - grid->Timer
//Steps: build one sub-communicator per dimension, gather and normalise the
//loads on the root of each sub-communicator (z first, x last), derive new
//cell sizes per dimension, scatter the limits, publish all boxes with an
//allgather and widen grid->cutneigh by the largest boundary displacement.
//Fixes over the previous version: the gathered load is an MD_FLOAT so that it
//matches the MPI datatype `type` (a double was sent before, which is wrong
//when MD_FLOAT is float), and the cellSize scratch array is released.
void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
{
  int me;
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  int *coord = grid->coord;
  int *nprocs  = grid ->nprocs;
  //Elapsed time since the last rebalance, stored with the type that is
  //communicated through `type`
  MD_FLOAT time = (MD_FLOAT) (newTime - grid->Timer);
  grid->Timer = newTime;
  //Store the previous limits to compare later for the exchange region
  MD_FLOAT lo[3], hi[3];
  for(int dim = 0; dim< 3; dim++){
    lo[dim] = atom->mybox.lo[dim];
    hi[dim] = atom->mybox.hi[dim];
  }

  //Define parameters
  MPI_Comm subComm[3];
  int color[3] = {0,0,0};
  int id[3] = {0,0,0};
  MD_FLOAT ** load = (MD_FLOAT**) malloc(3*sizeof(MD_FLOAT*));
  for(int dim = 0; dim<3; dim++)
    load[dim] = (MD_FLOAT*) malloc(nprocs[dim]*sizeof(MD_FLOAT));

  int maxprocs = MAX(MAX(nprocs[_x],nprocs[_y]),nprocs[_z]);
  MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs*sizeof(MD_FLOAT));
  MD_FLOAT* limits = (MD_FLOAT*) malloc(2*maxprocs*sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... Values in between are repeated to perform MPI_Scatter later
  MD_FLOAT t_sum[3] = {0,0,0};
  MD_FLOAT recv_buf[2] = {0,0};        //Each proc only receives 2 elements per dimension: lo and hi
  MD_FLOAT balancedLoad[3] = {0,0,0};  //1/nprocs
  MD_FLOAT minLoad[3]  = {0,0,0};      //0.8*(1/nprocs)
  MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
  MD_FLOAT boundaries[6] ={0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi

  //Create sub-communicators along each dimension
  for(int dim = 0; dim<3; dim++){
     if(dim == _x){
        color[_x] = (coord[_y] == 0 && coord[_z] ==0) ? 1:MPI_UNDEFINED;
        id[_x] = me;
     } else if(dim == _y) {
        color[_y] = coord[_z] == 0 ? coord[_x]:MPI_UNDEFINED;
        id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0:me;
     } else {
        color[_z]= coord[_y]*nprocs[_x]+coord[_x];
        id[_z] = coord[_z] == 0 ? 0 : me;
     }
    MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
  }

  //Set the minimum load and the balanced load
  for(int dim = 0; dim<3; dim++){
    balancedLoad[dim] = 1./nprocs[dim];
    minLoad[dim]  = 0.8*balancedLoad[dim];
  }
  //Set and communicate the workload in reverse order (z, y, x)
  for(int dim = _z; dim>= _x; dim--)
  {
    if(subComm[dim] != MPI_COMM_NULL){
      MPI_Gather(&time,1,type,load[dim],1,type,0,subComm[dim]);

      if(id[dim] == 0)
      {
        for(int n=0; n<nprocs[dim]; n++)
          t_sum[dim] += load[dim][n];

        for(int n=0; n<nprocs[dim]; n++)
          load[dim][n] /= t_sum[dim];
      }
      //The summed load is what this process contributes to the next dimension
      //(t_sum stays 0 on processes that are not the root of this sub-communicator)
      time =t_sum[dim];
    }
    MPI_Barrier(world);
  }

  //Broadcast the new boundaries along dimensions
  for(int dim=0; dim<3; dim++){

    if(subComm[dim] != MPI_COMM_NULL){

      MPI_Bcast(boundaries,6,type,0,subComm[dim]);
      if(id[dim] == 0) {
        fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
        MD_FLOAT inv_sum=0;
        for(int n=0; n<nprocs[dim];n++)
          inv_sum +=(1/load[dim][n]);

        //Cell sizes are inversely proportional to the load and add up to prd[dim]
        for(int n=0; n<nprocs[dim];n++)
          cellSize[n] = (prd[dim]/load[dim][n])*(1./inv_sum);

        MD_FLOAT sum=0;
        for(int n=0; n<nprocs[dim]; n++){
          limits[2*n] = sum;
          limits[2*n+1] = sum+cellSize[n];
          sum+= cellSize[n];
        }
        //Pin the last upper limit to the domain extent
        limits[2*nprocs[dim]-1] = prd[dim];
      }
      MPI_Scatter(limits,2,type,recv_buf,2,type,0,subComm[dim]);
      boundaries[2*dim] = recv_buf[0];
      boundaries[2*dim+1] = recv_buf[1];
    }
     MPI_Barrier(world);
  }

  atom->mybox.lo[_x]=boundaries[0]; atom->mybox.hi[_x]=boundaries[1];
  atom->mybox.lo[_y]=boundaries[2]; atom->mybox.hi[_y]=boundaries[3];
  atom->mybox.lo[_z]=boundaries[4]; atom->mybox.hi[_z]=boundaries[5];

  MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
  MPI_Allgather(domain, 6, type, grid->map, 6, type, world);

  //Because cells change dynamically, the neighbouring exchange region must be enlarged
  for(int dim =_x; dim<=_z; dim++){
    MD_FLOAT dr,dr_max;
    dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]),fabs(hi[dim] - atom->mybox.hi[dim]));
    MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
    grid->cutneigh[dim] = param->cutneigh+dr_max;
  }

  for(int dim=0; dim<3; dim++) {
    if(subComm[dim] != MPI_COMM_NULL){
      MPI_Comm_free(&subComm[dim]);
    }
    free(load[dim]);
  }
  free(load);
  free(cellSize);
  free(limits);
}
 | 
			
		||||
 | 
			
		||||
//RCB Balancing
 | 
			
		||||
//Bisection point along `dim` for the processes of subComm: the sum of the
//local atom positions and the local atom count are both multiplied by `time`
//on each process, reduced over subComm, and their ratio is returned.
MD_FLOAT meanTimeBisect(Atom *atom, MPI_Comm subComm, int dim, double time)
{
  MD_FLOAT localSum = 0, globalSum = 0;
  MD_FLOAT localWeight = 0, globalWeight = 0;
  MD_FLOAT mean = 0;

  for(int iatom = 0; iatom < atom->Nlocal; iatom++)
    sumPositions: localSum += atom_pos(iatom);

  localSum *= time;
  localWeight = atom->Nlocal*time;

  MPI_Allreduce(&localSum, &globalSum, 1, type, MPI_SUM, subComm);
  MPI_Allreduce(&localWeight, &globalWeight, 1, type, MPI_SUM, subComm);

  mean = globalSum/globalWeight;
  return mean;
}
 | 
			
		||||
 | 
			
		||||
//Bisection point along `dim` for the processes of subComm: the mean position
//of all their atoms (sum of positions divided by the number of atoms, both
//reduced over subComm). The `time` argument is not used by this method.
MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
  int globalCount = 0;
  MD_FLOAT localSum = 0, globalSum = 0, mean = 0;

  for(int iatom = 0; iatom < atom->Nlocal; iatom++)
    localSum += atom_pos(iatom);

  MPI_Allreduce(&localSum, &globalSum, 1, type, MPI_SUM, subComm);
  MPI_Allreduce(&atom->Nlocal, &globalCount, 1, MPI_INT, MPI_SUM, subComm);

  mean = globalSum/globalCount;
  return mean;
}
 | 
			
		||||
 | 
			
		||||
//Perform one level of the recursive bisection inside subComm.
//  grid    : owns the send/receive buffers used for the atom transfer
//  atom    : local atoms; mybox is cut along `dim` and atoms are exchanged
//  method  : callback that returns the cut position
//  subComm : processes that share the box being cut
//  dim     : dimension along which the box is cut
//  color   : in/out colour; bit `ilevel` is set for the upper branch
//  ilevel  : current level of the recursion tree
//  time    : elapsed time, forwarded to `method`
//The lower half of the ranks keeps [lo, cut), the rest keeps [cut, hi). Atoms
//that fall outside the new box are packed and sent to the partner rank; with
//an odd number of ranks the last rank sends to rank 0 and receives nothing.
//nsend/nrecv/nrecv2 count MD_FLOAT values (they accumulate the return value
//of packExchange), not atoms.
void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim ,int* color, int ilevel, double time)
{
  int rank, size;
  int branch = 0, i = 0, m = 0;
  int nsend = 0, nrecv = 0, nrecv2 = 0;
  int values_per_atom = 7;
  MD_FLOAT bisection, pos;
  MPI_Request request[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
  MPI_Comm_rank(subComm,&rank);
  MPI_Comm_size(subComm,&size);

  int odd = size%2;
  int extraProc = odd ? size-1:size;
  int half = (int) (0.5*size);
  int partner = (rank<half) ? rank+half:rank-half;
  if(odd && rank == extraProc) partner = 0;
  //Apply the bisection
  bisection = method(atom,subComm,dim,time);
  //Define the new boundaries
  if(rank<half){
    atom->mybox.hi[dim] = bisection;
    branch = 0;
  } else {
    atom->mybox.lo[dim] = bisection;
    branch = 1;
  }
  //Define the new color for the further communication
  *color = (branch << ilevel) | *color;
  //Grow the send buffer
  if(atom->Nlocal>=grid->maxsend){
      if(grid->buf_send) free(grid->buf_send);
      grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal*values_per_atom* sizeof(MD_FLOAT));
      grid->maxsend = atom->Nlocal;
  }
  //Buffer the particles to send; the last local atom is moved into the freed
  //slot, so index i is only advanced when atom i stays
  while(i < atom->Nlocal) {
    pos = atom_pos(i);
    if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
      nsend += packExchange(atom, i, &grid->buf_send[nsend]);
      copy(atom, i, atom->Nlocal-1);
      atom->Nlocal--;
    } else i++;
  }

  //Communicate the number of elements to be sent
  if(rank < extraProc){
    MPI_Irecv(&nrecv,1,MPI_INT,partner,0,subComm,&request[0]);
  }
  if(odd && rank == 0){
    MPI_Irecv(&nrecv2,1,MPI_INT,extraProc,0,subComm,&request[1]);
  }
  MPI_Send(&nsend,1,MPI_INT,partner,0,subComm);
  //MPI_Waitall takes an array of statuses, hence MPI_STATUSES_IGNORE
  MPI_Waitall(2,request,MPI_STATUSES_IGNORE);

  //Grow the recv buffer
  if(nrecv+nrecv2>=grid->maxrecv){
      if(grid->buf_recv) free(grid->buf_recv);
      grid->buf_recv = (MD_FLOAT*) malloc((nrecv+nrecv2)*values_per_atom*sizeof(MD_FLOAT));
      grid->maxrecv = nrecv+nrecv2;
  }

  //Communicate the elements in the buffer
  request[0] = MPI_REQUEST_NULL;
  request[1] = MPI_REQUEST_NULL;

  if(rank < extraProc){
    MPI_Irecv(grid->buf_recv,nrecv,type,partner,0,subComm,&request[0]);
  }
  if(odd && rank == 0){
    MPI_Irecv(&grid->buf_recv[nrecv],nrecv2,type,extraProc,0,subComm,&request[1]);
  }
  MPI_Send (grid->buf_send,nsend,type,partner,0,subComm);
  MPI_Waitall(2,request,MPI_STATUSES_IGNORE);

  //Store the received atoms in the atom list
  while(m < nrecv+nrecv2){
    m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
  }
}
 | 
			
		||||
 | 
			
		||||
//Recursive coordinate bisection of the whole domain.
//  grid    : Timer, map and cutneigh are updated
//  atom    : mybox is reset to the full domain and then cut level by level
//  param   : supplies the domain extents (xprd, yprd, zprd) and cutneigh
//  method  : callback that chooses the cut position at every level
//  ndim    : number of dimensions cycled through by the recursion
//  newTime : current timer value; newTime - grid->Timer is passed to `method`
//Dimensions are visited from the largest extent to the smallest. At every
//level the processes sharing a colour form a sub-communicator and split their
//box in two. The extents are now kept as MD_FLOAT: they were stored in an int
//array, which truncated them before the sort by size.
void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
{
  int me, nprocs=0, ilevel=0, nboxes=1;
  int color = 0, size =0;
  int index;
  MD_FLOAT prd[3];
  MPI_Comm subComm;
  MPI_Comm_size(world, &nprocs);
  MPI_Comm_rank(world, &me);

  //Set the elapsed time since the last dynamic balance
  double time = newTime - grid->Timer;

  prd[_x] = atom->mybox.xprd = param->xprd;
  prd[_y] = atom->mybox.yprd = param->yprd;
  prd[_z] = atom->mybox.zprd = param->zprd;

  //Sort the dimensions by decreasing extent
  int largerDim[3] ={_x, _y, _z};

  for(int i = 0; i< 2; i++){
    for(int j = i+1; j<3; j++)
    {
      if(prd[largerDim[j]]>prd[largerDim[i]]){
        int tmp = largerDim[j];
        largerDim[j] = largerDim[i];
        largerDim[i] = tmp;
      }
    }
  }
  //Initial partition: every process starts with the whole domain
  atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
  atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
  atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;

  //Recursion tree
  while(nboxes<nprocs)
  {
    index = ilevel%ndim;
    MPI_Comm_split(world, color, me, &subComm);
    MPI_Comm_size(subComm,&size);
    if(size > 1){
      nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
    }
    MPI_Comm_free(&subComm);
    //Number of boxes after this level: 2^ilevel, computed with integers
    ilevel++;
    nboxes = 1 << ilevel;
  }
  //Set the new timer grid
  grid->Timer = newTime;

  //Creating the global map
  MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
  MPI_Allgather(domain, 6, type, grid->map, 6, type, world);

  //Define the same cutneighbour in all dimensions for the exchange communication
  for(int dim =_x; dim<=_z; dim++)
    grid->cutneigh[dim] = param->cutneigh;
}
 | 
			
		||||
 | 
			
		||||
//Regular grid
 | 
			
		||||
//Regular decomposition: arrange the processes on a periodic 3d cartesian
//grid, give every process an equally sized box with the origin at (0,0,0),
//publish all boxes in grid->map and use param->cutneigh as the exchange
//cut-off in every dimension.
void cartisian3d(Grid* grid, Parameter* param, Box* box)
{
  int me, nproc;
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);

  int numdim = 3;
  int reorder = 0;
  int periods[3] = {1, 1, 1};
  int mycoord[3] = {0, 0, 0};
  int griddim[3] = {0, 0, 0};
  MD_FLOAT len[3];
  MPI_Comm cartesian;

  //Domain extents
  box->xprd = param->xprd;
  box->yprd = param->yprd;
  box->zprd = param->zprd;

  //Process grid and the position of this process inside it
  MPI_Dims_create(nproc, numdim, griddim);
  MPI_Cart_create(world, numdim, griddim, periods, reorder, &cartesian);
  MPI_Cart_coords(cartesian, me, 3, mycoord);

  //Edge lengths of one box
  len[_x] = param->xprd / griddim[_x];
  len[_y] = param->yprd / griddim[_y];
  len[_z] = param->zprd / griddim[_z];

  //Per-dimension bookkeeping, local box limits and exchange cut-off
  for(int d = _x; d <= _z; d++) {
    grid->nprocs[d]   = griddim[d];
    grid->coord[d]    = mycoord[d];
    box->lo[d]        = mycoord[d] * len[d];
    box->hi[d]        = (mycoord[d] + 1) * len[d];
    grid->cutneigh[d] = param->cutneigh;
  }

  //Global map: six values per process (lo x,y,z followed by hi x,y,z)
  MD_FLOAT domain[6] = {box->lo[_x], box->lo[_y], box->lo[_z], box->hi[_x], box->hi[_y], box->hi[_z]};
  MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
  MPI_Comm_free(&cartesian);
}
 | 
			
		||||
 | 
			
		||||
//Other Functions from the grid
 | 
			
		||||
//Initialise the grid structure: allocate the global box map (six values per
//process) and reset the rcb buffers and the balancing timer.
void initGrid(Grid* grid)
{
  int worldSize;
  MPI_Comm_size(world, &worldSize);

  //Global map of boxes
  grid->map_size = 6 * worldSize;
  grid->map = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));

  //rcb: the buffers are created on demand
  grid->buf_send = NULL;
  grid->buf_recv = NULL;
  grid->maxsend = 0;
  grid->maxrecv = 0;

  //staggered
  grid->Timer = 0.;
}
 | 
			
		||||
 | 
			
		||||
//Set up the initial regular grid: initialise `grid`, shift the atoms so that
//the origin is (0,0,0) when an input file is used, build the cartesian
//decomposition and drop every atom that lies outside the local box. Without
//balancing, the global atom count is reduced and printed.
void setupGrid(Grid* grid, Atom* atom, Parameter* param)
{
  int me;
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  initGrid(grid);

  //Set the origin at (0,0,0)
  if(param->input_file){
    for(int k = 0; k < atom->Nlocal; k++){
      atom_x(k) = atom_x(k) - param->xlo;
      atom_y(k) = atom_y(k) - param->ylo;
      atom_z(k) = atom_z(k) - param->zlo;
    }
  }

  cartisian3d(grid, param, &atom->mybox);

  //Limits of the local box
  const MD_FLOAT xlo = atom->mybox.lo[_x], xhi = atom->mybox.hi[_x];
  const MD_FLOAT ylo = atom->mybox.lo[_y], yhi = atom->mybox.hi[_y];
  const MD_FLOAT zlo = atom->mybox.lo[_z], zhi = atom->mybox.hi[_z];

  //Keep only the atoms inside [lo, hi); a rejected atom is overwritten by the
  //last local atom, so the same index is examined again
  int i = 0;
  while(i < atom->Nlocal)
  {
    const int inside = atom_x(i) >= xlo && atom_x(i) < xhi &&
                       atom_y(i) >= ylo && atom_y(i) < yhi &&
                       atom_z(i) >= zlo && atom_z(i) < zhi;
    if(!inside) {
      copy(atom, i, atom->Nlocal-1);
      atom->Nlocal--;
      continue;
    }
    i++;
  }

  //printGrid(grid);
  if(!param->balance){
    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
    MPI_Barrier(world);
  }
}
 | 
			
		||||
 | 
			
		||||
//Print the box limits of every process (read from grid->map) on rank 0;
//all processes synchronise on a barrier afterwards.
void printGrid(Grid* grid)
{
  int me, nprocs;
  MPI_Comm_size(world, &nprocs);
  MPI_Comm_rank(world, &me);

  if(me == 0)
  {
    printf("GRID:\n");
    printf("===================================================================================================\n");
    for(int iproc = 0; iproc < nprocs; iproc++) {
      //Six values per box: lo x,y,z followed by hi x,y,z
      const MD_FLOAT* b = &grid->map[6*iproc];
      printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", iproc,b[0],b[3],b[1],b[4],b[2],b[5]);
    }
    printf("\n\n");
  }
  MPI_Barrier(world);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										22
									
								
								common/includes/box.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								common/includes/box.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,22 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
			
		||||
 * All rights reserved. This file is part of MD-Bench.
 | 
			
		||||
 * Use of this source code is governed by a LGPL-3.0
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __BOX_H_
 | 
			
		||||
#define __BOX_H_
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int id;
 | 
			
		||||
  MD_FLOAT xprd, yprd, zprd;     //Domain Dimension
 | 
			
		||||
  MD_FLOAT lo[3];               //smallest coordinate of my subdomain
 | 
			
		||||
  MD_FLOAT hi[3];               //Highest coordinate of my subdomain
 | 
			
		||||
} Box;
 | 
			
		||||
 | 
			
		||||
int overlapBox(int, int , const Box*, const Box* , Box* , MD_FLOAT , MD_FLOAT);
 | 
			
		||||
int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
 | 
			
		||||
void expandBox(int , const Box*, const Box* , Box* , MD_FLOAT);
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										104
									
								
								common/includes/comm.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								common/includes/comm.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,104 @@
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <box.h>
 | 
			
		||||
#include <grid.h>
 | 
			
		||||
 | 
			
		||||
#ifndef COMM_H
 | 
			
		||||
#define COMM_H
 | 
			
		||||
 | 
			
		||||
//Per-element message sizes and helper macros for the communication code.
//LOCAL, GHOST and IsinRegionToSend expect a variable named `atom` at the point
//of use; IsinRegionToSend additionally expects xlo, xhi, ylo, yhi, zlo, zhi.
//JFAC, LOCAL and GHOST are fully parenthesised so that they can be combined
//with other operators without precedence surprises.
#ifdef GROMACS
#define FORWARD_SIZE  (3*CLUSTER_N)
#define REVERSE_SIZE  (3*CLUSTER_N)
#define GHOST_SIZE    (4*CLUSTER_N+10)
#define EXCHANGE_SIZE 7

#define JFAC  (MAX(1, CLUSTER_N / CLUSTER_M))
#define LOCAL (atom->Nclusters_local / JFAC)
#define GHOST (atom->Nclusters_ghost)

//True when, in every dimension, either bounding-box corner of cluster cj
//passes both the lower-limit test and the upper-limit test
#define IsinRegionToSend(cj)                                                                  \
           ((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo)  &&  \
            (atom->jclusters[(cj)].bbminx  < xhi || atom->jclusters[(cj)].bbmaxx  < xhi)  &&  \
            (atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo)  &&  \
            (atom->jclusters[(cj)].bbminy  < yhi || atom->jclusters[(cj)].bbmaxy  < yhi)  &&  \
            (atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo)  &&  \
            (atom->jclusters[(cj)].bbminz  < zhi || atom->jclusters[(cj)].bbmaxz  < zhi))

#else

#define FORWARD_SIZE  3
#define REVERSE_SIZE  3
#define GHOST_SIZE    4
#define EXCHANGE_SIZE 7
#define LOCAL (atom->Nlocal)
#define GHOST (atom->Nghost)

//True when atom i lies inside [xlo,xhi) x [ylo,yhi) x [zlo,zhi)
#define IsinRegionToSend(i)                                 \
           ((atom_x((i)) >= xlo && atom_x((i)) < xhi) &&    \
            (atom_y((i)) >= ylo && atom_y((i)) < yhi) &&    \
            (atom_z((i)) >= zlo && atom_z((i)) < zhi))

#endif
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int myproc;                       // my proc ID
 | 
			
		||||
  int numproc;                      // # of processors
 | 
			
		||||
	
 | 
			
		||||
  int numneigh;                     // # of all my neighs along all swaps 
 | 
			
		||||
  int maxneigh;										  // Buffer size for my neighs
 | 
			
		||||
	int sendfrom[6];                  //return the lowest neigh index to send in each swap
 | 
			
		||||
  int sendtill[6];                  //return the highest neigh index to send in each swao
 | 
			
		||||
  int recvfrom[6];                  //return the lowest neigh index to recv in each swap
 | 
			
		||||
  int recvtill[6];                  //return the highest neigh index to recv in each swap
 | 
			
		||||
  int* nsend;											  // neigh whose I want to send
 | 
			
		||||
  int* nrecv;                       // neigh whose I want to recv
 | 
			
		||||
 | 
			
		||||
	int* pbc_x;                       // if pbc in x
 | 
			
		||||
	int* pbc_y;                       // if pbc in y
 | 
			
		||||
	int* pbc_z;                       // if pbc in z
 | 
			
		||||
	
 | 
			
		||||
  int* atom_send, *atom_recv;       // # of atoms to send/recv for each of my neighs 
 | 
			
		||||
	int* off_atom_send;               // atom offset to send, inside of a swap
 | 
			
		||||
  int* off_atom_recv;               // atom offset to recv, inside of a swap
 | 
			
		||||
         
 | 
			
		||||
  int* nexch;                        //procs to exchange
 | 
			
		||||
  int numneighexch;                  //# of neighbours to exchange
 | 
			
		||||
  int maxneighexch;                  //max buff size to store neighbours
 | 
			
		||||
 | 
			
		||||
	int numswap;                      // # of swaps to perform, it is 6
 | 
			
		||||
  int swapdim[6]; 									// dimension of the swap (_x, _y or _z)
 | 
			
		||||
	int swapdir[6];										// direction of the swap 0 or 1
 | 
			
		||||
  int swap[3][2];                   // given a dim and dir, knows the swap
 | 
			
		||||
  int othersend[6];                 // Determine if a proc interact with more procs in a given swap
 | 
			
		||||
 | 
			
		||||
	int firstrecv[6];                 // where to put 1st recv atom in each swap
 | 
			
		||||
  int** sendlist;                   // list of atoms to send in each swap   
 | 
			
		||||
  int* maxsendlist;								  // max # of atoms send in each list-swap
 | 
			
		||||
 | 
			
		||||
	int maxsend;											// max elements in buff sender 									
 | 
			
		||||
	int maxrecv;											// max elements in buff receiver
 | 
			
		||||
  MD_FLOAT* buf_send;               // sender buffer for all comm
 | 
			
		||||
	MD_FLOAT* buf_recv;               // receicer buffer for all comm
 | 
			
		||||
	 	  
 | 
			
		||||
	int forwardSize;					        // # of paramaters per atom in forward comm.
 | 
			
		||||
	int reverseSize;			        		// # of parameters per atom in reverse
 | 
			
		||||
  int exchangeSize;                 // # of parameters per atom in exchange
 | 
			
		||||
	int ghostSize;                    // # of parameters per atom in ghost list                               
 | 
			
		||||
 | 
			
		||||
  int  iterAtom;                     //last atom to iterate in each swap.
 | 
			
		||||
  Box* boxes; 											 // Boundaries to  be sent to other procs as ghost.
 | 
			
		||||
} Comm;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void initComm(int*, char***, Comm*); 						    //Init MPI 
 | 
			
		||||
void endComm(Comm*);													      //End MPI
 | 
			
		||||
void setupComm(Comm*,Parameter*,Grid*);             //Creates a 3d grid or rcb grid
 | 
			
		||||
void neighComm(Comm*,Parameter*,Grid*);             //Find neighbours within cut-off and defines ghost regions
 | 
			
		||||
void forwardComm(Comm*,Atom*,int);							    //Send info in one direction
 | 
			
		||||
void reverseComm(Comm*,Atom*,int);							    //Return info after forward communication
 | 
			
		||||
void exchangeComm(Comm*,Atom*);							        //Exchange info between procs
 | 
			
		||||
void ghostComm(Comm*, Atom*,int);                   //Build the ghost neighbours to send during next forwards
 | 
			
		||||
void growSend(Comm*,int);										        //Grows the size of the buffer sender
 | 
			
		||||
void growRecv(Comm*,int);										        //Grows the size of the buffer receiver
 | 
			
		||||
void growList(Comm*, int, int);                     //Grows the size of the list to send
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										51
									
								
								common/includes/grid.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								common/includes/grid.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
			
		||||
 * All rights reserved. This file is part of MD-Bench.
 | 
			
		||||
 * Use of this source code is governed by a LGPL-3.0
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <box.h>
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __MAP_H_
 | 
			
		||||
#define __MAP_H_
 | 
			
		||||
 | 
			
		||||
#define world MPI_COMM_WORLD
 | 
			
		||||
#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))
 | 
			
		||||
 | 
			
		||||
enum {RCB=1, meanTimeRCB, Staggered};
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int balance_every;
 | 
			
		||||
  int  map_size;
 | 
			
		||||
  MD_FLOAT* map;
 | 
			
		||||
  //===Param for Staggered balance
 | 
			
		||||
  int nprocs[3]; 
 | 
			
		||||
  int coord[3];
 | 
			
		||||
  MD_FLOAT cutneigh[3];
 | 
			
		||||
  double Timer;
 | 
			
		||||
  //===Param for RCB balance 
 | 
			
		||||
  MD_FLOAT* buf_send;
 | 
			
		||||
  MD_FLOAT* buf_recv;
 | 
			
		||||
  int maxsend; 
 | 
			
		||||
  int maxrecv; 
 | 
			
		||||
} Grid; 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef MD_FLOAT(*RCB_Method)(Atom*,MPI_Comm,int,double);
 | 
			
		||||
 | 
			
		||||
void setupGrid(Grid*, Atom*, Parameter*);
 | 
			
		||||
void cartisian3d(Grid*, Parameter*, Box*);
 | 
			
		||||
void rcbBalance(Grid*, Atom*, Parameter* ,RCB_Method, int, double);
 | 
			
		||||
void staggeredBalance(Grid*, Atom*, Parameter*, double); 
 | 
			
		||||
void printGrid(Grid*); 
 | 
			
		||||
//rcb methods
 | 
			
		||||
MD_FLOAT meanBisect(Atom* , MPI_Comm, int, double);
 | 
			
		||||
MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -53,6 +53,10 @@ typedef struct {
 | 
			
		||||
    MD_FLOAT k_dn;
 | 
			
		||||
    MD_FLOAT gx, gy, gz;
 | 
			
		||||
    MD_FLOAT reflect_x, reflect_y, reflect_z;
 | 
			
		||||
    //MPI implementation
 | 
			
		||||
    int balance;
 | 
			
		||||
    int method;
 | 
			
		||||
    int balance_every;
 | 
			
		||||
} Parameter;
 | 
			
		||||
 | 
			
		||||
void initParameter(Parameter*);
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										71
									
								
								common/includes/shell_methods.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								common/includes/shell_methods.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,71 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
			
		||||
 * All rights reserved. This file is part of MD-Bench.
 | 
			
		||||
 * Use of this source code is governed by a LGPL-3.0
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#include <comm.h>
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <timing.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
 | 
			
		||||
//static void addDummyCluster(Atom*);
 | 
			
		||||
 | 
			
		||||
/*
 * Runs the forward communication swaps selected by param->method and
 * returns the difference between the getTimeStamp() readings taken
 * before and after the swaps.
 *
 *   halfShell  -> swaps 0,1,2,3,4
 *   eightShell -> swaps 0,2,4
 *   otherwise  -> swaps 0,1,2,3,4,5
 *
 * comm  : communication state handed unchanged to forwardComm()
 * atom  : atom data handed unchanged to forwardComm()
 * param : only param->method is read here
 */
double forward(Comm* comm, Atom *atom, Parameter* param){
    const double tStart = getTimeStamp();

    /* Default: visit all six swaps one by one. */
    int swapEnd = 6;
    int stride  = 1;

    switch(param->method) {
    case halfShell:
        swapEnd = 5;    /* last swap (index 5) is left out */
        break;
    case eightShell:
        stride = 2;     /* only even-numbered swaps */
        break;
    default:
        break;
    }

    for(int s = 0; s < swapEnd; s += stride) {
        forwardComm(comm, atom, s);
    }

    return getTimeStamp() - tStart;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Runs the reverse communication swaps selected by param->method, in
 * descending swap order, and returns the difference between the
 * getTimeStamp() readings taken before and after.
 *
 *   halfShell   -> swaps 4,3,2,1,0
 *   eightShell  -> swaps 4,2,0
 *   halfStencil -> swaps 5,4,3,2,1,0
 *   otherwise   -> no swap is performed (full shell needs no reverse)
 *
 * comm  : communication state handed unchanged to reverseComm()
 * atom  : atom data handed unchanged to reverseComm()
 * param : only param->method is read here
 */
double reverse(Comm* comm, Atom *atom, Parameter* param){
    const double tStart = getTimeStamp();

    /* A negative start index means the loop below does not execute. */
    int swapStart = -1;
    int stride    = 1;

    switch(param->method) {
    case halfShell:
        swapStart = 4;
        break;
    case eightShell:
        swapStart = 4;
        stride    = 2;
        break;
    case halfStencil:
        swapStart = 5;
        break;
    default:
        /* Full Shell Reverse does nothing */
        break;
    }

    for(int s = swapStart; s >= 0; s -= stride) {
        reverseComm(comm, atom, s);
    }

    return getTimeStamp() - tStart;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Resets the ghost counters on `atom` and then calls ghostComm() for the
 * swaps selected by param->method.
 *
 *   halfShell  -> swaps 0,1,2,3,4
 *   eightShell -> swaps 0,2,4
 *   otherwise  -> swaps 0,1,2,3,4,5
 *
 * comm  : communication state handed unchanged to ghostComm()
 * atom  : Nghost (and Nclusters_ghost when GROMACS is defined) is zeroed
 *         before the first swap
 * param : only param->method is read here
 */
void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
{
#ifdef GROMACS
    atom->Nclusters_ghost = 0;
#endif
    atom->Nghost = 0;

    const int method  = param->method;
    const int swapEnd = (method == halfShell)  ? 5 : 6;  /* half shell omits swap 5 */
    const int stride  = (method == eightShell) ? 2 : 1;  /* eight shell: even swaps only */

    for(int s = 0; s < swapEnd; s += stride) {
        ghostComm(comm, atom, s);
    }
}
 | 
			
		||||
@@ -9,9 +9,15 @@
 | 
			
		||||
 | 
			
		||||
typedef enum {
 | 
			
		||||
    TOTAL = 0,
 | 
			
		||||
    NEIGH,
 | 
			
		||||
    FORCE,
 | 
			
		||||
    NEIGH,
 | 
			
		||||
    FORWARD,
 | 
			
		||||
    REVERSE,
 | 
			
		||||
    UPDATE,
 | 
			
		||||
    BALANCE,
 | 
			
		||||
    SETUP,
 | 
			
		||||
    REST,
 | 
			
		||||
    NUMTIMER
 | 
			
		||||
} timertype;
 | 
			
		||||
 } timerComm;
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -4,6 +4,8 @@
 | 
			
		||||
 * Use of this source code is governed by a LGPL-3.0
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __UTIL_H_
 | 
			
		||||
#define __UTIL_H_
 | 
			
		||||
 | 
			
		||||
@@ -35,6 +37,13 @@
 | 
			
		||||
#   define PRECISION_STRING     "double"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
 | 
			
		||||
#define Equal(a,b) (fabs((a)-(b))<1e-9)
 | 
			
		||||
 | 
			
		||||
enum {_x=0, _y, _z}; 
 | 
			
		||||
enum {fullShell=0, halfShell, eightShell, halfStencil};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
extern double myrandom(int*);
 | 
			
		||||
extern void random_reset(int *seed, int ibase, double *coord);
 | 
			
		||||
extern int str2ff(const char *string);
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
void initParameter(Parameter *param) {
 | 
			
		||||
    param->input_file = NULL;
 | 
			
		||||
@@ -54,13 +55,17 @@ void initParameter(Parameter *param) {
 | 
			
		||||
    param->reflect_x = 0.0;
 | 
			
		||||
    param->reflect_y = 0.0;
 | 
			
		||||
    param->reflect_z = 0.0;
 | 
			
		||||
    //MPI
 | 
			
		||||
    param->balance = 0;
 | 
			
		||||
    param->method = 0;
 | 
			
		||||
    param->balance_every =param->reneigh_every; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void readParameter(Parameter *param, const char *filename) {
 | 
			
		||||
    FILE *fp = fopen(filename, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    int i;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
@@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
 | 
			
		||||
        for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
 | 
			
		||||
        line[i] = '\0';
 | 
			
		||||
 | 
			
		||||
        char *tok = strtok(line, " ");
 | 
			
		||||
        char *val = strtok(NULL, " ");
 | 
			
		||||
        char *tok = strtok(line, "\t ");
 | 
			
		||||
        char *val = strtok(NULL, "\t ");
 | 
			
		||||
 | 
			
		||||
        #define PARSE_PARAM(p,f)   if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
 | 
			
		||||
        #define PARSE_STRING(p)    PARSE_PARAM(p, strdup)
 | 
			
		||||
@@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
 | 
			
		||||
            PARSE_INT(x_out_every);
 | 
			
		||||
            PARSE_INT(v_out_every);
 | 
			
		||||
            PARSE_INT(half_neigh);
 | 
			
		||||
            PARSE_INT(method);
 | 
			
		||||
            PARSE_INT(balance);
 | 
			
		||||
            PARSE_INT(balance_every);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Update dtforce
 | 
			
		||||
    param->dtforce = 0.5 * param->dt;
 | 
			
		||||
 | 
			
		||||
    // Update sigma6 parameter
 | 
			
		||||
    MD_FLOAT s2 = param->sigma * param->sigma;
 | 
			
		||||
    param->sigma6 = s2 * s2 * s2;
 | 
			
		||||
    
 | 
			
		||||
    //Update balance parameter, 10 could be change
 | 
			
		||||
    param->balance_every *=param->reneigh_every;
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
 | 
			
		||||
    printf("\tSkin: %e\n", param->skin);
 | 
			
		||||
    printf("\tHalf neighbor lists: %d\n", param->half_neigh);
 | 
			
		||||
    printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
 | 
			
		||||
 | 
			
		||||
    // ================ New MPI features =============
 | 
			
		||||
    char str[20]; 
 | 
			
		||||
    strcpy(str, (param->method == 1) ? "Half Shell"  :
 | 
			
		||||
                (param->method == 2) ? "Eight Shell" :
 | 
			
		||||
                (param->method == 3) ? "Half Stencil":                      
 | 
			
		||||
                                       "Full Shell");
 | 
			
		||||
    printf("\tMethod: %s\n", str);
 | 
			
		||||
    strcpy(str, (param->balance == 1) ? "mean RCB"      : 
 | 
			
		||||
                (param->balance == 2) ? "mean Time RCB" :
 | 
			
		||||
                (param->balance == 3) ? "Staggered"     :
 | 
			
		||||
                                        "cartisian");
 | 
			
		||||
    printf("\tPartition: %s\n", str);
 | 
			
		||||
    if(param->balance) 
 | 
			
		||||
        printf("\tRebalancing every (timesteps): %d\n",param->balance_every); 
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,7 @@
 | 
			
		||||
 | 
			
		||||
#include <thermo.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
static int *steparr;
 | 
			
		||||
static MD_FLOAT *tmparr;
 | 
			
		||||
@@ -24,6 +25,7 @@ static MD_FLOAT t_act;
 | 
			
		||||
static MD_FLOAT p_act;
 | 
			
		||||
static MD_FLOAT e_act;
 | 
			
		||||
static int mstat;
 | 
			
		||||
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
 | 
			
		||||
 | 
			
		||||
/* exported subroutines */
 | 
			
		||||
void setupThermo(Parameter *param, int natoms)
 | 
			
		||||
@@ -53,57 +55,73 @@ void setupThermo(Parameter *param, int natoms)
 | 
			
		||||
 | 
			
		||||
void computeThermo(int iflag, Parameter *param, Atom *atom)
 | 
			
		||||
{
 | 
			
		||||
    MD_FLOAT t = 0.0, p;
 | 
			
		||||
    MD_FLOAT t_sum = 0.0, t = 0.0, p;
 | 
			
		||||
    int me; 
 | 
			
		||||
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
 | 
			
		||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
			
		||||
        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    t = t * t_scale;
 | 
			
		||||
    p = (t * dof_boltz) * p_scale;
 | 
			
		||||
    int istep = iflag;
 | 
			
		||||
    MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0 ,MPI_COMM_WORLD);
 | 
			
		||||
    if(me == 0)
 | 
			
		||||
    {
 | 
			
		||||
        t = t_sum * t_scale;
 | 
			
		||||
        p = (t * dof_boltz) * p_scale;
 | 
			
		||||
        int istep = iflag;
 | 
			
		||||
 | 
			
		||||
    if(iflag == -1){
 | 
			
		||||
        istep = param->ntimes;
 | 
			
		||||
    }
 | 
			
		||||
    if(iflag == 0){
 | 
			
		||||
        mstat = 0;
 | 
			
		||||
    }
 | 
			
		||||
        if(iflag == -1){
 | 
			
		||||
            istep = param->ntimes;
 | 
			
		||||
        }
 | 
			
		||||
        if(iflag == 0){
 | 
			
		||||
            mstat = 0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    steparr[mstat] = istep;
 | 
			
		||||
    tmparr[mstat] = t;
 | 
			
		||||
    prsarr[mstat] = p;
 | 
			
		||||
    mstat++;
 | 
			
		||||
    fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
 | 
			
		||||
        steparr[mstat] = istep;
 | 
			
		||||
        tmparr[mstat] = t;
 | 
			
		||||
        prsarr[mstat] = p;
 | 
			
		||||
        mstat++;
 | 
			
		||||
        fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void adjustThermo(Parameter *param, Atom *atom)
 | 
			
		||||
{
 | 
			
		||||
    /* zero center-of-mass motion */
 | 
			
		||||
    MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
 | 
			
		||||
 | 
			
		||||
    MD_FLOAT v_sum[3], vtot[3];  
 | 
			
		||||
    
 | 
			
		||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
			
		||||
        vxtot += atom_vx(i);
 | 
			
		||||
        vytot += atom_vy(i);
 | 
			
		||||
        vztot += atom_vz(i);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;  
 | 
			
		||||
 | 
			
		||||
    vxtot = vxtot / atom->Natoms;
 | 
			
		||||
    vytot = vytot / atom->Natoms;
 | 
			
		||||
    vztot = vztot / atom->Natoms;
 | 
			
		||||
    MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);
 | 
			
		||||
    
 | 
			
		||||
    vxtot = v_sum[0] / atom->Natoms;
 | 
			
		||||
    vytot = v_sum[1] / atom->Natoms;
 | 
			
		||||
    vztot = v_sum[2] / atom->Natoms;
 | 
			
		||||
 | 
			
		||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
			
		||||
        atom_vx(i) -= vxtot;
 | 
			
		||||
        atom_vy(i) -= vytot;
 | 
			
		||||
        atom_vz(i) -= vztot;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    t_act = 0;
 | 
			
		||||
   
 | 
			
		||||
    MD_FLOAT t = 0.0;
 | 
			
		||||
    MD_FLOAT t_sum = 0.0;
 | 
			
		||||
 | 
			
		||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
			
		||||
        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    MPI_Allreduce(&t, &t_sum, 1,type, MPI_SUM,MPI_COMM_WORLD);
 | 
			
		||||
 | 
			
		||||
    t = t_sum; 
 | 
			
		||||
    t *= t_scale;
 | 
			
		||||
    MD_FLOAT factor = sqrt(param->temp / t);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,7 @@
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
 | 
			
		||||
#define IA 16807
 | 
			
		||||
@@ -86,6 +87,7 @@ int get_cuda_num_threads() {
 | 
			
		||||
 | 
			
		||||
void readline(char *line, FILE *fp) {
 | 
			
		||||
    if(fgets(line, MAXLINE, fp) == NULL) {
 | 
			
		||||
        printf("error %i\n",errno);
 | 
			
		||||
        if(errno != 0) {
 | 
			
		||||
            perror("readline()");
 | 
			
		||||
            exit(-1);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user