Final MPI version

common/box.c (new file, 97 lines)
@@ -0,0 +1,97 @@
/*
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of MD-Bench.
 * Use of this source code is governed by a LGPL-3.0
 * license that can be found in the LICENSE file.
 */
#include <stdio.h>
#include <parameter.h>
#include <util.h>
#include <box.h>
#include <mpi.h>

int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
{
    int pbc = -100;
    MD_FLOAT min[3], max[3];
    int same = (mybox->id == other->id) ? 1 : 0;

    //projections
    min[_x] = MAX(mybox->lo[_x], other->lo[_x]); max[_x] = MIN(mybox->hi[_x], other->hi[_x]);
    min[_y] = MAX(mybox->lo[_y], other->lo[_y]); max[_y] = MIN(mybox->hi[_y], other->hi[_y]);
    min[_z] = MAX(mybox->lo[_z], other->lo[_z]); max[_z] = MIN(mybox->hi[_z], other->hi[_z]);

    //Intersection, non-periodic case
    if(!same) {
        if(dir == 0) max[dim] = MIN(mybox->hi[dim], other->hi[dim] + cutneigh);
        if(dir == 1) min[dim] = MAX(mybox->lo[dim], other->lo[dim] - cutneigh);
        if((min[_x] < max[_x]) && (min[_y] < max[_y]) && (min[_z] < max[_z])) pbc = 0;
    }

    //Intersection, periodic case
    if(pbc < 0) {
        if(dir == 0) {
            min[dim] = MAX(mybox->lo[dim], other->lo[dim] - xprd);
            max[dim] = MIN(mybox->hi[dim], other->hi[dim] - xprd + cutneigh);
        } else {
            min[dim] = MAX(mybox->lo[dim], other->lo[dim] + xprd - cutneigh);
            max[dim] = MIN(mybox->hi[dim], other->hi[dim] + xprd);
        }
        if((min[_x] < max[_x]) && (min[_y] < max[_y]) && (min[_z] < max[_z]))
            pbc = (dir == 0) ? 1 : -1;
    }

    //store the cuts
    cut->lo[_x] = min[_x]; cut->hi[_x] = max[_x];
    cut->lo[_y] = min[_y]; cut->hi[_y] = max[_y];
    cut->lo[_z] = min[_z]; cut->hi[_z] = max[_z];

    return pbc;
}

int overlapFullBox(Parameter* param, MD_FLOAT* cutneigh, const Box* mybox, const Box* other)
{
    MD_FLOAT min[3], max[3];
    MD_FLOAT xprd = param->xprd;
    MD_FLOAT yprd = param->yprd;
    MD_FLOAT zprd = param->zprd;

    for(int k = -1; k < 2; k++) {
        for(int j = -1; j < 2; j++) {
            for(int i = -1; i < 2; i++) {
                min[_x] = MAX(mybox->lo[_x], other->lo[_x] - cutneigh[_x] + i * xprd);
                min[_y] = MAX(mybox->lo[_y], other->lo[_y] - cutneigh[_y] + j * yprd);
                min[_z] = MAX(mybox->lo[_z], other->lo[_z] - cutneigh[_z] + k * zprd);
                max[_x] = MIN(mybox->hi[_x], other->hi[_x] + cutneigh[_x] + i * xprd);
                max[_y] = MIN(mybox->hi[_y], other->hi[_y] + cutneigh[_y] + j * yprd);
                max[_z] = MIN(mybox->hi[_z], other->hi[_z] + cutneigh[_z] + k * zprd);
                if((min[_x] < max[_x]) && (min[_y] < max[_y]) && (min[_z] < max[_z]))
                    return 1;
            }
        }
    }

    return 0;
}

void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
{
    if(iswap == 2 || iswap == 3) {
        if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
        if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
    }

    if(iswap == 4 || iswap == 5) {
        if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
        if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
        if(me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
        if(me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
    }
}
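A reading aid, not part of the commit: a minimal sketch of how a caller can interpret the value returned by overlapBox() above. The helper name overlapKind is made up; the return codes (-100, 0, +1, -1) are the ones produced by the code above.

#include <box.h>
#include <util.h>   /* for _x, _y, _z */

static const char* overlapKind(int dim, int dir, const Box* mine, const Box* other,
                               MD_FLOAT prd, MD_FLOAT cutneigh)
{
    Box cut;
    int pbc = overlapBox(dim, dir, mine, other, &cut, prd, cutneigh);
    if(pbc == -100) return "no overlap in this swap";
    if(pbc == 0)    return "overlap without crossing the periodic boundary";
    /* pbc is +1 for dir == 0 and -1 for dir == 1 in the periodic branch above */
    return (pbc == 1) ? "overlap through the periodic image shifted by -prd"
                      : "overlap through the periodic image shifted by +prd";
}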
common/comm.c (new file, 556 lines)
@@ -0,0 +1,556 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <comm.h>
#include <allocate.h>
#include <mpi.h>
#include <util.h>

#define NEIGHMIN 6
#define BUFFACTOR 2
#define BUFMIN 1000
#define BUFEXTRA 100
#define world MPI_COMM_WORLD

MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
static inline void allocDynamicBuffers(Comm*);
static inline void freeDynamicBuffers(Comm*);
static inline void freeBuffers(Comm*);

void defineReverseList(Comm* comm){
    int dim = 0;
    int index = 0;
    int me = comm->myproc;

    //Set the inverse list
    for(int iswap = 0; iswap < 6; iswap++){
        int dim = comm->swapdim[iswap];
        int dir = comm->swapdir[iswap];
        int invswap = comm->swap[dim][(dir+1)%2];

        for(int ineigh = comm->sendfrom[invswap]; ineigh < comm->sendtill[invswap]; ineigh++)
            comm->nrecv[index++] = comm->nsend[ineigh];

        comm->recvfrom[iswap] = (iswap == 0) ? 0 : comm->recvtill[iswap-1];
        comm->recvtill[iswap] = index;
    }

    //Set if myproc is unique in the swap
    for(int iswap = 0; iswap < 6; iswap++){
        int sizeswap = comm->sendtill[iswap] - comm->sendfrom[iswap];
        int index = comm->sendfrom[iswap];
        int myneigh = comm->nsend[index];
        comm->othersend[iswap] = (sizeswap != 1 || comm->myproc != myneigh) ? 1 : 0;
    }
}

void addNeighToExchangeList(Comm* comm, int newneigh){
    int numneigh = comm->numneighexch;

    if(comm->numneighexch >= comm->maxneighexch){
        size_t oldByteSize = comm->maxneighexch * sizeof(int);
        comm->maxneighexch *= 2;
        comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT, comm->maxneighexch * sizeof(int), oldByteSize);
    }

    // Add the new element to the list
    comm->nexch[numneigh] = newneigh;
    comm->numneighexch++;
}

//Exported functions
void neighComm(Comm* comm, Parameter* param, Grid* grid)
{
    int me = comm->myproc;
    int numproc = comm->numproc;
    int PAD = 6; //number of elements per processor in the map
    int ineigh = 0;
    int sneigh = 0;
    MD_FLOAT* map = grid->map;
    MD_FLOAT cutneigh = param->cutneigh;
    MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
    Box mybox, other, cut;

    //needed for rebalancing
    freeDynamicBuffers(comm);

    //Local box
    mybox.id = me;
    mybox.lo[_x] = map[me*PAD+0]; mybox.hi[_x] = map[me*PAD+3];
    mybox.lo[_y] = map[me*PAD+1]; mybox.hi[_y] = map[me*PAD+4];
    mybox.lo[_z] = map[me*PAD+2]; mybox.hi[_z] = map[me*PAD+5];

    //Check all possible neighbours, but only for the exchange of atoms
    comm->numneighexch = 0;
    for(int proc = 0; proc < numproc; proc++){
        other.id = proc;
        other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
        other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
        other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];

        if(proc != me){
            int intersection = overlapFullBox(param, grid->cutneigh, &mybox, &other);
            if(intersection) addNeighToExchangeList(comm, proc);
        }
    }

    //MAP is stored as follows: xlo,ylo,zlo,xhi,yhi,zhi
    for(int iswap = 0; iswap < 6; iswap++)
    {
        int dir = comm->swapdir[iswap];
        int dim = comm->swapdim[iswap];

        for(int proc = 0; proc < numproc; proc++)
        {
            //Check for neighbours along each dimension, for forwardComm, reverseComm and ghostComm
            other.id = proc;
            other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
            other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
            other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];

            //returns whether the two boxes intersect: -100 means no intersection; 0, 1 and -1 mark an intersection for each pbc case.
            int pbc = overlapBox(dim, dir, &mybox, &other, &cut, prd[dim], cutneigh);
            if(pbc == -100) continue;

            expandBox(iswap, &mybox, &other, &cut, cutneigh);

            if(ineigh >= comm->maxneigh) {
                size_t oldByteSize = comm->maxneigh * sizeof(int);
                size_t oldBoxSize = comm->maxneigh * sizeof(Box);
                comm->maxneigh = 2 * ineigh;
                comm->nsend = (int*) reallocate(comm->nsend, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
                comm->nrecv = (int*) reallocate(comm->nrecv, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
                comm->pbc_x = (int*) reallocate(comm->pbc_x, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
                comm->pbc_y = (int*) reallocate(comm->pbc_y, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
                comm->pbc_z = (int*) reallocate(comm->pbc_z, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
                comm->boxes = (Box*) reallocate(comm->boxes, ALIGNMENT, comm->maxneigh * sizeof(Box), oldBoxSize);
            }

            comm->boxes[ineigh] = cut;
            comm->nsend[ineigh] = proc;
            comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
            comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0;
            comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0;
            ineigh++;
        }

        comm->sendfrom[iswap] = (iswap == 0) ? 0 : comm->sendtill[iswap-1];
        comm->sendtill[iswap] = ineigh;
        comm->numneigh = ineigh;
    }

    allocDynamicBuffers(comm);
    defineReverseList(comm);
}

void initComm(int* argc, char*** argv, Comm* comm)
{
    //MPI Initialize
    MPI_Init(argc, argv);
    MPI_Comm_size(MPI_COMM_WORLD, &(comm->numproc));
    MPI_Comm_rank(MPI_COMM_WORLD, &(comm->myproc));
    comm->numneigh = 0;
    comm->numneighexch = 0;
    comm->nrecv = NULL;
    comm->nsend = NULL;
    comm->nexch = NULL;
    comm->pbc_x = NULL;
    comm->pbc_y = NULL;
    comm->pbc_z = NULL;
    comm->boxes = NULL;
    comm->atom_send = NULL;
    comm->atom_recv = NULL;
    comm->off_atom_send = NULL;
    comm->off_atom_recv = NULL;
    comm->maxsendlist = NULL;
    comm->sendlist = NULL;
    comm->buf_send = NULL;
    comm->buf_recv = NULL;
}

void endComm(Comm* comm)
{
    comm->maxneigh = 0;
    comm->maxneighexch = 0;
    comm->maxsend = 0;
    comm->maxrecv = 0;
    freeBuffers(comm);
    MPI_Finalize();
}

void setupComm(Comm* comm, Parameter* param, Grid* grid){

    comm->swap[_x][0] = 0; comm->swap[_x][1] = 1;
    comm->swap[_y][0] = 2; comm->swap[_y][1] = 3;
    comm->swap[_z][0] = 4; comm->swap[_z][1] = 5;

    comm->swapdim[0] = comm->swapdim[1] = _x;
    comm->swapdim[2] = comm->swapdim[3] = _y;
    comm->swapdim[4] = comm->swapdim[5] = _z;

    comm->swapdir[0] = comm->swapdir[2] = comm->swapdir[4] = 0;
    comm->swapdir[1] = comm->swapdir[3] = comm->swapdir[5] = 1;

    for(int i = 0; i < 6; i++){
        comm->sendfrom[i] = 0;
        comm->sendtill[i] = 0;
        comm->recvfrom[i] = 0;
        comm->recvtill[i] = 0;
    }

    comm->forwardSize = FORWARD_SIZE;   //send coordinates x,y,z
    comm->reverseSize = REVERSE_SIZE;   //return forces fx, fy, fz
    comm->ghostSize = GHOST_SIZE;       //send x,y,z,type
    comm->exchangeSize = EXCHANGE_SIZE; //send x,y,z,vx,vy,vz,type

    //Allocate memory for the send and recv buffers
    comm->maxsend = BUFMIN;
    comm->maxrecv = BUFMIN;
    comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
    comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));

    comm->maxneighexch = NEIGHMIN;
    comm->nexch = (int*) allocate(ALIGNMENT, comm->maxneighexch * sizeof(int));

    comm->maxneigh = NEIGHMIN;
    comm->nsend = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
    comm->nrecv = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
    comm->pbc_x = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
    comm->pbc_y = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
    comm->pbc_z = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
    comm->boxes = (Box*) allocate(ALIGNMENT, comm->maxneigh * sizeof(Box));

    neighComm(comm, param, grid);
}

void forwardComm(Comm* comm, Atom* atom, int iswap)
{
    int nrqst = 0, offset = 0, nsend = 0, nrecv = 0;
    int pbc[3];
    int size = comm->forwardSize;
    int maxrqst = comm->numneigh;
    MD_FLOAT* buf;
    MPI_Request requests[maxrqst];

    for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
        offset = comm->off_atom_send[ineigh];
        pbc[_x] = comm->pbc_x[ineigh]; pbc[_y] = comm->pbc_y[ineigh]; pbc[_z] = comm->pbc_z[ineigh];
        packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset*size], pbc);
    }

    //Receive elements
    if(comm->othersend[iswap])
        for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
            offset = comm->off_atom_recv[ineigh] * size;
            nrecv = comm->atom_recv[ineigh] * size;
            MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh], 0, world, &requests[nrqst++]);
        }

    //Send elements
    if(comm->othersend[iswap])
        for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
            offset = comm->off_atom_send[ineigh] * size;
            nsend = comm->atom_send[ineigh] * size;
            MPI_Send(&comm->buf_send[offset], nsend, type, comm->nsend[ineigh], 0, world);
        }

    if(comm->othersend[iswap]) MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);

    if(comm->othersend[iswap]) buf = comm->buf_recv;
    else buf = comm->buf_send;

    /* unpack buffer */
    for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
        offset = comm->off_atom_recv[ineigh];
        unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset*size]);
    }
}

void reverseComm(Comm* comm, Atom* atom, int iswap)
{
    int nrqst = 0, offset = 0, nsend = 0, nrecv = 0;
    int size = comm->reverseSize;
    int maxrqst = comm->numneigh;
    MD_FLOAT* buf;
    MPI_Request requests[maxrqst];

    for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
        offset = comm->off_atom_recv[ineigh];
        packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset*size]);
    }

    //Receive elements
    if(comm->othersend[iswap])
        for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
            offset = comm->off_atom_send[ineigh] * size;
            nrecv = comm->atom_send[ineigh] * size;
            MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh], 0, world, &requests[nrqst++]);
        }

    //Send elements
    if(comm->othersend[iswap])
        for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
            offset = comm->off_atom_recv[ineigh] * size;
            nsend = comm->atom_recv[ineigh] * size;
            MPI_Send(&comm->buf_send[offset], nsend, type, comm->nrecv[ineigh], 0, world);
        }

    if(comm->othersend[iswap]) MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);
    if(comm->othersend[iswap]) buf = comm->buf_recv;
    else buf = comm->buf_send;

    /* unpack buffer */
    for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
        offset = comm->off_atom_send[ineigh];
        unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset*size]);
    }
}

void ghostComm(Comm* comm, Atom* atom, int iswap){
    MD_FLOAT xlo = 0, xhi = 0, ylo = 0, yhi = 0, zlo = 0, zhi = 0;
    MD_FLOAT* buf;
    int nrqst = 0, nsend = 0, nrecv = 0, offset = 0, ineigh = 0, pbc[3];
    int all_recv = 0, all_send = 0, currentSend = 0;
    int size = comm->ghostSize;
    int maxrqrst = comm->numneigh;
    MPI_Request requests[maxrqrst];

    for(int i = 0; i < maxrqrst; i++)
        requests[i] = MPI_REQUEST_NULL;

    if(iswap % 2 == 0) comm->iterAtom = LOCAL + GHOST;
    int iter = 0;
    for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++)
    {
        Box* tile = &comm->boxes[ineigh];

        xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z];
        xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];
        pbc[_x] = comm->pbc_x[ineigh]; pbc[_y] = comm->pbc_y[ineigh]; pbc[_z] = comm->pbc_z[ineigh];
        nsend = 0;

        for(int i = 0; i < comm->iterAtom; i++)
        {
            if(IsinRegionToSend(i)){
                if(nsend >= comm->maxsendlist[ineigh]) growList(comm, ineigh, nsend);
                if(currentSend + size >= comm->maxsend) growSend(comm, currentSend);
                comm->sendlist[ineigh][nsend++] = i;
                currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);
            }
        }
        comm->atom_send[ineigh] = nsend;        //# of atoms sent per neigh
        comm->off_atom_send[ineigh] = all_send; //atom offset with respect to the neighbours in a swap
        all_send += nsend;                      //all atoms sent
    }

    //Receive how many elements will arrive.
    if(comm->othersend[iswap])
        for(nrqst = 0, ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++)
            MPI_Irecv(&comm->atom_recv[ineigh], 1, MPI_INT, comm->nrecv[ineigh], 0, world, &requests[nrqst++]);

    if(!comm->othersend[iswap]) comm->atom_recv[comm->recvfrom[iswap]] = nsend;

    //Communicate how many elements will be sent.
    if(comm->othersend[iswap])
        for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++)
            MPI_Send(&comm->atom_send[ineigh], 1, MPI_INT, comm->nsend[ineigh], 0, world);
    if(comm->othersend[iswap]) MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);

    //Define the offsets to store in the recv buffer
    for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
        comm->off_atom_recv[ineigh] = all_recv;
        all_recv += comm->atom_recv[ineigh];
    }

    if(all_recv * size >= comm->maxrecv) growRecv(comm, all_recv * size);

    //Receive elements
    if(comm->othersend[iswap])
        for(nrqst = 0, ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
            offset = comm->off_atom_recv[ineigh] * size;
            nrecv = comm->atom_recv[ineigh] * size;
            MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh], 0, world, &requests[nrqst++]);
        }

    //Send elements
    if(comm->othersend[iswap])
        for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
            offset = comm->off_atom_send[ineigh] * size;
            nsend = comm->atom_send[ineigh] * size;
            MPI_Send(&comm->buf_send[offset], nsend, type, comm->nsend[ineigh], 0, world);
        }
    if(comm->othersend[iswap]) MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);

    if(comm->othersend[iswap]) buf = comm->buf_recv;
    else buf = comm->buf_send;

    //unpack elements
    comm->firstrecv[iswap] = LOCAL + GHOST;
    for(int i = 0; i < all_recv; i++)
        unpackGhost(atom, LOCAL + GHOST, &buf[i*size]);

    //Grow the buffers if needed
    int max_size = MAX(comm->forwardSize, comm->reverseSize);
    int max_buf = max_size * MAX(all_recv, all_send);
    if(max_buf >= comm->maxrecv) growRecv(comm, max_buf);
    if(max_buf >= comm->maxsend) growSend(comm, max_buf);
}

void exchangeComm(Comm* comm, Atom* atom){
    MD_FLOAT x, y, z;
    MD_FLOAT* lo = atom->mybox.lo;
    MD_FLOAT* hi = atom->mybox.hi;
    int size = comm->exchangeSize;
    int numneigh = comm->numneighexch;
    int offset_recv[numneigh];
    int size_recv[numneigh];
    MPI_Request requests[numneigh];
    int i = 0, nsend = 0, nrecv = 0;
    int nrqst = 0;
    int nlocal, offset, m;

    /* enforce PBC */
    pbc(atom);

    if(comm->numneigh == 0) return;

    nlocal = atom->Nlocal;
    while(i < nlocal) {
        if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
           atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
           atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
            if(nsend + size >= comm->maxsend) growSend(comm, nsend);
            nsend += packExchange(atom, i, &comm->buf_send[nsend]);
            copy(atom, i, nlocal-1);
            nlocal--;
        } else i++;
    }
    atom->Nlocal = nlocal;

    /* send/recv the number of atoms to share with neighbouring procs */
    for(int ineigh = 0; ineigh < numneigh; ineigh++)
        MPI_Irecv(&size_recv[ineigh], 1, MPI_INT, comm->nexch[ineigh], 0, world, &requests[nrqst++]);

    for(int ineigh = 0; ineigh < numneigh; ineigh++)
        MPI_Send(&nsend, 1, MPI_INT, comm->nexch[ineigh], 0, world);
    MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);

    //Define the offsets to store in the recv buffer
    for(int ineigh = 0; ineigh < numneigh; ineigh++){
        offset_recv[ineigh] = nrecv;
        nrecv += size_recv[ineigh];
    }

    if(nrecv >= comm->maxrecv) growRecv(comm, nrecv);

    //Receive elements
    nrqst = 0;
    for(int ineigh = 0; ineigh < numneigh; ineigh++){
        offset = offset_recv[ineigh];
        MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh], 0, world, &requests[nrqst++]);
    }

    //Send elements
    for(int ineigh = 0; ineigh < numneigh; ineigh++)
        MPI_Send(comm->buf_send, nsend, type, comm->nexch[ineigh], 0, world);
    MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);

    nlocal = atom->Nlocal;
    m = 0;
    while(m < nrecv) {
        x = comm->buf_recv[m + _x];
        y = comm->buf_recv[m + _y];
        z = comm->buf_recv[m + _z];

        if(x >= lo[_x] && x < hi[_x] &&
           y >= lo[_y] && y < hi[_y] &&
           z >= lo[_z] && z < hi[_z]){
            m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
        } else {
            m += size;
        }
    }
    atom->Nlocal = nlocal;

    int all_atoms = 0;
    MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
    if(atom->Natoms != all_atoms && comm->myproc == 0){
        printf("Losing atoms! current atoms:%d expected atoms:%d\n", all_atoms, atom->Natoms);
    }
}

//Internal functions

inline void growRecv(Comm* comm, int n)
{
    comm->maxrecv = BUFFACTOR * n;
    if(comm->buf_recv) free(comm->buf_recv);
    comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
}

inline void growSend(Comm* comm, int n)
{
    size_t oldByteSize = (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT);
    comm->maxsend = BUFFACTOR * n;
    comm->buf_send = (MD_FLOAT*) reallocate(comm->buf_send, ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT), oldByteSize);
}

inline void growList(Comm* comm, int ineigh, int n)
{
    size_t oldByteSize = comm->maxsendlist[ineigh] * sizeof(int);
    comm->maxsendlist[ineigh] = BUFFACTOR * n;
    comm->sendlist[ineigh] = (int*) reallocate(comm->sendlist[ineigh], ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int), oldByteSize);
}

static inline void allocDynamicBuffers(Comm* comm)
{
    //Buffers depending on the # of my neighs
    int numneigh = comm->numneigh;
    comm->atom_send = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
    comm->atom_recv = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
    comm->off_atom_send = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
    comm->off_atom_recv = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
    comm->maxsendlist = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));

    for(int i = 0; i < numneigh; i++)
        comm->maxsendlist[i] = BUFMIN;

    comm->sendlist = (int**) allocate(ALIGNMENT, numneigh * sizeof(int*));
    for(int i = 0; i < numneigh; i++)
        comm->sendlist[i] = (int*) allocate(ALIGNMENT, comm->maxsendlist[i] * sizeof(int));
}

static inline void freeDynamicBuffers(Comm* comm)
{
    int numneigh = comm->numneigh;

    if(comm->atom_send) free(comm->atom_send);
    if(comm->atom_recv) free(comm->atom_recv);
    if(comm->off_atom_send) free(comm->off_atom_send);
    if(comm->off_atom_recv) free(comm->off_atom_recv);
    if(comm->maxsendlist) free(comm->maxsendlist);
    if(comm->sendlist){
        for(int i = 0; i < numneigh; i++)
            if(comm->sendlist[i]) free(comm->sendlist[i]);
    }
    if(comm->sendlist) free(comm->sendlist);
}

static inline void freeBuffers(Comm* comm)
{
    if(comm->nrecv) free(comm->nrecv);
    if(comm->nsend) free(comm->nsend);
    if(comm->nexch) free(comm->nexch);
    if(comm->pbc_x) free(comm->pbc_x);
    if(comm->pbc_y) free(comm->pbc_y);
    if(comm->pbc_z) free(comm->pbc_z);
    if(comm->boxes) free(comm->boxes);
    if(comm->atom_send) free(comm->atom_send);
    if(comm->atom_recv) free(comm->atom_recv);
    if(comm->off_atom_send) free(comm->off_atom_send);
    if(comm->off_atom_recv) free(comm->off_atom_recv);
    if(comm->maxsendlist) free(comm->maxsendlist);

    if(comm->sendlist){
        for(int i = 0; i < comm->numneigh; i++)
            if(comm->sendlist[i]) free(comm->sendlist[i]);
    }
    if(comm->sendlist) free(comm->sendlist);

    if(comm->buf_send) free(comm->buf_send);
    if(comm->buf_recv) free(comm->buf_recv);
}
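Not part of the commit: a hedged distillation of the message pattern that forwardComm, reverseComm, ghostComm and exchangeComm above all share. Post every non-blocking receive first, then issue blocking sends, then wait for the receives. All names below are illustrative placeholders.

#include <mpi.h>

/* sendbuf/recvbuf hold one contiguous slot per neighbour, described by offs[]/counts[] */
static void exchangeWithNeighbors(const double* sendbuf, double* recvbuf,
                                  const int* offs, const int* counts,
                                  const int* neighs, int nneigh)
{
    MPI_Request req[nneigh];
    int nrqst = 0;
    /* 1. post all receives first so the matching sends cannot deadlock */
    for(int n = 0; n < nneigh; n++)
        MPI_Irecv(&recvbuf[offs[n]], counts[n], MPI_DOUBLE, neighs[n], 0, MPI_COMM_WORLD, &req[nrqst++]);
    /* 2. blocking sends to the same neighbours */
    for(int n = 0; n < nneigh; n++)
        MPI_Send((void*)&sendbuf[offs[n]], counts[n], MPI_DOUBLE, neighs[n], 0, MPI_COMM_WORLD);
    /* 3. wait until every receive has completed before unpacking */
    MPI_Waitall(nrqst, req, MPI_STATUSES_IGNORE);
}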
common/grid.c (new file, 490 lines)
@@ -0,0 +1,490 @@
#include <stdio.h>
#include <stdlib.h>
#include <grid.h>
#include <mpi.h>
#include <parameter.h>
#include <allocate.h>
#include <util.h>
#include <math.h>

static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;

//GROMACS-style balancing
void f_normalization(MD_FLOAT* x, MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {
    MD_FLOAT sum = 0;
    for(int n = 0; n < nprocs; n++){
        fx[n] = MAX(minx, x[n]);
        sum += fx[n];
    }

    for(int n = 0; n < nprocs; n++)
        fx[n] /= sum;
}

void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
{
    MD_FLOAT tolerance = 1e-3;
    MD_FLOAT alpha = 0.5;
    MD_FLOAT* fx = (MD_FLOAT*) malloc(nprocs * sizeof(MD_FLOAT));
    int maxIterations = 100;

    for(int i = 0; i < maxIterations; i++) {
        int converged = 1;
        f_normalization(x0, fx, minx, nprocs);

        for(int n = 0; n < nprocs; n++)
            fx[n] = (1 - alpha) * x0[n] + alpha * fx[n];

        for(int n = 0; n < nprocs; n++) {
            if(fabs(fx[n] - x0[n]) >= tolerance) {
                converged = 0;
                break;
            }
        }

        for(int n = 0; n < nprocs; n++)
            x0[n] = fx[n];

        if(converged) break;
    }

    free(fx);
}

void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
{
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    int* coord = grid->coord;
    int* nprocs = grid->nprocs;
    //Elapsed time since the last rebalance
    double time = newTime - grid->Timer;
    grid->Timer = newTime;
    //store the old boundaries to compare against later for the exchange region
    MD_FLOAT lo[3], hi[3];
    for(int dim = 0; dim < 3; dim++){
        lo[dim] = atom->mybox.lo[dim];
        hi[dim] = atom->mybox.hi[dim];
    }

    //Define parameters
    MPI_Comm subComm[3];
    int color[3] = {0,0,0};
    int id[3] = {0,0,0};
    MD_FLOAT** load = (MD_FLOAT**) malloc(3 * sizeof(MD_FLOAT*));
    for(int dim = 0; dim < 3; dim++)
        load[dim] = (MD_FLOAT*) malloc(nprocs[dim] * sizeof(MD_FLOAT));

    int maxprocs = MAX(MAX(nprocs[_x], nprocs[_y]), nprocs[_z]);
    MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs * sizeof(MD_FLOAT));
    MD_FLOAT* limits = (MD_FLOAT*) malloc(2 * maxprocs * sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... values are repeated in between to perform MPI_Scatter later
    MD_FLOAT t_sum[3] = {0,0,0};
    MD_FLOAT recv_buf[2] = {0,0};       //Each proc only receives 2 elements per dimension: xlo and xhi
    MD_FLOAT balancedLoad[3] = {0,0,0}; //1/nprocs
    MD_FLOAT minLoad[3] = {0,0,0};      //beta*(1/nprocs)
    MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
    MD_FLOAT boundaries[6] = {0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi

    //Create sub-communicators along each dimension
    for(int dim = 0; dim < 3; dim++){
        if(dim == _x){
            color[_x] = (coord[_y] == 0 && coord[_z] == 0) ? 1 : MPI_UNDEFINED;
            id[_x] = me;
        } else if(dim == _y) {
            color[_y] = coord[_z] == 0 ? coord[_x] : MPI_UNDEFINED;
            id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0 : me;
        } else {
            color[_z] = coord[_y] * nprocs[_x] + coord[_x];
            id[_z] = coord[_z] == 0 ? 0 : me;
        }
        MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
    }

    //Set the minimum load and the balanced load
    for(int dim = 0; dim < 3; dim++){
        balancedLoad[dim] = 1. / nprocs[dim];
        minLoad[dim] = 0.8 * balancedLoad[dim];
    }

    //Gather and normalise the workload in reverse dimension order
    for(int dim = _z; dim >= _x; dim--)
    {
        if(subComm[dim] != MPI_COMM_NULL){
            MPI_Gather(&time, 1, type, load[dim], 1, type, 0, subComm[dim]);

            if(id[dim] == 0)
            {
                for(int n = 0; n < nprocs[dim]; n++)
                    t_sum[dim] += load[dim][n];

                for(int n = 0; n < nprocs[dim]; n++)
                    load[dim][n] /= t_sum[dim];
            }
            time = t_sum[dim];
        }
        MPI_Barrier(world);
    }

    //Broadcast the new boundaries along each dimension
    for(int dim = 0; dim < 3; dim++){

        if(subComm[dim] != MPI_COMM_NULL){

            MPI_Bcast(boundaries, 6, type, 0, subComm[dim]);
            if(id[dim] == 0) {
                fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
                MD_FLOAT inv_sum = 0;
                for(int n = 0; n < nprocs[dim]; n++)
                    inv_sum += (1 / load[dim][n]);

                for(int n = 0; n < nprocs[dim]; n++)
                    cellSize[n] = (prd[dim] / load[dim][n]) * (1. / inv_sum);

                MD_FLOAT sum = 0;
                for(int n = 0; n < nprocs[dim]; n++){
                    limits[2*n] = sum;
                    limits[2*n+1] = sum + cellSize[n];
                    sum += cellSize[n];
                }
                limits[2*nprocs[dim]-1] = prd[dim];
            }
            MPI_Scatter(limits, 2, type, recv_buf, 2, type, 0, subComm[dim]);
            boundaries[2*dim] = recv_buf[0];
            boundaries[2*dim+1] = recv_buf[1];
        }
        MPI_Barrier(world);
    }

    atom->mybox.lo[_x] = boundaries[0]; atom->mybox.hi[_x] = boundaries[1];
    atom->mybox.lo[_y] = boundaries[2]; atom->mybox.hi[_y] = boundaries[3];
    atom->mybox.lo[_z] = boundaries[4]; atom->mybox.hi[_z] = boundaries[5];

    MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);

    //Because the cells change dynamically, the neighbouring exchange region has to be enlarged
    for(int dim = _x; dim <= _z; dim++){
        MD_FLOAT dr, dr_max;
        int n = grid->nprocs[dim];
        MD_FLOAT maxdelta = 0.2 * prd[dim];
        dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]), fabs(hi[dim] - atom->mybox.hi[dim]));
        MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
        grid->cutneigh[dim] = param->cutneigh + dr_max;
    }

    for(int dim = 0; dim < 3; dim++) {
        if(subComm[dim] != MPI_COMM_NULL){
            MPI_Comm_free(&subComm[dim]);
        }
        free(load[dim]);
    }
    free(load);
    free(cellSize);
    free(limits);
}

//RCB Balancing
MD_FLOAT meanTimeBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
    MD_FLOAT mean = 0, sum = 0, total_sum = 0, weightAtoms = 0, total_weight = 0;

    for(int i = 0; i < atom->Nlocal; i++){
        sum += atom_pos(i);
    }
    sum *= time;
    weightAtoms = atom->Nlocal * time;
    MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
    MPI_Allreduce(&weightAtoms, &total_weight, 1, type, MPI_SUM, subComm);

    mean = total_sum / total_weight;
    return mean;
}

MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
    int Natoms = 0;
    MD_FLOAT sum = 0, mean = 0, total_sum = 0;

    for(int i = 0; i < atom->Nlocal; i++){
        sum += atom_pos(i);
    }
    MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
    MPI_Allreduce(&atom->Nlocal, &Natoms, 1, MPI_INT, MPI_SUM, subComm);
    mean = total_sum / Natoms;
    return mean;
}

void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim, int* color, int ilevel, double time)
{
    int rank, size;
    int branch = 0, i = 0, m = 0;
    int nsend = 0, nrecv = 0, nrecv2 = 0;
    int values_per_atom = 7;
    MD_FLOAT bisection, pos;
    MPI_Request request[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
    MPI_Comm_rank(subComm, &rank);
    MPI_Comm_size(subComm, &size);

    int odd = size % 2;
    int extraProc = odd ? size-1 : size;
    int half = (int)(0.5 * size);
    int partner = (rank < half) ? rank + half : rank - half;
    if(odd && rank == extraProc) partner = 0;

    //Apply the bisection
    bisection = method(atom, subComm, dim, time);

    //Define the new boundaries
    if(rank < half){
        atom->mybox.hi[dim] = bisection;
        branch = 0;
    } else {
        atom->mybox.lo[dim] = bisection;
        branch = 1;
    }
    //Define the new color for further communication
    *color = (branch << ilevel) | *color;

    //Grow the send buffer
    if(atom->Nlocal >= grid->maxsend){
        if(grid->buf_send) free(grid->buf_send);
        grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal * values_per_atom * sizeof(MD_FLOAT));
        grid->maxsend = atom->Nlocal;
    }
    //buffer the particles to send
    while(i < atom->Nlocal) {
        pos = atom_pos(i);
        if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
            nsend += packExchange(atom, i, &grid->buf_send[nsend]);
            copy(atom, i, atom->Nlocal-1);
            atom->Nlocal--;
        } else i++;
    }

    //Communicate the number of elements to be sent
    if(rank < extraProc){
        MPI_Irecv(&nrecv, 1, MPI_INT, partner, 0, subComm, &request[0]);
    }
    if(odd && rank == 0){
        MPI_Irecv(&nrecv2, 1, MPI_INT, extraProc, 0, subComm, &request[1]);
    }
    MPI_Send(&nsend, 1, MPI_INT, partner, 0, subComm);
    MPI_Waitall(2, request, MPI_STATUSES_IGNORE);

    //Grow the recv buffer
    if(nrecv + nrecv2 >= grid->maxrecv){
        if(grid->buf_recv) free(grid->buf_recv);
        grid->buf_recv = (MD_FLOAT*) malloc((nrecv + nrecv2) * values_per_atom * sizeof(MD_FLOAT));
        grid->maxrecv = nrecv + nrecv2;
    }

    //communicate the elements in the buffer
    request[0] = MPI_REQUEST_NULL;
    request[1] = MPI_REQUEST_NULL;

    if(rank < extraProc){
        MPI_Irecv(grid->buf_recv, nrecv, type, partner, 0, subComm, &request[0]);
    }
    if(odd && rank == 0){
        MPI_Irecv(&grid->buf_recv[nrecv], nrecv2, type, extraProc, 0, subComm, &request[1]);
    }
    MPI_Send(grid->buf_send, nsend, type, partner, 0, subComm);
    MPI_Waitall(2, request, MPI_STATUSES_IGNORE);

    //store the received atoms in the atom list
    while(m < nrecv + nrecv2){
        m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
    }
}

void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
{
    int me, nprocs = 0, ilevel = 0, nboxes = 1;
    int color = 0, size = 0;
    int index;
    MD_FLOAT prd[3];
    MPI_Comm subComm;
    MPI_Comm_size(world, &nprocs);
    MPI_Comm_rank(world, &me);

    //elapsed time since the last dynamic balance
    double time = newTime - grid->Timer;

    prd[_x] = atom->mybox.xprd = param->xprd;
    prd[_y] = atom->mybox.yprd = param->yprd;
    prd[_z] = atom->mybox.zprd = param->zprd;

    //Sort the dimensions from largest to smallest extent
    int largerDim[3] = {_x, _y, _z};

    for(int i = 0; i < 2; i++){
        for(int j = i+1; j < 3; j++)
        {
            if(prd[largerDim[j]] > prd[largerDim[i]]){
                int tmp = largerDim[j];
                largerDim[j] = largerDim[i];
                largerDim[i] = tmp;
            }
        }
    }

    //Initial partition
    atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
    atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
    atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;

    //Recursion tree
    while(nboxes < nprocs)
    {
        index = ilevel % ndim;
        MPI_Comm_split(world, color, me, &subComm);
        MPI_Comm_size(subComm, &size);
        if(size > 1){
            nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
        }
        MPI_Comm_free(&subComm);
        nboxes = pow(2, ++ilevel);
    }
    //Reset the balance timer
    grid->Timer = newTime;

    //Create the global map
    MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);

    //Use the same cutneigh in all dimensions for the exchange communication
    for(int dim = _x; dim <= _z; dim++)
        grid->cutneigh[dim] = param->cutneigh;
}

//Regular grid
void cartisian3d(Grid* grid, Parameter* param, Box* box)
{
    int me, nproc;
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    int numdim = 3;
    int reorder = 0;
    int periods[3] = {1,1,1};
    int mycoord[3] = {0,0,0};
    int griddim[3] = {0,0,0};
    MD_FLOAT len[3];
    MPI_Comm cartesian;

    box->xprd = param->xprd;
    box->yprd = param->yprd;
    box->zprd = param->zprd;

    //Create a cartesian 3d grid
    MPI_Dims_create(nproc, numdim, griddim);
    MPI_Cart_create(world, numdim, griddim, periods, reorder, &cartesian);
    grid->nprocs[_x] = griddim[_x];
    grid->nprocs[_y] = griddim[_y];
    grid->nprocs[_z] = griddim[_z];

    //My coordinates in the grid
    MPI_Cart_coords(cartesian, me, 3, mycoord);
    grid->coord[_x] = mycoord[_x];
    grid->coord[_y] = mycoord[_y];
    grid->coord[_z] = mycoord[_z];

    //boundaries of my local box, with the origin at (0,0,0)
    len[_x] = param->xprd / griddim[_x];
    len[_y] = param->yprd / griddim[_y];
    len[_z] = param->zprd / griddim[_z];

    box->lo[_x] = mycoord[_x] * len[_x];
    box->hi[_x] = (mycoord[_x] + 1) * len[_x];
    box->lo[_y] = mycoord[_y] * len[_y];
    box->hi[_y] = (mycoord[_y] + 1) * len[_y];
    box->lo[_z] = mycoord[_z] * len[_z];
    box->hi[_z] = (mycoord[_z] + 1) * len[_z];

    MD_FLOAT domain[6] = {box->lo[_x], box->lo[_y], box->lo[_z], box->hi[_x], box->hi[_y], box->hi[_z]};
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
    MPI_Comm_free(&cartesian);

    //Use the same cutneigh in all dimensions for the exchange communication
    for(int dim = _x; dim <= _z; dim++)
        grid->cutneigh[dim] = param->cutneigh;
}

//Other grid functions
void initGrid(Grid* grid)
{   //start with a regular grid
    int nprocs;
    MPI_Comm_size(world, &nprocs);
    grid->map_size = 6 * nprocs;
    grid->map = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));
    //========rcb=======
    grid->maxsend = 0;
    grid->maxrecv = 0;
    grid->buf_send = NULL;
    grid->buf_recv = NULL;
    //====staggered=====
    grid->Timer = 0.;
}

void setupGrid(Grid* grid, Atom* atom, Parameter* param)
{
    int me;
    MD_FLOAT xlo, ylo, zlo, xhi, yhi, zhi;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    initGrid(grid);

    //Set the origin at (0,0,0)
    if(param->input_file){
        for(int i = 0; i < atom->Nlocal; i++){
            atom_x(i) = atom_x(i) - param->xlo;
            atom_y(i) = atom_y(i) - param->ylo;
            atom_z(i) = atom_z(i) - param->zlo;
        }
    }

    cartisian3d(grid, param, &atom->mybox);

    xlo = atom->mybox.lo[_x]; xhi = atom->mybox.hi[_x];
    ylo = atom->mybox.lo[_y]; yhi = atom->mybox.hi[_y];
    zlo = atom->mybox.lo[_z]; zhi = atom->mybox.hi[_z];

    int i = 0;
    while(i < atom->Nlocal)
    {
        if(atom_x(i) >= xlo && atom_x(i) < xhi &&
           atom_y(i) >= ylo && atom_y(i) < yhi &&
           atom_z(i) >= zlo && atom_z(i) < zhi)
        {
            i++;
        } else {
            copy(atom, i, atom->Nlocal-1);
            atom->Nlocal--;
        }
    }

    //printGrid(grid);
    if(!param->balance){
        MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
        printf("Processor:%i, Local atoms:%i, Total atoms:%i\n", me, atom->Nlocal, atom->Natoms);
        MPI_Barrier(world);
    }
}

void printGrid(Grid* grid)
{
    int me, nprocs;
    MPI_Comm_size(world, &nprocs);
    MPI_Comm_rank(world, &me);
    MD_FLOAT* map = grid->map;
    if(me == 0)
    {
        printf("GRID:\n");
        printf("===================================================================================================\n");
        for(int i = 0; i < nprocs; i++)
            printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", i, map[6*i], map[6*i+3], map[6*i+1], map[6*i+4], map[6*i+2], map[6*i+5]);
        printf("\n\n");
        //printf("Box processor:%i\n xlo:%.4f\txhi:%.4f\n ylo:%.4f\tyhi:%.4f\n zlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
    }
    MPI_Barrier(world);
}
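As a quick sanity check on the staggered slab sizing above (cellSize[n] = (prd / load[n]) * 1 / sum over m of (1 / load[m])): with three ranks whose normalized loads are 0.5, 0.25 and 0.25 on a domain of length L, the inverse loads are 2, 4 and 4 (sum 10), giving slab widths of 0.2L, 0.4L and 0.4L. The most loaded rank receives the thinnest slab, and the widths still add up to L.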
common/includes/box.h (new file, 22 lines)
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of MD-Bench.
 * Use of this source code is governed by a LGPL-3.0
 * license that can be found in the LICENSE file.
 */
#include <parameter.h>

#ifndef __BOX_H_
#define __BOX_H_

typedef struct {
    int id;
    MD_FLOAT xprd, yprd, zprd; //domain dimensions
    MD_FLOAT lo[3];            //smallest coordinate of my subdomain
    MD_FLOAT hi[3];            //highest coordinate of my subdomain
} Box;

int overlapBox(int, int, const Box*, const Box*, Box*, MD_FLOAT, MD_FLOAT);
int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
void expandBox(int, const Box*, const Box*, Box*, MD_FLOAT);
#endif
common/includes/comm.h (new file, 104 lines)
@@ -0,0 +1,104 @@
#include <atom.h>
#include <parameter.h>
#include <box.h>
#include <grid.h>

#ifndef COMM_H
#define COMM_H

#ifdef GROMACS
#define FORWARD_SIZE (3*CLUSTER_N)
#define REVERSE_SIZE (3*CLUSTER_N)
#define GHOST_SIZE (4*CLUSTER_N+10)
#define EXCHANGE_SIZE 7

#define JFAC MAX(1, CLUSTER_N / CLUSTER_M)
#define LOCAL atom->Nclusters_local / JFAC
#define GHOST atom->Nclusters_ghost

#define IsinRegionToSend(cj) \
    ((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo) && \
     (atom->jclusters[(cj)].bbminx <  xhi || atom->jclusters[(cj)].bbmaxx <  xhi) && \
     (atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo) && \
     (atom->jclusters[(cj)].bbminy <  yhi || atom->jclusters[(cj)].bbmaxy <  yhi) && \
     (atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo) && \
     (atom->jclusters[(cj)].bbminz <  zhi || atom->jclusters[(cj)].bbmaxz <  zhi))

#else

#define FORWARD_SIZE 3
#define REVERSE_SIZE 3
#define GHOST_SIZE 4
#define EXCHANGE_SIZE 7
#define LOCAL atom->Nlocal
#define GHOST atom->Nghost

#define IsinRegionToSend(i) \
    ((atom_x((i)) >= xlo && atom_x((i)) < xhi) && \
     (atom_y((i)) >= ylo && atom_y((i)) < yhi) && \
     (atom_z((i)) >= zlo && atom_z((i)) < zhi))

#endif

typedef struct {
    int myproc;          // my proc ID
    int numproc;         // # of processors

    int numneigh;        // # of all my neighs over all swaps
    int maxneigh;        // buffer size for my neighs
    int sendfrom[6];     // lowest neigh index to send to in each swap
    int sendtill[6];     // highest neigh index to send to in each swap
    int recvfrom[6];     // lowest neigh index to recv from in each swap
    int recvtill[6];     // highest neigh index to recv from in each swap
    int* nsend;          // neighbours I send to
    int* nrecv;          // neighbours I receive from

    int* pbc_x;          // pbc flag in x
    int* pbc_y;          // pbc flag in y
    int* pbc_z;          // pbc flag in z

    int* atom_send, *atom_recv; // # of atoms to send/recv for each of my neighs
    int* off_atom_send;  // atom offset to send, inside of a swap
    int* off_atom_recv;  // atom offset to recv, inside of a swap

    int* nexch;          // procs to exchange with
    int numneighexch;    // # of neighbours to exchange with
    int maxneighexch;    // max buffer size to store neighbours

    int numswap;         // # of swaps to perform, it is 6
    int swapdim[6];      // dimension of the swap (_x, _y or _z)
    int swapdir[6];      // direction of the swap, 0 or 1
    int swap[3][2];      // maps (dim, dir) to the swap index
    int othersend[6];    // set if this proc interacts with other procs in a given swap

    int firstrecv[6];    // where to put the 1st received atom in each swap
    int** sendlist;      // list of atoms to send in each swap
    int* maxsendlist;    // max # of atoms sent in each per-swap list

    int maxsend;         // max elements in the send buffer
    int maxrecv;         // max elements in the recv buffer
    MD_FLOAT* buf_send;  // send buffer for all comm
    MD_FLOAT* buf_recv;  // receive buffer for all comm

    int forwardSize;     // # of parameters per atom in forward comm
    int reverseSize;     // # of parameters per atom in reverse comm
    int exchangeSize;    // # of parameters per atom in exchange comm
    int ghostSize;       // # of parameters per atom in the ghost list

    int iterAtom;        // last atom to iterate over in each swap
    Box* boxes;          // boundaries to be sent to other procs as ghosts
} Comm;

void initComm(int*, char***, Comm*);      //Init MPI
void endComm(Comm*);                      //End MPI
void setupComm(Comm*, Parameter*, Grid*); //Creates a 3d grid or rcb grid
void neighComm(Comm*, Parameter*, Grid*); //Finds neighbours within the cut-off and defines ghost regions
void forwardComm(Comm*, Atom*, int);      //Sends info in one direction
void reverseComm(Comm*, Atom*, int);      //Returns info after forward communication
void exchangeComm(Comm*, Atom*);          //Exchanges info between procs
void ghostComm(Comm*, Atom*, int);        //Builds the ghost neighbours to send during the next forwards
void growSend(Comm*, int);                //Grows the send buffer
void growRecv(Comm*, int);                //Grows the recv buffer
void growList(Comm*, int, int);           //Grows the per-swap send list
#endif
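A hedged sketch, not part of the commit: how the API declared above could be driven from a main loop. The driver function name is made up, only a single swap index is shown per call, and the real code loops over swaps per shell method (see shell_methods.h further below).

#include <comm.h>
#include <grid.h>

void runSketch(int argc, char** argv, Atom* atom, Parameter* param) {
    Comm comm; Grid grid;
    initComm(&argc, &argv, &comm);       /* MPI_Init + zeroed buffers             */
    setupGrid(&grid, atom, param);       /* cartesian decomposition + atom filter */
    setupComm(&comm, param, &grid);      /* swap tables and neighbour lists       */
    for(int step = 0; step < param->ntimes; step++) {
        if(step % param->reneigh_every == 0) {
            exchangeComm(&comm, atom);   /* migrate atoms that left my box        */
            ghostComm(&comm, atom, 0);   /* rebuild ghost lists (one swap shown)  */
        }
        forwardComm(&comm, atom, 0);     /* push updated ghost positions          */
        reverseComm(&comm, atom, 0);     /* pull back ghost forces                */
    }
    endComm(&comm);                      /* free buffers + MPI_Finalize           */
}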
common/includes/grid.h (new file, 51 lines)
@@ -0,0 +1,51 @@
/*
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of MD-Bench.
 * Use of this source code is governed by a LGPL-3.0
 * license that can be found in the LICENSE file.
 */

#include <parameter.h>
#include <box.h>
#include <atom.h>
#include <mpi.h>

#ifndef __MAP_H_
#define __MAP_H_

#define world MPI_COMM_WORLD
#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))

enum {RCB=1, meanTimeRCB, Staggered};

typedef struct {
    int balance_every;
    int map_size;
    MD_FLOAT* map;
    //===Parameters for the staggered balance
    int nprocs[3];
    int coord[3];
    MD_FLOAT cutneigh[3];
    double Timer;
    //===Parameters for the RCB balance
    MD_FLOAT* buf_send;
    MD_FLOAT* buf_recv;
    int maxsend;
    int maxrecv;
} Grid;

typedef MD_FLOAT(*RCB_Method)(Atom*, MPI_Comm, int, double);

void setupGrid(Grid*, Atom*, Parameter*);
void cartisian3d(Grid*, Parameter*, Box*);
void rcbBalance(Grid*, Atom*, Parameter*, RCB_Method, int, double);
void staggeredBalance(Grid*, Atom*, Parameter*, double);
void printGrid(Grid*);
//rcb methods
MD_FLOAT meanBisect(Atom*, MPI_Comm, int, double);
MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
#endif
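A hedged example, not part of the commit: selecting a balancing strategy through the RCB_Method function pointer declared above. The wrapper name is made up and ndim = 3 is an assumption; only the enum values, signatures and the balance field come from the sources above.

#include <grid.h>

void rebalance(Grid* grid, Atom* atom, Parameter* param, double now) {
    switch(param->balance) {
        case RCB:         rcbBalance(grid, atom, param, meanBisect, 3, now);     break;
        case meanTimeRCB: rcbBalance(grid, atom, param, meanTimeBisect, 3, now); break;
        case Staggered:   staggeredBalance(grid, atom, param, now);              break;
        default:          break; /* static cartesian grid: nothing to do */
    }
}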
@@ -53,6 +53,10 @@ typedef struct {
    MD_FLOAT k_dn;
    MD_FLOAT gx, gy, gz;
    MD_FLOAT reflect_x, reflect_y, reflect_z;
    //MPI implementation
    int balance;
    int method;
    int balance_every;
} Parameter;

void initParameter(Parameter*);
common/includes/shell_methods.h (new file, 71 lines)
@@ -0,0 +1,71 @@
/*
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of MD-Bench.
 * Use of this source code is governed by a LGPL-3.0
 * license that can be found in the LICENSE file.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <math.h>
#include <comm.h>
#include <atom.h>
#include <timing.h>
#include <parameter.h>
#include <util.h>

//static void addDummyCluster(Atom*);

double forward(Comm* comm, Atom* atom, Parameter* param){
    double S, E;
    S = getTimeStamp();
    if(param->method == halfShell){
        for(int iswap = 0; iswap < 5; iswap++)
            forwardComm(comm, atom, iswap);
    } else if(param->method == eightShell){
        for(int iswap = 0; iswap < 6; iswap += 2)
            forwardComm(comm, atom, iswap);
    } else {
        for(int iswap = 0; iswap < 6; iswap++)
            forwardComm(comm, atom, iswap);
    }
    E = getTimeStamp();
    return E-S;
}

double reverse(Comm* comm, Atom* atom, Parameter* param){
    double S, E;
    S = getTimeStamp();
    if(param->method == halfShell){
        for(int iswap = 4; iswap >= 0; iswap--)
            reverseComm(comm, atom, iswap);
    } else if(param->method == eightShell){
        for(int iswap = 4; iswap >= 0; iswap -= 2)
            reverseComm(comm, atom, iswap);
    } else if(param->method == halfStencil){
        for(int iswap = 5; iswap >= 0; iswap--)
            reverseComm(comm, atom, iswap);
    } else { } //Full Shell reverse does nothing
    E = getTimeStamp();
    return E-S;
}

void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
{
#ifdef GROMACS
    atom->Nclusters_ghost = 0;
#endif
    atom->Nghost = 0;
    if(param->method == halfShell){
        for(int iswap = 0; iswap < 5; iswap++)
            ghostComm(comm, atom, iswap);
    } else if(param->method == eightShell){
        for(int iswap = 0; iswap < 6; iswap += 2)
            ghostComm(comm, atom, iswap);
    } else {
        for(int iswap = 0; iswap < 6; iswap++)
            ghostComm(comm, atom, iswap);
    }
}
@@ -9,9 +9,15 @@

typedef enum {
    TOTAL = 0,
    NEIGH,
    FORCE,
    NEIGH,
    FORWARD,
    REVERSE,
    UPDATE,
    BALANCE,
    SETUP,
    REST,
    NUMTIMER
} timertype;
} timerComm;

#endif
@@ -4,6 +4,8 @@
 * Use of this source code is governed by a LGPL-3.0
 * license that can be found in the LICENSE file.
 */
#include <math.h>

#ifndef __UTIL_H_
#define __UTIL_H_

@@ -35,6 +37,13 @@
#   define PRECISION_STRING "double"
#endif

#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
#define Equal(a,b) (fabs((a)-(b))<1e-9)

enum {_x=0, _y, _z};
enum {fullShell=0, halfShell, eightShell, halfStencil};

extern double myrandom(int*);
extern void random_reset(int *seed, int ibase, double *coord);
extern int str2ff(const char *string);
@@ -11,6 +11,7 @@
#include <atom.h>
#include <parameter.h>
#include <util.h>
#include <mpi.h>

void initParameter(Parameter *param) {
    param->input_file = NULL;
@@ -54,13 +55,17 @@ void initParameter(Parameter *param) {
    param->reflect_x = 0.0;
    param->reflect_y = 0.0;
    param->reflect_z = 0.0;
    //MPI
    param->balance = 0;
    param->method = 0;
    param->balance_every = param->reneigh_every;
}

void readParameter(Parameter *param, const char *filename) {
    FILE *fp = fopen(filename, "r");
    char line[MAXLINE];
    int i;

    if(!fp) {
        fprintf(stderr, "Could not open parameter file: %s\n", filename);
        exit(-1);
@@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
        for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
        line[i] = '\0';

        char *tok = strtok(line, " ");
        char *val = strtok(NULL, " ");
        char *tok = strtok(line, "\t ");
        char *val = strtok(NULL, "\t ");

        #define PARSE_PARAM(p,f) if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
        #define PARSE_STRING(p) PARSE_PARAM(p, strdup)
@@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
        PARSE_INT(x_out_every);
        PARSE_INT(v_out_every);
        PARSE_INT(half_neigh);
        PARSE_INT(method);
        PARSE_INT(balance);
        PARSE_INT(balance_every);
    }
}

    // Update dtforce
    param->dtforce = 0.5 * param->dt;

    // Update sigma6 parameter
    MD_FLOAT s2 = param->sigma * param->sigma;
    param->sigma6 = s2 * s2 * s2;

    //Convert balance_every from re-neighboring intervals into timesteps
    param->balance_every *= param->reneigh_every;
    fclose(fp);
}

@@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
    printf("\tSkin: %e\n", param->skin);
    printf("\tHalf neighbor lists: %d\n", param->half_neigh);
    printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);

    // ================ New MPI features =============
    char str[20];
    strcpy(str, (param->method == 1) ? "Half Shell" :
                (param->method == 2) ? "Eight Shell" :
                (param->method == 3) ? "Half Stencil" :
                                       "Full Shell");
    printf("\tMethod: %s\n", str);
    strcpy(str, (param->balance == 1) ? "mean RCB" :
                (param->balance == 2) ? "mean Time RCB" :
                (param->balance == 3) ? "Staggered" :
                                        "cartesian");
    printf("\tPartition: %s\n", str);
    if(param->balance)
        printf("\tRebalancing every (timesteps): %d\n", param->balance_every);
}
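For orientation only: based on the PARSE_INT keys added above and the value-to-name mapping in printParameter, a parameter file could select the new MPI options with lines like the following (the numeric values are illustrative; keys and values are whitespace-separated, and '#' starts a comment). Note that balance_every is multiplied by reneigh_every after parsing, so it is given in re-neighboring intervals.

# communication shell and load balancing
method          1    # 0 full shell, 1 half shell, 2 eight shell, 3 half stencil
balance         2    # 0 cartesian, 1 mean RCB, 2 mean time RCB, 3 staggered
balance_every   10   # in re-neighboring intervals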
@@ -10,6 +10,7 @@

#include <thermo.h>
#include <util.h>
#include <mpi.h>

static int *steparr;
static MD_FLOAT *tmparr;
@@ -24,6 +25,7 @@ static MD_FLOAT t_act;
static MD_FLOAT p_act;
static MD_FLOAT e_act;
static int mstat;
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;

/* exported subroutines */
void setupThermo(Parameter *param, int natoms)
@@ -53,57 +55,73 @@ void setupThermo(Parameter *param, int natoms)

void computeThermo(int iflag, Parameter *param, Atom *atom)
{
    MD_FLOAT t = 0.0, p;
    MD_FLOAT t_sum = 0.0, t = 0.0, p;
    int me;

    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    for(int i = 0; i < atom->Nlocal; i++) {
        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
    }

    t = t * t_scale;
    p = (t * dof_boltz) * p_scale;
    int istep = iflag;
    MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0, MPI_COMM_WORLD);
    if(me == 0)
    {
        t = t_sum * t_scale;
        p = (t * dof_boltz) * p_scale;
        int istep = iflag;

    if(iflag == -1){
        istep = param->ntimes;
    }
    if(iflag == 0){
        mstat = 0;
    }
        if(iflag == -1){
            istep = param->ntimes;
        }
        if(iflag == 0){
            mstat = 0;
        }

    steparr[mstat] = istep;
    tmparr[mstat] = t;
    prsarr[mstat] = p;
    mstat++;
    fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
        steparr[mstat] = istep;
        tmparr[mstat] = t;
        prsarr[mstat] = p;
        mstat++;
        fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
    }
}

void adjustThermo(Parameter *param, Atom *atom)
{
    /* zero center-of-mass motion */
    MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;

    MD_FLOAT v_sum[3], vtot[3];

    for(int i = 0; i < atom->Nlocal; i++) {
        vxtot += atom_vx(i);
        vytot += atom_vy(i);
        vztot += atom_vz(i);
    }

    vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;

    vxtot = vxtot / atom->Natoms;
    vytot = vytot / atom->Natoms;
    vztot = vztot / atom->Natoms;
    MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);

    vxtot = v_sum[0] / atom->Natoms;
    vytot = v_sum[1] / atom->Natoms;
    vztot = v_sum[2] / atom->Natoms;

    for(int i = 0; i < atom->Nlocal; i++) {
        atom_vx(i) -= vxtot;
        atom_vy(i) -= vytot;
        atom_vz(i) -= vztot;
    }

    t_act = 0;

    MD_FLOAT t = 0.0;
    MD_FLOAT t_sum = 0.0;

    for(int i = 0; i < atom->Nlocal; i++) {
        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
    }

    MPI_Allreduce(&t, &t_sum, 1, type, MPI_SUM, MPI_COMM_WORLD);

    t = t_sum;
    t *= t_scale;
    MD_FLOAT factor = sqrt(param->temp / t);
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <util.h>
#include <math.h>

/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
#define IA 16807
@@ -86,6 +87,7 @@ int get_cuda_num_threads() {

void readline(char *line, FILE *fp) {
    if(fgets(line, MAXLINE, fp) == NULL) {
        printf("error %i\n", errno);
        if(errno != 0) {
            perror("readline()");
            exit(-1);