Final MPI version

This commit is contained in:
JairoBuitrago
2024-04-15 16:53:25 +02:00
parent a6a269703d
commit a13a0f3bae
33 changed files with 3568 additions and 624 deletions

97
common/box.c Normal file
View File

@@ -0,0 +1,97 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <parameter.h>
#include <util.h>
#include <box.h>
#include <mpi.h>
/*
 * Computes the overlap between `mybox` and `other` for the swap given by
 * (dim, dir) and stores the resulting region in `cut`.
 *
 * The overlap is first tested without periodic shift (skipped when both boxes
 * have the same id); `other` is extended by `cutneigh` along `dim` on the side
 * selected by `dir`. If that test fails, `other` is shifted by the period
 * `xprd` along `dim` and tested again.
 *
 * Returns 0 for a direct overlap, 1 (dir == 0) or -1 (dir != 0) for an overlap
 * through the periodic shift, and -100 when there is no overlap at all.
 * `cut` is written in every case.
 */
int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
{
    const int axes[3] = { _x, _y, _z };
    MD_FLOAT min[3], max[3];
    int pbc = -100;

    // Plain intersection of both boxes on every axis
    for (int a = 0; a < 3; a++) {
        const int d = axes[a];
        min[d] = MAX(mybox->lo[d], other->lo[d]);
        max[d] = MIN(mybox->hi[d], other->hi[d]);
    }

    // Non-periodic test, only meaningful between two different boxes
    if (mybox->id != other->id) {
        if (dir == 0) {
            max[dim] = MIN(mybox->hi[dim], other->hi[dim] + cutneigh);
        } else if (dir == 1) {
            min[dim] = MAX(mybox->lo[dim], other->lo[dim] - cutneigh);
        }
        if ((min[_x] < max[_x]) && (min[_y] < max[_y]) && (min[_z] < max[_z])) {
            pbc = 0;
        }
    }

    // Periodic test: shift `other` by one period along `dim`
    if (pbc != 0) {
        if (dir == 0) {
            min[dim] = MAX(mybox->lo[dim], other->lo[dim] - xprd);
            max[dim] = MIN(mybox->hi[dim], other->hi[dim] - xprd + cutneigh);
        } else {
            min[dim] = MAX(mybox->lo[dim], other->lo[dim] + xprd - cutneigh);
            max[dim] = MIN(mybox->hi[dim], other->hi[dim] + xprd);
        }
        if ((min[_x] < max[_x]) && (min[_y] < max[_y]) && (min[_z] < max[_z])) {
            if (dir == 0) {
                pbc = 1;
            } else {
                pbc = -1;
            }
        }
    }

    // Hand the last computed region back to the caller
    for (int a = 0; a < 3; a++) {
        const int d = axes[a];
        cut->lo[d] = min[d];
        cut->hi[d] = max[d];
    }
    return pbc;
}
/*
 * Tests whether `mybox` overlaps `other` once `other` is enlarged by
 * `cutneigh[d]` on both sides of every axis d, considering `other` itself and
 * all of its images shifted by -1, 0 or +1 periods along x, y and z.
 *
 * Returns 1 as soon as one image overlaps, 0 otherwise.
 */
int overlapFullBox(Parameter* param, MD_FLOAT *cutneigh ,const Box* mybox, const Box* other)
{
    const int axes[3] = { _x, _y, _z };
    MD_FLOAT prd[3];
    prd[0] = param->xprd;
    prd[1] = param->yprd;
    prd[2] = param->zprd;

    // 3 x 3 x 3 periodic images, the x shift varying fastest
    for (int image = 0; image < 27; image++) {
        const int shift[3] = { image % 3 - 1, (image / 3) % 3 - 1, image / 9 - 1 };
        int overlaps = 1;

        for (int a = 0; a < 3 && overlaps; a++) {
            const int d = axes[a];
            MD_FLOAT lower = MAX(mybox->lo[d], other->lo[d] - cutneigh[d] + shift[a] * prd[a]);
            MD_FLOAT upper = MIN(mybox->hi[d], other->hi[d] + cutneigh[d] + shift[a] * prd[a]);
            overlaps = (lower < upper);
        }

        if (overlaps) {
            return 1;
        }
    }
    return 0;
}
/*
 * Enlarges `cut` by `cutneigh` in the dimensions handled by earlier swaps:
 * along x for swaps 2 and 3, along x and y for swaps 4 and 5. A side is only
 * enlarged when `me` reaches at least as far as `other` on that side.
 * Any other value of `iswap` leaves `cut` untouched.
 */
void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
{
    const int isYSwap = (iswap == 2 || iswap == 3);
    const int isZSwap = (iswap == 4 || iswap == 5);

    if (!isYSwap && !isZSwap) {
        return;
    }

    // x is expanded for both the y swaps and the z swaps
    if (me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
    if (me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;

    if (!isZSwap) {
        return;
    }

    // y is expanded for the z swaps only
    if (me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
    if (me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
}

556
common/comm.c Normal file
View File

@@ -0,0 +1,556 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <comm.h>
#include <allocate.h>
#include <mpi.h>
#include <util.h>
// Initial capacity of the neighbour lists (nsend, nrecv, pbc_*, boxes, nexch)
#define NEIGHMIN 6
// Growth factor applied when a send/recv buffer or a send list is enlarged
#define BUFFACTOR 2
// Initial size (in elements) of the send/recv buffers and of every send list
#define BUFMIN 1000
// Extra elements allocated on top of maxsend for the send buffer
#define BUFEXTRA 100
// Shorthand for the communicator used by every MPI call in this file
#define world MPI_COMM_WORLD
// MPI datatype matching MD_FLOAT: MPI_FLOAT when MD_FLOAT is 4 bytes wide, MPI_DOUBLE otherwise
MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
// Helpers defined at the end of this file
static inline void allocDynamicBuffers(Comm*);
static inline void freeDynamicBuffers(Comm*);
static inline void freeBuffers(Comm*);
/*
 * Builds the receive lists from the send lists and flags self-only swaps.
 *
 * For every swap, the receive list (nrecv, recvfrom, recvtill) is a copy of
 * the send list of the opposite swap (same dimension, other direction).
 * othersend[iswap] is set to 0 only when the swap has exactly one send
 * neighbour and that neighbour is this process; otherwise it is 1.
 */
void defineReverseList(Comm* comm){
    int index = 0;

    //Set the inverse list
    for (int iswap = 0; iswap < 6; iswap++) {
        int dim = comm->swapdim[iswap];
        int dir = comm->swapdir[iswap];
        int invswap = comm->swap[dim][(dir + 1) % 2];

        for (int ineigh = comm->sendfrom[invswap]; ineigh < comm->sendtill[invswap]; ineigh++) {
            comm->nrecv[index++] = comm->nsend[ineigh];
        }
        comm->recvfrom[iswap] = (iswap == 0) ? 0 : comm->recvtill[iswap - 1];
        comm->recvtill[iswap] = index;
    }

    //set if myproc is unique in the swap
    for (int iswap = 0; iswap < 6; iswap++) {
        int first = comm->sendfrom[iswap];
        int sizeswap = comm->sendtill[iswap] - first;
        // nsend[first] is only read when the swap holds exactly one entry, so
        // an empty swap never touches an element past the filled part of nsend
        int selfOnly = (sizeswap == 1) && (comm->nsend[first] == comm->myproc);
        comm->othersend[iswap] = selfOnly ? 0 : 1;
    }
}
/*
 * Appends `newneigh` to the exchange neighbour list comm->nexch, doubling the
 * capacity of the list first when it is full.
 */
void addNeighToExchangeList(Comm* comm, int newneigh){
    const int slot = comm->numneighexch;

    if (slot >= comm->maxneighexch) {
        const size_t previousBytes = comm->maxneighexch * sizeof(int);
        comm->maxneighexch *= 2;
        comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT, comm->maxneighexch * sizeof(int), previousBytes);
    }

    comm->nexch[slot] = newneigh;
    comm->numneighexch = slot + 1;
}
//Exported functions
/*
 * Rebuilds every neighbour list of `comm` from the global box map grid->map.
 *
 * The map stores, for each process, xlo,ylo,zlo,xhi,yhi,zhi (6 values).
 * Two lists are produced:
 *  - the exchange list (nexch): every other process whose box, enlarged by
 *    grid->cutneigh, overlaps the local box (see overlapFullBox);
 *  - the per-swap send lists (nsend, boxes, pbc_x/y/z, sendfrom/sendtill):
 *    every process, the local one included, whose box overlaps the local box
 *    for that swap (see overlapBox / expandBox).
 * The buffers that depend on the number of neighbours are released first and
 * allocated again at the end, then the receive lists are derived.
 */
void neighComm(Comm *comm, Parameter* param, Grid *grid)
{
    const int PAD = 6; //number of elements for processor in the map
    const int me = comm->myproc;
    const int numproc = comm->numproc;
    MD_FLOAT *map = grid->map;
    MD_FLOAT cutneigh = param->cutneigh;
    MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
    Box mybox, other, cut;
    int ineigh = 0;

    //needed for rebalancing
    freeDynamicBuffers(comm);

    //Local box
    {
        const MD_FLOAT *row = &map[me * PAD];
        mybox.id = me;
        mybox.lo[_x] = row[0]; mybox.lo[_y] = row[1]; mybox.lo[_z] = row[2];
        mybox.hi[_x] = row[3]; mybox.hi[_y] = row[4]; mybox.hi[_z] = row[5];
    }

    //Neighbours used only for exchanging atoms; the local process is skipped
    comm->numneighexch = 0;
    for (int proc = 0; proc < numproc; proc++) {
        if (proc == me) {
            continue;
        }
        const MD_FLOAT *row = &map[proc * PAD];
        other.id = proc;
        other.lo[_x] = row[0]; other.lo[_y] = row[1]; other.lo[_z] = row[2];
        other.hi[_x] = row[3]; other.hi[_y] = row[4]; other.hi[_z] = row[5];
        if (overlapFullBox(param, grid->cutneigh, &mybox, &other)) {
            addNeighToExchangeList(comm, proc);
        }
    }

    //Neighbours per swap, for forwardComm, reverseComm and ghostComm
    for (int iswap = 0; iswap < 6; iswap++) {
        const int dir = comm->swapdir[iswap];
        const int dim = comm->swapdim[iswap];

        for (int proc = 0; proc < numproc; proc++) {
            const MD_FLOAT *row = &map[proc * PAD];
            other.id = proc;
            other.lo[_x] = row[0]; other.lo[_y] = row[1]; other.lo[_z] = row[2];
            other.hi[_x] = row[3]; other.hi[_y] = row[4]; other.hi[_z] = row[5];

            //-100: no intersection; 0, 1 and -1: intersection for each different pbc
            const int pbc = overlapBox(dim, dir, &mybox, &other, &cut, prd[dim], cutneigh);
            if (pbc == -100) {
                continue;
            }
            expandBox(iswap, &mybox, &other, &cut, cutneigh);

            //Grow all per-neighbour arrays together when they are full
            if (ineigh >= comm->maxneigh) {
                const size_t oldIntBytes = comm->maxneigh * sizeof(int);
                const size_t oldBoxBytes = comm->maxneigh * sizeof(Box);
                comm->maxneigh = 2 * ineigh;
                const size_t newIntBytes = comm->maxneigh * sizeof(int);
                const size_t newBoxBytes = comm->maxneigh * sizeof(Box);
                comm->nsend = (int*) reallocate(comm->nsend, ALIGNMENT, newIntBytes, oldIntBytes);
                comm->nrecv = (int*) reallocate(comm->nrecv, ALIGNMENT, newIntBytes, oldIntBytes);
                comm->pbc_x = (int*) reallocate(comm->pbc_x, ALIGNMENT, newIntBytes, oldIntBytes);
                comm->pbc_y = (int*) reallocate(comm->pbc_y, ALIGNMENT, newIntBytes, oldIntBytes);
                comm->pbc_z = (int*) reallocate(comm->pbc_z, ALIGNMENT, newIntBytes, oldIntBytes);
                comm->boxes = (Box*) reallocate(comm->boxes, ALIGNMENT, newBoxBytes, oldBoxBytes);
            }

            comm->boxes[ineigh] = cut;
            comm->nsend[ineigh] = proc;
            //the periodic flag only applies to the dimension of this swap
            comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
            comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0;
            comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0;
            ineigh++;
        }

        comm->sendfrom[iswap] = (iswap == 0) ? 0 : comm->sendtill[iswap - 1];
        comm->sendtill[iswap] = ineigh;
    }
    comm->numneigh = ineigh;

    allocDynamicBuffers(comm);
    defineReverseList(comm);
}
/*
 * Initializes MPI, stores the number of processes and the rank of this
 * process in `comm`, and resets the neighbour counters and every buffer
 * pointer of `comm` to an empty state.
 */
void initComm(int* argc, char*** argv, Comm* comm)
{
    //MPI Initialize
    MPI_Init(argc, argv);
    MPI_Comm_size(MPI_COMM_WORLD, &(comm->numproc));
    MPI_Comm_rank(MPI_COMM_WORLD, &(comm->myproc));

    //No neighbours known yet
    comm->numneigh = 0;
    comm->numneighexch = 0;

    //Neighbour lists
    comm->nsend = NULL;
    comm->nrecv = NULL;
    comm->nexch = NULL;
    comm->boxes = NULL;
    comm->pbc_x = NULL;
    comm->pbc_y = NULL;
    comm->pbc_z = NULL;

    //Buffers sized by the number of neighbours
    comm->atom_send = NULL;
    comm->atom_recv = NULL;
    comm->off_atom_send = NULL;
    comm->off_atom_recv = NULL;
    comm->maxsendlist = NULL;
    comm->sendlist = NULL;

    //Message buffers
    comm->buf_send = NULL;
    comm->buf_recv = NULL;
}
/*
 * Shuts the communication layer down: resets the stored capacities, releases
 * every buffer owned by `comm` and finalizes MPI.
 */
void endComm(Comm* comm)
{
    //Capacities of the message buffers
    comm->maxsend = 0;
    comm->maxrecv = 0;
    //Capacities of the neighbour lists
    comm->maxneigh = 0;
    comm->maxneighexch = 0;

    freeBuffers(comm);
    MPI_Finalize();
}
/*
 * Sets up the swap tables, the message sizes and the initial buffers of
 * `comm`, then builds the neighbour lists with neighComm.
 *
 * Swaps are numbered 0..5: two per dimension (x, y, z in this order), the
 * first with direction 0 and the second with direction 1.
 */
void setupComm(Comm* comm, Parameter* param, Grid* grid){
    const int axes[3] = { _x, _y, _z };

    //Swap tables: swap index <-> (dimension, direction)
    for (int a = 0; a < 3; a++) {
        const int d = axes[a];
        for (int dir = 0; dir < 2; dir++) {
            const int iswap = 2 * a + dir;
            comm->swap[d][dir] = iswap;
            comm->swapdim[iswap] = d;
            comm->swapdir[iswap] = dir;
        }
    }

    //Empty send/recv ranges for every swap
    for (int iswap = 0; iswap < 6; iswap++) {
        comm->sendfrom[iswap] = comm->sendtill[iswap] = 0;
        comm->recvfrom[iswap] = comm->recvtill[iswap] = 0;
    }

    //Number of values packed per atom for each kind of message
    comm->forwardSize = FORWARD_SIZE;   //send coordinates x,y,z
    comm->reverseSize = REVERSE_SIZE;   //return forces fx, fy, fz
    comm->ghostSize = GHOST_SIZE;       //send x,y,z,type
    comm->exchangeSize = EXCHANGE_SIZE; //send x,y,z,vx,vy,vz,type

    //Send and recv buffers
    comm->maxsend = BUFMIN;
    comm->maxrecv = BUFMIN;
    comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
    comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));

    //Exchange neighbour list
    comm->maxneighexch = NEIGHMIN;
    comm->nexch = (int*) allocate(ALIGNMENT, comm->maxneighexch * sizeof(int));

    //Per-swap neighbour lists
    comm->maxneigh = NEIGHMIN;
    const size_t intBytes = comm->maxneigh * sizeof(int);
    comm->nsend = (int*) allocate(ALIGNMENT, intBytes);
    comm->nrecv = (int*) allocate(ALIGNMENT, intBytes);
    comm->pbc_x = (int*) allocate(ALIGNMENT, intBytes);
    comm->pbc_y = (int*) allocate(ALIGNMENT, intBytes);
    comm->pbc_z = (int*) allocate(ALIGNMENT, intBytes);
    comm->boxes = (Box*) allocate(ALIGNMENT, comm->maxneigh * sizeof(Box));

    neighComm(comm, param, grid);
}
/*
 * Forward communication for one swap.
 *
 * Packs the atoms of every send list of `iswap` into buf_send (packForward),
 * exchanges the data with the neighbours of the swap and unpacks what was
 * received (unpackForward) starting at atom index firstrecv[iswap].
 * When othersend[iswap] is 0 no MPI message is sent and the send buffer is
 * unpacked directly.
 *
 * The atom counts and offsets (atom_send/atom_recv, off_atom_send/
 * off_atom_recv) are the ones stored by ghostComm.
 */
void forwardComm(Comm* comm, Atom* atom, int iswap)
{
    const int size = comm->forwardSize;
    const int remote = comm->othersend[iswap];
    int nrqst = 0;
    // A variable length array must have at least one element
    const int maxrqst = (comm->numneigh > 0) ? comm->numneigh : 1;
    MPI_Request requests[maxrqst];
    MD_FLOAT* buf;

    /* pack buffer */
    for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
        int pbc[3];
        int offset = comm->off_atom_send[ineigh];
        pbc[_x] = comm->pbc_x[ineigh];
        pbc[_y] = comm->pbc_y[ineigh];
        pbc[_z] = comm->pbc_z[ineigh];
        packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset * size], pbc);
    }

    if (remote) {
        //Receives elements; posted before the blocking sends
        for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
            int offset = comm->off_atom_recv[ineigh] * size;
            int nrecv = comm->atom_recv[ineigh] * size;
            MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh], 0, world, &requests[nrqst++]);
        }
        //Send elements
        for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
            int offset = comm->off_atom_send[ineigh] * size;
            int nsend = comm->atom_send[ineigh] * size;
            MPI_Send(&comm->buf_send[offset], nsend, type, comm->nsend[ineigh], 0, world);
        }
        // MPI_Waitall takes an array of statuses: MPI_STATUSES_IGNORE is the matching constant
        MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);
        buf = comm->buf_recv;
    } else {
        buf = comm->buf_send;
    }

    /* unpack buffer */
    for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
        int offset = comm->off_atom_recv[ineigh];
        unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset * size]);
    }
}
/*
 * Reverse communication for one swap: the mirror image of forwardComm.
 *
 * Packs the atoms received in `iswap` (packReverse, starting at atom index
 * firstrecv[iswap]), sends them back to the processes they came from and
 * unpacks the returned data onto the atoms of the send lists (unpackReverse).
 * When othersend[iswap] is 0 no MPI message is sent and the send buffer is
 * unpacked directly.
 */
void reverseComm(Comm* comm, Atom* atom, int iswap)
{
    const int size = comm->reverseSize;
    const int remote = comm->othersend[iswap];
    int nrqst = 0;
    // A variable length array must have at least one element
    const int maxrqst = (comm->numneigh > 0) ? comm->numneigh : 1;
    MPI_Request requests[maxrqst];
    MD_FLOAT* buf;

    /* pack buffer */
    for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
        int offset = comm->off_atom_recv[ineigh];
        packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset * size]);
    }

    if (remote) {
        //Receives elements; the roles of the send and recv lists are swapped
        for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
            int offset = comm->off_atom_send[ineigh] * size;
            int nrecv = comm->atom_send[ineigh] * size;
            MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh], 0, world, &requests[nrqst++]);
        }
        //Send elements
        for (int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++) {
            int offset = comm->off_atom_recv[ineigh] * size;
            int nsend = comm->atom_recv[ineigh] * size;
            MPI_Send(&comm->buf_send[offset], nsend, type, comm->nrecv[ineigh], 0, world);
        }
        // MPI_Waitall takes an array of statuses: MPI_STATUSES_IGNORE is the matching constant
        MPI_Waitall(nrqst, requests, MPI_STATUSES_IGNORE);
        buf = comm->buf_recv;
    } else {
        buf = comm->buf_send;
    }

    /* unpack buffer */
    for (int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++) {
        int offset = comm->off_atom_send[ineigh];
        unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset * size]);
    }
}
/*
 * Ghost communication for one swap.
 *
 * For every send neighbour of `iswap`, selects the atoms that lie in the
 * neighbour's send region (IsinRegionToSend), records them in the send list
 * and packs them (packGhost). The atom counts are exchanged first, then the
 * packed atoms, and every received atom is unpacked with unpackGhost.
 * The send/recv buffers are finally enlarged, if needed, so that the later
 * forward and reverse communications fit.
 *
 * The local names xlo..zhi, atom and comm are kept as in the original code
 * because the macros IsinRegionToSend, LOCAL and GHOST are defined elsewhere
 * and may refer to them.
 */
void ghostComm(Comm* comm, Atom* atom,int iswap){
    MD_FLOAT xlo=0, xhi=0, ylo=0, yhi=0, zlo=0, zhi=0;
    MD_FLOAT* buf;
    int nrqst=0, nsend=0, nrecv=0, offset=0, ineigh=0, pbc[3];
    int all_recv=0, all_send=0, currentSend=0;
    int size = comm->ghostSize;
    // A variable length array must have at least one element
    int maxrqrst = (comm->numneigh > 0) ? comm->numneigh : 1;
    MPI_Request requests[maxrqrst];

    // Initialize every request (index i, not one past the end of the array)
    for(int i = 0; i<maxrqrst; i++)
        requests[i]=MPI_REQUEST_NULL;

    // The set of candidate atoms is only refreshed on the first swap of a dimension
    if(iswap%2==0) comm->iterAtom = LOCAL+GHOST;

    for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
    {
        Box* tile = &comm->boxes[ineigh];
        xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z];
        xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];
        pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
        nsend = 0;
        for(int i = 0; i < comm->iterAtom ; i++)
        {
            if(IsinRegionToSend(i)){
                if(nsend >= comm->maxsendlist[ineigh]) growList(comm,ineigh,nsend);
                if(currentSend + size >= comm->maxsend) growSend(comm,currentSend);
                comm->sendlist[ineigh][nsend++] = i;
                currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);
            }
        }
        comm->atom_send[ineigh] = nsend;        //#atoms send per neigh
        comm->off_atom_send[ineigh] = all_send; //offset atom respect to neighbours in a swap
        all_send += nsend;                      //all atoms send
    }

    //Receives how many elements to be received.
    if(comm->othersend[iswap])
        for(nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++)
            MPI_Irecv(&comm->atom_recv[ineigh],1,MPI_INT,comm->nrecv[ineigh],0,world,&requests[nrqst++]);

    //Self-only swap: what is sent is what is received
    if(!comm->othersend[iswap]) comm->atom_recv[comm->recvfrom[iswap]] = nsend;

    //Communicate how many elements to be sent.
    if(comm->othersend[iswap])
        for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
            MPI_Send(&comm->atom_send[ineigh],1,MPI_INT,comm->nsend[ineigh],0,world);

    // MPI_Waitall takes an array of statuses: MPI_STATUSES_IGNORE is the matching constant
    if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);

    //Define offset to store in the recv_buff
    for(int ineigh = comm->recvfrom[iswap]; ineigh<comm->recvtill[iswap]; ineigh++){
        comm->off_atom_recv[ineigh] = all_recv;
        all_recv += comm->atom_recv[ineigh];
    }

    if(all_recv*size>=comm->maxrecv) growRecv(comm,all_recv*size);

    //Receives elements
    if(comm->othersend[iswap])
        for (nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
            offset = comm->off_atom_recv[ineigh]*size;
            nrecv = comm->atom_recv[ineigh]*size;
            MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
        }

    //Send elements
    if(comm->othersend[iswap])
        for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
            offset = comm->off_atom_send[ineigh]*size;
            nsend = comm->atom_send[ineigh]*size;
            MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
        }

    if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);

    //Self-only swap: unpack straight from the send buffer
    if(comm->othersend[iswap]) buf = comm->buf_recv;
    else buf = comm->buf_send;

    //unpack elements
    comm->firstrecv[iswap] = LOCAL+GHOST;
    for(int i = 0; i < all_recv; i++)
        unpackGhost(atom, LOCAL+GHOST, &buf[i*size]);

    //Increases the buffer if needed
    int max_size = MAX(comm->forwardSize,comm->reverseSize);
    int max_buf = max_size * MAX(all_recv, all_send);
    if(max_buf>=comm->maxrecv) growRecv(comm,max_buf);
    if(max_buf>=comm->maxsend) growSend(comm,max_buf);
}
/*
 * Moves the atoms that left the local box to the processes that now own them.
 *
 * After enforcing the periodic boundary conditions, every local atom outside
 * atom->mybox is packed (packExchange) and removed from the local list. The
 * same packed buffer is sent to every exchange neighbour (comm->nexch); each
 * process keeps, from what it receives, only the atoms that fall inside its
 * own box. Finally the global atom count is checked and process 0 prints a
 * message when it differs from atom->Natoms.
 */
void exchangeComm(Comm* comm, Atom* atom){
    MD_FLOAT x,y,z;
    MD_FLOAT *lo = atom->mybox.lo;
    MD_FLOAT *hi = atom->mybox.hi;
    int size = comm->exchangeSize;
    int numneigh = comm->numneighexch;
    // numneighexch is 0 on a single process; a variable length array must
    // have at least one element
    int nslots = (numneigh > 0) ? numneigh : 1;
    int offset_recv[nslots];
    int size_recv[nslots];
    MPI_Request requests[nslots];
    int i =0, nsend = 0, nrecv = 0;
    int nrqst = 0;
    int nlocal, offset,m;

    /* enforce PBC */
    pbc(atom);
    if(comm->numneigh == 0) return;

    /* pack and drop every atom that is outside the local box */
    nlocal = atom->Nlocal;
    while(i < nlocal) {
        if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
           atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
           atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
            if(nsend+size >= comm->maxsend) growSend(comm, nsend);
            nsend += packExchange(atom, i, &comm->buf_send[nsend]);
            // the last local atom takes the freed slot, so index i is examined again
            copy(atom, i, nlocal-1);
            nlocal--;
        } else i++;
    }
    atom->Nlocal = nlocal;

    /* send/recv number of to share atoms with neighbouring procs*/
    for(int ineigh = 0; ineigh < numneigh; ineigh++)
        MPI_Irecv(&size_recv[ineigh],1,MPI_INT,comm->nexch[ineigh],0,world,&requests[nrqst++]);
    for (int ineigh = 0; ineigh < numneigh; ineigh++)
        MPI_Send(&nsend,1,MPI_INT,comm->nexch[ineigh],0,world);
    // MPI_Waitall takes an array of statuses: MPI_STATUSES_IGNORE is the matching constant
    MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);

    //Define offset to store in the recv_buff
    for(int ineigh = 0; ineigh<numneigh; ineigh++){
        offset_recv[ineigh] = nrecv;
        nrecv += size_recv[ineigh];
    }
    if(nrecv >= comm->maxrecv) growRecv(comm,nrecv);

    //Receives elements
    nrqst=0;
    for (int ineigh = 0; ineigh< numneigh; ineigh++){
        offset = offset_recv[ineigh];
        MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh],0,world,&requests[nrqst++]);
    }

    //Send elements: every neighbour gets the whole buffer
    for (int ineigh = 0; ineigh< numneigh; ineigh++)
        MPI_Send(comm->buf_send,nsend,type,comm->nexch[ineigh],0,world);
    MPI_Waitall(nrqst,requests,MPI_STATUSES_IGNORE);

    /* keep only the received atoms that belong to the local box */
    nlocal = atom->Nlocal;
    m = 0;
    while(m < nrecv) {
        x = comm->buf_recv[m + _x];
        y = comm->buf_recv[m + _y];
        z = comm->buf_recv[m + _z];
        if(x >= lo[_x] && x < hi[_x] &&
           y >= lo[_y] && y < hi[_y] &&
           z >= lo[_z] && z < hi[_z]){
            m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
        } else {
            m += size;
        }
    }
    atom->Nlocal = nlocal;

    /* consistency check on the global number of atoms */
    int all_atoms=0;
    MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
    if(atom->Natoms!=all_atoms && comm->myproc ==0){
        printf("Losing atoms! current atoms:%d expected atoms:%d\n",all_atoms,atom->Natoms);
    }
}
//Internal functions
/*
 * Replaces the receive buffer with a new one of BUFFACTOR * n elements.
 * The previous contents are discarded, not copied.
 */
inline void growRecv(Comm* comm, int n)
{
    // free(NULL) is a no-op, so no guard is needed
    free(comm->buf_recv);
    comm->maxrecv = BUFFACTOR * n;
    comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
}
/*
 * Enlarges the send buffer to BUFFACTOR * n elements (plus BUFEXTRA) through
 * reallocate, which is given both the new and the previous size in bytes.
 */
inline void growSend(Comm* comm, int n)
{
    const size_t previousBytes = (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT);

    comm->maxsend = BUFFACTOR * n;
    const size_t requestedBytes = (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT);

    comm->buf_send = (MD_FLOAT*) reallocate(comm->buf_send, ALIGNMENT, requestedBytes, previousBytes);
}
/*
 * Enlarges the send list of neighbour `ineigh` to BUFFACTOR * n entries
 * through reallocate.
 */
inline void growList(Comm* comm, int ineigh, int n)
{
    const size_t previousBytes = comm->maxsendlist[ineigh] * sizeof(int);

    comm->maxsendlist[ineigh] = BUFFACTOR * n;
    const size_t requestedBytes = comm->maxsendlist[ineigh] * sizeof(int);

    comm->sendlist[ineigh] = (int*) reallocate(comm->sendlist[ineigh], ALIGNMENT, requestedBytes, previousBytes);
}
/*
 * Allocates the buffers whose size depends on the number of neighbours
 * (comm->numneigh): atom counts, atom offsets and one send list of BUFMIN
 * entries per neighbour.
 */
static inline void allocDynamicBuffers(Comm* comm)
{
    const int numneigh = comm->numneigh;
    const size_t intBytes = numneigh * sizeof(int);

    //Counts and offsets per neighbour
    comm->atom_send = (int*) allocate(ALIGNMENT, intBytes);
    comm->atom_recv = (int*) allocate(ALIGNMENT, intBytes);
    comm->off_atom_send = (int*) allocate(ALIGNMENT, intBytes);
    comm->off_atom_recv = (int*) allocate(ALIGNMENT, intBytes);

    //Send lists, every one starting with the same capacity
    comm->maxsendlist = (int*) allocate(ALIGNMENT, intBytes);
    comm->sendlist = (int**) allocate(ALIGNMENT, numneigh * sizeof(int*));
    for (int ineigh = 0; ineigh < numneigh; ineigh++) {
        comm->maxsendlist[ineigh] = BUFMIN;
        comm->sendlist[ineigh] = (int*) allocate(ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int));
    }
}
/*
 * Releases the buffers allocated by allocDynamicBuffers. comm->numneigh must
 * still hold the number of neighbours the send lists were allocated for.
 * The pointers are left unchanged after being freed.
 */
static inline void freeDynamicBuffers(Comm* comm)
{
    //Send lists first: the individual lists, then the array holding them
    if (comm->sendlist != NULL) {
        const int numneigh = comm->numneigh;
        for (int ineigh = 0; ineigh < numneigh; ineigh++) {
            free(comm->sendlist[ineigh]);
        }
        free(comm->sendlist);
    }

    //free(NULL) is a no-op, so the remaining buffers need no guard
    free(comm->maxsendlist);
    free(comm->off_atom_recv);
    free(comm->off_atom_send);
    free(comm->atom_recv);
    free(comm->atom_send);
}
/*
 * Releases every buffer owned by `comm`: neighbour lists, per-neighbour
 * buffers, send lists and message buffers. comm->numneigh must still hold the
 * number of neighbours the send lists were allocated for.
 */
static inline void freeBuffers(Comm* comm)
{
    //Send lists: the individual lists, then the array holding them
    if (comm->sendlist != NULL) {
        for (int ineigh = 0; ineigh < comm->numneigh; ineigh++) {
            free(comm->sendlist[ineigh]);
        }
        free(comm->sendlist);
    }

    //free(NULL) is a no-op, so the remaining buffers need no guard

    //Neighbour lists
    free(comm->nrecv);
    free(comm->nsend);
    free(comm->nexch);
    free(comm->pbc_x);
    free(comm->pbc_y);
    free(comm->pbc_z);
    free(comm->boxes);

    //Per-neighbour counts, offsets and list capacities
    free(comm->atom_send);
    free(comm->atom_recv);
    free(comm->off_atom_send);
    free(comm->off_atom_recv);
    free(comm->maxsendlist);

    //Message buffers
    free(comm->buf_send);
    free(comm->buf_recv);
}

490
common/grid.c Normal file
View File

@@ -0,0 +1,490 @@
#include <stdio.h>
#include <grid.h>
#include <mpi.h>
#include <parameter.h>
#include <allocate.h>
#include <util.h>
#include <math.h>
// MPI datatype matching MD_FLOAT: MPI_FLOAT when MD_FLOAT is 4 bytes wide, MPI_DOUBLE otherwise
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
//GROMACS-style balancing (staggered grid)
/*
 * Clamps every entry of `x` from below to `minx` and normalizes the result so
 * that the entries of `fx` add up to 1.
 *
 * x      : input values (nprocs entries), not modified
 * fx     : output values (nprocs entries)
 * minx   : lower bound applied to every entry before normalizing
 * nprocs : number of entries
 *
 * Returns the sum of the clamped values, i.e. the normalization constant.
 * The function is declared as returning MD_FLOAT but previously had no
 * return statement.
 */
MD_FLOAT f_normalization(MD_FLOAT* x,MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {
    MD_FLOAT sum = 0;

    for (int n = 0; n < nprocs; n++) {
        fx[n] = MAX(minx, x[n]);
        sum += fx[n];
    }
    for (int n = 0; n < nprocs; n++) {
        fx[n] /= sum;
    }
    return sum;
}
/*
 * Damped fixed-point iteration on the load fractions `x0`.
 *
 * Each iteration computes fx = (1 - alpha) * x0 + alpha * f_normalization(x0)
 * and copies it back into x0. The iteration stops when every entry changed by
 * less than the tolerance, or after maxIterations iterations.
 *
 * x0     : load fractions (nprocs entries), updated in place
 * nprocs : number of entries
 * minx   : lower bound passed to f_normalization
 *
 * The scratch array is now released on every exit path (it used to leak).
 */
void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
{
    const MD_FLOAT tolerance = 1e-3;
    const MD_FLOAT alpha = 0.5;
    const int maxIterations = 100;
    MD_FLOAT *fx = (MD_FLOAT*) malloc(nprocs*sizeof(MD_FLOAT));

    for (int i = 0; i < maxIterations; i++) {
        int converged = 1;

        f_normalization(x0, fx, minx, nprocs);
        for (int n = 0; n < nprocs; n++) {
            fx[n] = (1 - alpha) * x0[n] + alpha * fx[n];
        }

        // Convergence is judged on the change made by this iteration
        for (int n = 0; n < nprocs; n++) {
            if (fabs(fx[n] - x0[n]) >= tolerance) {
                converged = 0;
                break;
            }
        }

        // The new iterate is stored even on the converging iteration
        for (int n = 0; n < nprocs; n++) {
            x0[n] = fx[n];
        }

        if (converged) {
            break;
        }
    }

    free(fx);
}
/*
 * Staggered-grid load balancing.
 *
 * The time elapsed since the previous call (newTime - grid->Timer) is used as
 * the workload of this process. Workloads are gathered along z, then y, then
 * x on sub-communicators; on the root of each sub-communicator they are
 * turned into load fractions, smoothed with fixedPointIteration and converted
 * into new cell limits, which are scattered back. The new local box is stored
 * in atom->mybox, the global map grid->map is rebuilt and grid->cutneigh is
 * increased by the largest boundary displacement of any process.
 *
 * Fixes over the previous version:
 *  - the elapsed time is held in an MD_FLOAT, so the buffer given to
 *    MPI_Gather matches the datatype `type` also when MD_FLOAT is float;
 *  - the cellSize scratch array is released.
 */
void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
{
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    int *coord = grid->coord;
    int *nprocs = grid ->nprocs;
    //Elapsed time since the last rebalance, in the type described by `type`
    MD_FLOAT elapsed = (MD_FLOAT) (newTime - grid->Timer);
    grid->Timer = newTime;
    //store the older dimm to compare later for exchange
    MD_FLOAT lo[3], hi[3];
    for(int dim = 0; dim< 3; dim++){
        lo[dim] = atom->mybox.lo[dim];
        hi[dim] = atom->mybox.hi[dim];
    }
    //Define parameters
    MPI_Comm subComm[3];
    int color[3] = {0,0,0};
    int id[3] = {0,0,0};
    MD_FLOAT ** load = (MD_FLOAT**) malloc(3*sizeof(MD_FLOAT*));
    for(int dim = 0; dim<3; dim++)
        load[dim] = (MD_FLOAT*) malloc(nprocs[dim]*sizeof(MD_FLOAT));
    int maxprocs = MAX(MAX(nprocs[_x],nprocs[_y]),nprocs[_z]);
    MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs*sizeof(MD_FLOAT));
    MD_FLOAT* limits = (MD_FLOAT*) malloc(2*maxprocs*sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... Repeat values in between to perform MPI_Scatter later
    MD_FLOAT t_sum[3] = {0,0,0};
    MD_FLOAT recv_buf[2] = {0,0};       //Each proc only receives 2 elements per dimension xlo and xhi
    MD_FLOAT balancedLoad[3] = {0,0,0}; //1/nprocs
    MD_FLOAT minLoad[3] = {0,0,0};      //beta*(1/nprocs)
    MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
    MD_FLOAT boundaries[6] ={0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi
    //Create sub-communications along each dimension
    for(int dim = 0; dim<3; dim++){
        if(dim == _x){
            color[_x] = (coord[_y] == 0 && coord[_z] ==0) ? 1:MPI_UNDEFINED;
            id[_x] = me;
        } else if(dim == _y) {
            color[_y] = coord[_z] == 0 ? coord[_x]:MPI_UNDEFINED;
            id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0:me;
        } else {
            color[_z]= coord[_y]*nprocs[_x]+coord[_x];
            id[_z] = coord[_z] == 0 ? 0 : me;
        }
        MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
    }
    //Set the minimum load and the balance load
    for(int dim = 0; dim<3; dim++){
        balancedLoad[dim] = 1./nprocs[dim];
        minLoad[dim] = 0.8*balancedLoad[dim];
    }
    //set and communicate the workload in reverse order
    for(int dim = _z; dim>= _x; dim--)
    {
        if(subComm[dim] != MPI_COMM_NULL){
            MPI_Gather(&elapsed,1,type,load[dim],1,type,0,subComm[dim]);
            if(id[dim] == 0)
            {
                for(int n=0; n<nprocs[dim]; n++)
                    t_sum[dim] += load[dim][n];
                for(int n=0; n<nprocs[dim]; n++)
                    load[dim][n] /= t_sum[dim];
            }
            //the accumulated time is what gets gathered in the next dimension
            elapsed = t_sum[dim];
        }
        MPI_Barrier(world);
    }
    //Broadcast the new boundaries along dimensions
    for(int dim=0; dim<3; dim++){
        if(subComm[dim] != MPI_COMM_NULL){
            MPI_Bcast(boundaries,6,type,0,subComm[dim]);
            if(id[dim] == 0) {
                fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
                //cell sizes are proportional to the inverse of the load
                MD_FLOAT inv_sum=0;
                for(int n=0; n<nprocs[dim];n++)
                    inv_sum +=(1/load[dim][n]);
                for(int n=0; n<nprocs[dim];n++)
                    cellSize[n] = (prd[dim]/load[dim][n])*(1./inv_sum);
                MD_FLOAT sum=0;
                for(int n=0; n<nprocs[dim]; n++){
                    limits[2*n] = sum;
                    limits[2*n+1] = sum+cellSize[n];
                    sum+= cellSize[n];
                }
                //the last upper limit is set to the period itself
                limits[2*nprocs[dim]-1] = prd[dim];
            }
            MPI_Scatter(limits,2,type,recv_buf,2,type,0,subComm[dim]);
            boundaries[2*dim] = recv_buf[0];
            boundaries[2*dim+1] = recv_buf[1];
        }
        MPI_Barrier(world);
    }
    atom->mybox.lo[_x]=boundaries[0]; atom->mybox.hi[_x]=boundaries[1];
    atom->mybox.lo[_y]=boundaries[2]; atom->mybox.hi[_y]=boundaries[3];
    atom->mybox.lo[_z]=boundaries[4]; atom->mybox.hi[_z]=boundaries[5];
    //the map stores xlo,ylo,zlo,xhi,yhi,zhi for every process
    MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
    //because cells change dynamically, It is required to increase the neighbouring exchange region
    for(int dim =_x; dim<=_z; dim++){
        MD_FLOAT dr,dr_max;
        dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]),fabs(hi[dim] - atom->mybox.hi[dim]));
        MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
        grid->cutneigh[dim] = param->cutneigh+dr_max;
    }
    for(int dim=0; dim<3; dim++) {
        if(subComm[dim] != MPI_COMM_NULL){
            MPI_Comm_free(&subComm[dim]);
        }
        free(load[dim]);
    }
    free(load);
    free(cellSize);
    free(limits);
}
//RCB Balancing
/*
 * Bisection position for the RCB balancing, weighted by elapsed time.
 *
 * Every process contributes the sum of its atom positions (atom_pos)
 * multiplied by `time`, and its number of local atoms multiplied by `time`.
 * The result is the ratio of the two sums reduced over `subComm`.
 */
MD_FLOAT meanTimeBisect(Atom *atom, MPI_Comm subComm, int dim, double time)
{
    MD_FLOAT positionSum = 0;
    MD_FLOAT globalPositionSum = 0;
    MD_FLOAT globalWeight = 0;

    for (int i = 0; i < atom->Nlocal; i++) {
        positionSum += atom_pos(i);
    }

    //Local contributions, both scaled by the elapsed time
    MD_FLOAT weightedSum = positionSum * time;
    MD_FLOAT localWeight = atom->Nlocal * time;

    MPI_Allreduce(&weightedSum, &globalPositionSum, 1, type, MPI_SUM, subComm);
    MPI_Allreduce(&localWeight, &globalWeight, 1, type, MPI_SUM, subComm);

    MD_FLOAT mean = globalPositionSum / globalWeight;
    return mean;
}
/*
 * Bisection position for the RCB balancing: the mean atom position
 * (atom_pos) over all the atoms of the processes in `subComm`.
 * `time` is not used by this method.
 */
MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
    MD_FLOAT positionSum = 0;
    MD_FLOAT globalPositionSum = 0;
    int globalAtoms = 0;

    for (int i = 0; i < atom->Nlocal; i++) {
        positionSum += atom_pos(i);
    }

    MPI_Allreduce(&positionSum, &globalPositionSum, 1, type, MPI_SUM, subComm);
    MPI_Allreduce(&atom->Nlocal, &globalAtoms, 1, MPI_INT, MPI_SUM, subComm);

    MD_FLOAT mean = globalPositionSum / globalAtoms;
    return mean;
}
/*
 * Performs one level of the recursive bisection inside `subComm`.
 *
 * The bisection position along `dim` is obtained from `method`. The lower
 * half of the ranks keeps the part of the box below it, the upper half the
 * part above it, and bit `ilevel` of *color is set for the upper half. Each
 * rank then sends the atoms that fall outside its new box to its partner
 * (rank +/- half). With an odd number of ranks the last rank sends to rank 0
 * and receives nothing; rank 0 receives from both its partner and the last
 * rank.
 *
 * grid->buf_send / grid->buf_recv are enlarged when needed; their capacities
 * (maxsend / maxrecv) are counted in atoms of values_per_atom values each.
 */
void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim ,int* color, int ilevel, double time)
{
    int rank, size;
    int branch = 0, i = 0, m = 0;
    int nsend = 0, nrecv = 0, nrecv2 = 0;
    int values_per_atom = 7;
    MD_FLOAT bisection, pos;
    MPI_Request request[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
    MPI_Comm_rank(subComm,&rank);
    MPI_Comm_size(subComm,&size);
    int odd = size%2;
    int extraProc = odd ? size-1:size;
    int half = size/2;
    int partner = (rank<half) ? rank+half:rank-half;
    if(odd && rank == extraProc) partner = 0;
    //Apply the bisection
    bisection = method(atom,subComm,dim,time);
    //Define the new boundaries
    if(rank<half){
        atom->mybox.hi[dim] = bisection;
        branch = 0;
    } else {
        atom->mybox.lo[dim] = bisection;
        branch = 1;
    }
    //Define new color for the further communication
    *color = (branch << ilevel) | *color;
    //Grow the send buffer
    if(atom->Nlocal>=grid->maxsend){
        free(grid->buf_send);
        grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal*values_per_atom* sizeof(MD_FLOAT));
        grid->maxsend = atom->Nlocal;
    }
    //buffer particles to send
    while(i < atom->Nlocal) {
        pos = atom_pos(i);
        if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
            nsend += packExchange(atom, i, &grid->buf_send[nsend]);
            //the last local atom takes the freed slot, so index i is examined again
            copy(atom, i, atom->Nlocal-1);
            atom->Nlocal--;
        } else i++;
    }
    //Communicate the number of elements to be sent
    if(rank < extraProc){
        MPI_Irecv(&nrecv,1,MPI_INT,partner,0,subComm,&request[0]);
    }
    if(odd && rank == 0){
        MPI_Irecv(&nrecv2,1,MPI_INT,extraProc,0,subComm,&request[1]);
    }
    MPI_Send(&nsend,1,MPI_INT,partner,0,subComm);
    //MPI_Waitall takes an array of statuses: MPI_STATUSES_IGNORE is the matching constant.
    //Requests left as MPI_REQUEST_NULL are accepted by MPI_Waitall.
    MPI_Waitall(2,request,MPI_STATUSES_IGNORE);
    //Grow the recv buffer
    if(nrecv+nrecv2>=grid->maxrecv){
        free(grid->buf_recv);
        grid->buf_recv = (MD_FLOAT*) malloc((nrecv+nrecv2)*values_per_atom*sizeof(MD_FLOAT));
        grid->maxrecv = nrecv+nrecv2;
    }
    //communicate elements in the buffer
    request[0] = MPI_REQUEST_NULL;
    request[1] = MPI_REQUEST_NULL;
    if(rank < extraProc){
        MPI_Irecv(grid->buf_recv,nrecv,type,partner,0,subComm,&request[0]);
    }
    if(odd && rank == 0){
        MPI_Irecv(&grid->buf_recv[nrecv],nrecv2,type,extraProc,0,subComm,&request[1]);
    }
    MPI_Send (grid->buf_send,nsend,type,partner,0,subComm);
    MPI_Waitall(2,request,MPI_STATUSES_IGNORE);
    //store atoms in atom list
    while(m < nrecv+nrecv2){
        m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
    }
}
/*
 * Recursive coordinate bisection (RCB) load balancing.
 *
 * Every process starts from the whole periodic box and the box is bisected
 * level by level (nextBisectionLevel) until there are at least as many boxes
 * as processes. Level `ilevel` cuts along the (ilevel % ndim)-th largest
 * dimension of the box. Processes sharing the same `color` form the
 * sub-communicator of a level. Afterwards grid->Timer, the global map
 * grid->map and grid->cutneigh are updated.
 *
 * method  : function returning the bisection position for a sub-communicator
 * ndim    : number of dimensions used for cutting
 * newTime : current time; newTime - grid->Timer is passed on to `method`
 *
 * Fixes over the previous version:
 *  - the box lengths are held in MD_FLOAT instead of int, so the ordering by
 *    larger dimension no longer compares truncated values;
 *  - the temporary used to swap two int indices is an int;
 *  - the number of boxes is doubled with integer arithmetic instead of pow().
 */
void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
{
    int me, nprocs=0, ilevel=0, nboxes=1;
    int color = 0, size =0;
    int index;
    MD_FLOAT prd[3];
    MPI_Comm subComm;
    MPI_Comm_size(world, &nprocs);
    MPI_Comm_rank(world, &me);
    //set the elapsed time since the last dynamic balance
    double time = newTime - grid->Timer;
    prd[_x] = atom->mybox.xprd = param->xprd;
    prd[_y] = atom->mybox.yprd = param->yprd;
    prd[_z] = atom->mybox.zprd = param->zprd;
    //Sort by larger dimension
    int largerDim[3] ={_x, _y, _z};
    for(int i = 0; i< 2; i++){
        for(int j = i+1; j<3; j++)
        {
            if(prd[largerDim[j]]>prd[largerDim[i]]){
                int tmp = largerDim[j];
                largerDim[j] = largerDim[i];
                largerDim[i] = tmp;
            }
        }
    }
    //Initial Partition
    atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
    atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
    atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;
    //Recursion tree
    while(nboxes<nprocs)
    {
        index = ilevel%ndim;
        MPI_Comm_split(world, color, me, &subComm);
        MPI_Comm_size(subComm,&size);
        //a sub-communicator with a single process has nothing left to bisect
        if(size > 1){
            nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
        }
        MPI_Comm_free(&subComm);
        //every level doubles the number of boxes
        ilevel++;
        nboxes *= 2;
    }
    //Set the new timer grid
    grid->Timer = newTime;
    //Creating the global map
    MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
    //Define the same cutneighbour in all dimensions for the exchange communication
    for(int dim =_x; dim<=_z; dim++)
        grid->cutneigh[dim] = param->cutneigh;
}
//Regular grid
/*
 * Regular decomposition: splits the periodic box into a 3d Cartesian grid of
 * processes (MPI_Dims_create / MPI_Cart_create).
 *
 * Stores the grid dimensions and the coordinates of this process in `grid`,
 * the periods and the boundaries of the local box (origin at (0,0,0)) in
 * `box`, gathers the boxes of all processes into grid->map and sets
 * grid->cutneigh to param->cutneigh in every dimension.
 */
void cartisian3d(Grid* grid, Parameter* param, Box* box)
{
    const int axes[3] = { _x, _y, _z };
    int me, nproc;
    int numdim = 3;
    int reorder = 0;
    int periods[3] = {1,1,1};
    int mycoord[3] = {0,0,0};
    int griddim[3] = {0,0,0};
    MD_FLOAT len[3];
    MPI_Comm cartesian;

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    box->xprd = param->xprd;
    box->yprd = param->yprd;
    box->zprd = param->zprd;

    //Creates a cartesian 3d grid and finds the position of this process in it
    MPI_Dims_create(nproc, numdim, griddim);
    MPI_Cart_create(world, numdim, griddim, periods, reorder, &cartesian);
    MPI_Cart_coords(cartesian, me, 3, mycoord);

    //Length of one cell of the grid in every dimension
    len[_x] = param->xprd / griddim[_x];
    len[_y] = param->yprd / griddim[_y];
    len[_z] = param->zprd / griddim[_z];

    for (int a = 0; a < 3; a++) {
        const int d = axes[a];
        grid->nprocs[d] = griddim[d];
        grid->coord[d] = mycoord[d];
        //boundaries of my local box, with origin in (0,0,0)
        box->lo[d] = mycoord[d] * len[d];
        box->hi[d] = (mycoord[d] + 1) * len[d];
    }

    //Global map, stored as xlo,ylo,zlo,xhi,yhi,zhi per process
    MD_FLOAT domain[6] = {
        box->lo[_x], box->lo[_y], box->lo[_z],
        box->hi[_x], box->hi[_y], box->hi[_z]
    };
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
    MPI_Comm_free(&cartesian);

    //Define the same cutneighbour in all dimensions for the exchange communication
    for (int dim = _x; dim <= _z; dim++) {
        grid->cutneigh[dim] = param->cutneigh;
    }
}
//Other Functions from the grid
/*
 * Puts a Grid into its initial state: allocates the global box map with six
 * MD_FLOAT entries per rank of `world`, clears the RCB buffers and resets
 * the timer used by the staggered balancing.
 */
void initGrid(Grid* grid)
{
    int nranks;
    MPI_Comm_size(world, &nranks);

    //global map: 6 boundaries per rank
    grid->map_size = 6 * nranks;
    grid->map      = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));

    //rcb: no buffers yet
    grid->buf_send = NULL;
    grid->buf_recv = NULL;
    grid->maxsend  = 0;
    grid->maxrecv  = 0;

    //staggered
    grid->Timer = 0.;
}
/*
 * Initializes the grid with a regular Cartesian decomposition and keeps on
 * each rank only the atoms that lie inside its own box.
 *
 * When an input file is used, the atom coordinates are first shifted by
 * (param->xlo, param->ylo, param->zlo) so that the origin is at (0,0,0).
 * If no balancing is requested (param->balance == 0), the global atom count
 * is reduced into atom->Natoms and every rank prints its atom counts.
 */
void setupGrid(Grid* grid, Atom* atom, Parameter* param)
{
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    initGrid(grid);

    //Set the origin at (0,0,0)
    if(param->input_file) {
        for(int k = 0; k < atom->Nlocal; k++) {
            atom_x(k) -= param->xlo;
            atom_y(k) -= param->ylo;
            atom_z(k) -= param->zlo;
        }
    }

    cartisian3d(grid, param, &atom->mybox);

    //Local copy of my box boundaries
    MD_FLOAT lo[3], hi[3];
    for(int dim = _x; dim <= _z; dim++) {
        lo[dim] = atom->mybox.lo[dim];
        hi[dim] = atom->mybox.hi[dim];
    }

    //Drop every atom outside of my box; the slot is refilled from the end
    //of the list, so the index only advances when the atom is kept.
    for(int i = 0; i < atom->Nlocal; ) {
        int inside = atom_x(i) >= lo[_x] && atom_x(i) < hi[_x] &&
                     atom_y(i) >= lo[_y] && atom_y(i) < hi[_y] &&
                     atom_z(i) >= lo[_z] && atom_z(i) < hi[_z];
        if(inside) {
            i++;
            continue;
        }
        copy(atom, i, atom->Nlocal - 1);
        atom->Nlocal--;
    }

    if(param->balance) {
        return;
    }

    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
    MPI_Barrier(world);
}
/*
 * Prints the boxes of all ranks stored in grid->map. Only rank 0 prints;
 * all ranks of `world` synchronize on a barrier afterwards.
 */
void printGrid(Grid* grid)
{
    int me, nprocs;
    MPI_Comm_size(world, &nprocs);
    MPI_Comm_rank(world, &me);

    if(me == 0) {
        printf("GRID:\n");
        printf("===================================================================================================\n");
        for(int rank = 0; rank < nprocs; rank++) {
            //per rank: lo_x, lo_y, lo_z, hi_x, hi_y, hi_z
            const MD_FLOAT* b = &grid->map[6 * rank];
            printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", rank, b[0], b[3], b[1], b[4], b[2], b[5]);
        }
        printf("\n\n");
    }

    MPI_Barrier(world);
}

22
common/includes/box.h Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __BOX_H_
#define __BOX_H_
// Axis-aligned box: a sub-domain together with the lengths of the full domain.
typedef struct {
int id; // box identifier; overlapBox treats two boxes with equal ids as the same box
MD_FLOAT xprd, yprd, zprd; // lengths of the whole simulation domain in x, y and z
MD_FLOAT lo[3]; // lowest coordinate of my subdomain, indexed by _x, _y, _z
MD_FLOAT hi[3]; // highest coordinate of my subdomain, indexed by _x, _y, _z
} Box;
// overlapBox(dim, dir, mybox, other, cut, xprd, cutneigh):
// intersects mybox with other, where other is extended by cutneigh along dim
// (dir 0: upper side, dir 1: lower side), and stores the intersection in cut.
// Returns 0 for a direct overlap, 1 (dir 0) or -1 (dir 1) when the boxes only
// overlap after shifting other by the period xprd, and -100 when there is no overlap.
int overlapBox(int, int , const Box*, const Box* , Box* , MD_FLOAT , MD_FLOAT);
// overlapFullBox(param, cutneigh, mybox, other)
// NOTE(review): return value and exact overlap rule are defined in box.c — confirm there.
int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
// NOTE(review): presumably grows a box by a cut-off along one dimension — confirm against box.c.
void expandBox(int , const Box*, const Box* , Box* , MD_FLOAT);
#endif

104
common/includes/comm.h Normal file
View File

@@ -0,0 +1,104 @@
#include <atom.h>
#include <parameter.h>
#include <box.h>
#include <grid.h>
#ifndef COMM_H
#define COMM_H
/*
 * Per-element sizes of the communication buffers and the helpers that select
 * the local/ghost counts. All macros that touch `atom` expect a variable
 * named `atom` (pointer to Atom) in scope; IsinRegionToSend additionally
 * expects xlo/xhi, ylo/yhi and zlo/zhi in scope.
 *
 * Every expression macro is fully parenthesized so that it can be used
 * inside larger expressions (e.g. 2*LOCAL) without changing its meaning.
 */
#ifdef GROMACS
#define FORWARD_SIZE (3*CLUSTER_N)
#define REVERSE_SIZE (3*CLUSTER_N)
#define GHOST_SIZE (4*CLUSTER_N+10)
#define EXCHANGE_SIZE 7
#define JFAC (MAX(1, CLUSTER_N / CLUSTER_M))
#define LOCAL (atom->Nclusters_local / JFAC)
#define GHOST (atom->Nclusters_ghost)
/* True when the bounding box of j-cluster cj reaches into the region
 * [xlo,xhi) x [ylo,yhi) x [zlo,zhi). */
#define IsinRegionToSend(cj) \
((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo) && \
(atom->jclusters[(cj)].bbminx < xhi || atom->jclusters[(cj)].bbmaxx < xhi) && \
(atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo) && \
(atom->jclusters[(cj)].bbminy < yhi || atom->jclusters[(cj)].bbmaxy < yhi) && \
(atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo) && \
(atom->jclusters[(cj)].bbminz < zhi || atom->jclusters[(cj)].bbmaxz < zhi))
#else
#define FORWARD_SIZE 3
#define REVERSE_SIZE 3
#define GHOST_SIZE 4
#define EXCHANGE_SIZE 7
#define LOCAL (atom->Nlocal)
#define GHOST (atom->Nghost)
/* True when atom i lies inside the region [xlo,xhi) x [ylo,yhi) x [zlo,zhi). */
#define IsinRegionToSend(i) \
((atom_x((i)) >= xlo && atom_x((i)) < xhi) && \
(atom_y((i)) >= ylo && atom_y((i)) < yhi) && \
(atom_z((i)) >= zlo && atom_z((i)) < zhi))
#endif
// Communication state of one process: neighbour lists per swap, send/recv
// bookkeeping and the buffers shared by all communication routines.
typedef struct {
int myproc; // my proc ID
int numproc; // # of processors
int numneigh; // # of all my neighbours along all swaps
int maxneigh; // buffer size for my neighbours
int sendfrom[6]; // lowest neighbour index to send to in each swap
int sendtill[6]; // highest neighbour index to send to in each swap
int recvfrom[6]; // lowest neighbour index to receive from in each swap
int recvtill[6]; // highest neighbour index to receive from in each swap
int* nsend; // neighbours I want to send to
int* nrecv; // neighbours I want to receive from
int* pbc_x; // periodic boundary flag in x
int* pbc_y; // periodic boundary flag in y
int* pbc_z; // periodic boundary flag in z
int* atom_send, *atom_recv; // # of atoms to send/recv for each of my neighbours
int* off_atom_send; // atom offset to send, inside of a swap
int* off_atom_recv; // atom offset to recv, inside of a swap
int* nexch; // procs to exchange with
int numneighexch; // # of neighbours to exchange with
int maxneighexch; // max buffer size to store exchange neighbours
int numswap; // # of swaps to perform, it is 6
int swapdim[6]; // dimension of the swap (_x, _y or _z)
int swapdir[6]; // direction of the swap, 0 or 1
int swap[3][2]; // swap index for a given dimension and direction
int othersend[6]; // whether a proc interacts with more procs in a given swap
int firstrecv[6]; // where to put the first received atom in each swap
int** sendlist; // list of atoms to send in each swap
int* maxsendlist; // max # of atoms to send in each per-swap list
int maxsend; // max elements in the send buffer
int maxrecv; // max elements in the receive buffer
MD_FLOAT* buf_send; // send buffer for all communication
MD_FLOAT* buf_recv; // receive buffer for all communication
int forwardSize; // # of parameters per atom in forward communication
int reverseSize; // # of parameters per atom in reverse communication
int exchangeSize; // # of parameters per atom in exchange
int ghostSize; // # of parameters per atom in the ghost list
int iterAtom; // last atom to iterate over in each swap
Box* boxes; // boundaries to be sent to other procs as ghost regions
} Comm;
// Public communication interface. The int argument of forwardComm,
// reverseComm and ghostComm is the swap index (callers pass iswap in 0..5).
void initComm(int*, char***, Comm*); // Initializes MPI
void endComm(Comm*); // Finalizes MPI
void setupComm(Comm*,Parameter*,Grid*); // Creates a 3d grid or rcb grid
void neighComm(Comm*,Parameter*,Grid*); // Finds neighbours within the cut-off and defines ghost regions
void forwardComm(Comm*,Atom*,int); // Sends info in one direction
void reverseComm(Comm*,Atom*,int); // Returns info after a forward communication
void exchangeComm(Comm*,Atom*); // Exchanges info between procs
void ghostComm(Comm*, Atom*,int); // Builds the ghost neighbours to send during the next forward communications
void growSend(Comm*,int); // Grows the send buffer
void growRecv(Comm*,int); // Grows the receive buffer
void growList(Comm*, int, int); // Grows the list of atoms to send
#endif

51
common/includes/grid.h Normal file
View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#include <box.h>
#include <atom.h>
#include <mpi.h>
#ifndef __MAP_H_
#define __MAP_H_
// Shorthand for the global communicator.
#define world MPI_COMM_WORLD
// Coordinate of atom i along `dim`; expects a variable named `dim`
// (_x, _y or _z) in scope at the point of use.
#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))
// Balancing methods, numbered from 1.
// NOTE(review): these appear to match the values of param->balance
// (0 is reported as the plain cartesian grid) — confirm against the callers.
enum {RCB=1, meanTimeRCB, Staggered};
// Domain decomposition state shared by the cartesian, staggered and RCB partitioning.
typedef struct {
int balance_every; // NOTE(review): presumably the rebalancing interval in timesteps — confirm
int map_size; // # of entries in map: 6 per rank
MD_FLOAT* map; // boxes of all ranks; per rank: lo_x, lo_y, lo_z, hi_x, hi_y, hi_z
//===Param for Staggered balance
int nprocs[3]; // # of ranks along each dimension of the cartesian grid
int coord[3]; // coordinates of my rank in the cartesian grid
MD_FLOAT cutneigh[3]; // neighbour cut-off per dimension
double Timer; // time value used by the balancing, starts at 0
//===Param for RCB balance
MD_FLOAT* buf_send; // send buffer, NULL until allocated
MD_FLOAT* buf_recv; // receive buffer, NULL until allocated
int maxsend; // capacity of buf_send, starts at 0
int maxrecv; // capacity of buf_recv, starts at 0
} Grid;
// Signature shared by the RCB bisection methods declared below
// (meanBisect and meanTimeBisect).
typedef MD_FLOAT(*RCB_Method)(Atom*,MPI_Comm,int,double);
// Initializes the grid with the regular cartesian partition and keeps only
// the atoms inside the local box on each rank.
void setupGrid(Grid*, Atom*, Parameter*);
// Regular 3d cartesian partition; fills the grid and the given box.
void cartisian3d(Grid*, Parameter*, Box*);
// Recursive bisection balancing using the given bisection method.
void rcbBalance(Grid*, Atom*, Parameter* ,RCB_Method, int, double);
// NOTE(review): staggered-grid balancing; the double is presumably a measured time — confirm.
void staggeredBalance(Grid*, Atom*, Parameter*, double);
// Prints the boxes of all ranks (rank 0 only).
void printGrid(Grid*);
//rcb methods
MD_FLOAT meanBisect(Atom* , MPI_Comm, int, double);
MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
#endif

View File

@@ -53,6 +53,10 @@ typedef struct {
MD_FLOAT k_dn;
MD_FLOAT gx, gy, gz;
MD_FLOAT reflect_x, reflect_y, reflect_z;
//MPI implementation
int balance;
int method;
int balance_every;
} Parameter;
void initParameter(Parameter*);

View File

@@ -0,0 +1,71 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <math.h>
#include <comm.h>
#include <atom.h>
#include <timing.h>
#include <parameter.h>
#include <util.h>
//static void addDummyCluster(Atom*);
/*
 * Runs the forward communication for every swap required by param->method
 * and returns the elapsed time as measured with getTimeStamp().
 *
 * halfShell uses swaps 0..4, eightShell uses swaps 0, 2 and 4, every other
 * method uses all six swaps.
 */
double forward(Comm* comm, Atom *atom, Parameter* param){
    const double start = getTimeStamp();

    const int nswaps = (param->method == halfShell) ? 5 : 6;
    const int stride = (param->method == eightShell) ? 2 : 1;
    for(int iswap = 0; iswap < nswaps; iswap += stride) {
        forwardComm(comm, atom, iswap);
    }

    return getTimeStamp() - start;
}
/*
 * Runs the reverse communication, visiting the swaps in descending order,
 * and returns the elapsed time as measured with getTimeStamp().
 *
 * halfShell uses swaps 4..0, eightShell uses swaps 4, 2 and 0, halfStencil
 * uses swaps 5..0. Any other method (full shell) performs no reverse
 * communication.
 */
double reverse(Comm* comm, Atom *atom, Parameter* param){
    const double start = getTimeStamp();

    int first;
    int stride = 1;
    switch(param->method) {
    case halfShell:
        first = 4;
        break;
    case eightShell:
        first = 4;
        stride = 2;
        break;
    case halfStencil:
        first = 5;
        break;
    default:
        first = -1; //Full Shell Reverse does nothing
        break;
    }

    for(int iswap = first; iswap >= 0; iswap -= stride) {
        reverseComm(comm, atom, iswap);
    }

    return getTimeStamp() - start;
}
/*
 * Resets the ghost counters and rebuilds the ghost lists for every swap
 * required by param->method: swaps 0..4 for halfShell, swaps 0, 2 and 4 for
 * eightShell, all six swaps otherwise.
 */
void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
{
#ifdef GROMACS
    atom->Nclusters_ghost = 0;
#endif
    atom->Nghost = 0;

    const int nswaps = (param->method == halfShell) ? 5 : 6;
    const int stride = (param->method == eightShell) ? 2 : 1;
    for(int iswap = 0; iswap < nswaps; iswap += stride) {
        ghostComm(comm, atom, iswap);
    }
}

View File

@@ -9,9 +9,15 @@
typedef enum {
TOTAL = 0,
NEIGH,
FORCE,
NEIGH,
FORWARD,
REVERSE,
UPDATE,
BALANCE,
SETUP,
REST,
NUMTIMER
} timertype;
} timerComm;
#endif

View File

@@ -4,6 +4,8 @@
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <math.h>
#ifndef __UTIL_H_
#define __UTIL_H_
@@ -35,6 +37,13 @@
# define PRECISION_STRING "double"
#endif
// Floating-point comparisons with an absolute tolerance of 1e-9.
// Both macros may evaluate their arguments more than once, so do not pass
// expressions with side effects.
// BigOrEqual(a,b): a is greater than b, or equal to b within the tolerance.
#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
// Equal(a,b): a and b differ by less than the tolerance.
#define Equal(a,b) (fabs((a)-(b))<1e-9)
// Indices of the spatial dimensions.
enum {_x=0, _y, _z};
// Communication methods, compared against param->method.
enum {fullShell=0, halfShell, eightShell, halfStencil};
extern double myrandom(int*);
extern void random_reset(int *seed, int ibase, double *coord);
extern int str2ff(const char *string);

View File

@@ -11,6 +11,7 @@
#include <atom.h>
#include <parameter.h>
#include <util.h>
#include <mpi.h>
void initParameter(Parameter *param) {
param->input_file = NULL;
@@ -54,13 +55,17 @@ void initParameter(Parameter *param) {
param->reflect_x = 0.0;
param->reflect_y = 0.0;
param->reflect_z = 0.0;
//MPI
param->balance = 0;
param->method = 0;
param->balance_every =param->reneigh_every;
}
void readParameter(Parameter *param, const char *filename) {
FILE *fp = fopen(filename, "r");
char line[MAXLINE];
int i;
if(!fp) {
fprintf(stderr, "Could not open parameter file: %s\n", filename);
exit(-1);
@@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
line[i] = '\0';
char *tok = strtok(line, " ");
char *val = strtok(NULL, " ");
char *tok = strtok(line, "\t ");
char *val = strtok(NULL, "\t ");
#define PARSE_PARAM(p,f) if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
@@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
PARSE_INT(x_out_every);
PARSE_INT(v_out_every);
PARSE_INT(half_neigh);
PARSE_INT(method);
PARSE_INT(balance);
PARSE_INT(balance_every);
}
}
// Update dtforce
param->dtforce = 0.5 * param->dt;
// Update sigma6 parameter
MD_FLOAT s2 = param->sigma * param->sigma;
param->sigma6 = s2 * s2 * s2;
//Update balance parameter, 10 could be change
param->balance_every *=param->reneigh_every;
fclose(fp);
}
@@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
printf("\tSkin: %e\n", param->skin);
printf("\tHalf neighbor lists: %d\n", param->half_neigh);
printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
// ================ New MPI features =============
char str[20];
strcpy(str, (param->method == 1) ? "Half Shell" :
(param->method == 2) ? "Eight Shell" :
(param->method == 3) ? "Half Stencil":
"Full Shell");
printf("\tMethod: %s\n", str);
strcpy(str, (param->balance == 1) ? "mean RCB" :
(param->balance == 2) ? "mean Time RCB" :
(param->balance == 3) ? "Staggered" :
"cartisian");
printf("\tPartition: %s\n", str);
if(param->balance)
printf("\tRebalancing every (timesteps): %d\n",param->balance_every);
}

View File

@@ -10,6 +10,7 @@
#include <thermo.h>
#include <util.h>
#include <mpi.h>
static int *steparr;
static MD_FLOAT *tmparr;
@@ -24,6 +25,7 @@ static MD_FLOAT t_act;
static MD_FLOAT p_act;
static MD_FLOAT e_act;
static int mstat;
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
/* exported subroutines */
void setupThermo(Parameter *param, int natoms)
@@ -53,57 +55,73 @@ void setupThermo(Parameter *param, int natoms)
void computeThermo(int iflag, Parameter *param, Atom *atom)
{
MD_FLOAT t = 0.0, p;
MD_FLOAT t_sum = 0.0, t = 0.0, p;
int me;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
for(int i = 0; i < atom->Nlocal; i++) {
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
}
t = t * t_scale;
p = (t * dof_boltz) * p_scale;
int istep = iflag;
MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0 ,MPI_COMM_WORLD);
if(me == 0)
{
t = t_sum * t_scale;
p = (t * dof_boltz) * p_scale;
int istep = iflag;
if(iflag == -1){
istep = param->ntimes;
}
if(iflag == 0){
mstat = 0;
}
if(iflag == -1){
istep = param->ntimes;
}
if(iflag == 0){
mstat = 0;
}
steparr[mstat] = istep;
tmparr[mstat] = t;
prsarr[mstat] = p;
mstat++;
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
steparr[mstat] = istep;
tmparr[mstat] = t;
prsarr[mstat] = p;
mstat++;
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
}
}
void adjustThermo(Parameter *param, Atom *atom)
{
/* zero center-of-mass motion */
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
MD_FLOAT v_sum[3], vtot[3];
for(int i = 0; i < atom->Nlocal; i++) {
vxtot += atom_vx(i);
vytot += atom_vy(i);
vztot += atom_vz(i);
}
vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;
vxtot = vxtot / atom->Natoms;
vytot = vytot / atom->Natoms;
vztot = vztot / atom->Natoms;
MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);
vxtot = v_sum[0] / atom->Natoms;
vytot = v_sum[1] / atom->Natoms;
vztot = v_sum[2] / atom->Natoms;
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) -= vxtot;
atom_vy(i) -= vytot;
atom_vz(i) -= vztot;
}
t_act = 0;
MD_FLOAT t = 0.0;
MD_FLOAT t_sum = 0.0;
for(int i = 0; i < atom->Nlocal; i++) {
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
}
MPI_Allreduce(&t, &t_sum, 1,type, MPI_SUM,MPI_COMM_WORLD);
t = t_sum;
t *= t_scale;
MD_FLOAT factor = sqrt(param->temp / t);

View File

@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <util.h>
#include <math.h>
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
#define IA 16807
@@ -86,6 +87,7 @@ int get_cuda_num_threads() {
void readline(char *line, FILE *fp) {
if(fgets(line, MAXLINE, fp) == NULL) {
printf("error %i\n",errno);
if(errno != 0) {
perror("readline()");
exit(-1);