// MD-Bench/common/grid.c

#include <stdio.h>
#include <stdlib.h>
#include <grid.h>
#include <mpi.h>
#include <parameter.h>
#include <allocate.h>
#include <util.h>
#include <math.h>

// MPI datatype matching the precision MD_FLOAT was compiled with
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;

// Gromacs-style staggered balancing
MD_FLOAT f_normalization(MD_FLOAT* x, MD_FLOAT* fx, MD_FLOAT minx, int nprocs)
{
    MD_FLOAT sum = 0;
    // Clamp every load to the minimum and renormalize so the loads sum to 1
    for (int n = 0; n < nprocs; n++) {
        fx[n] = MAX(minx, x[n]);
        sum += fx[n];
    }
    for (int n = 0; n < nprocs; n++)
        fx[n] /= sum;
    return sum; // sum of the clamped loads (callers in this file ignore it)
}
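
/*
 * fixedPointIteration relaxes the normalized loads towards a fixed point of
 * f_normalization using the damped update
 *     x_{k+1} = (1 - alpha) * x_k + alpha * f(x_k),   alpha = 0.5,
 * and stops once every component changes by less than the tolerance (1e-3)
 * or after maxIterations steps.
 */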
void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
{
    MD_FLOAT tolerance = 1e-3;
    MD_FLOAT alpha     = 0.5;
    MD_FLOAT* fx       = (MD_FLOAT*)malloc(nprocs * sizeof(MD_FLOAT));
    int maxIterations  = 100;

    for (int i = 0; i < maxIterations; i++) {
        int converged = 1;
        f_normalization(x0, fx, minx, nprocs);

        // Relaxed update
        for (int n = 0; n < nprocs; n++)
            fx[n] = (1 - alpha) * x0[n] + alpha * fx[n];

        for (int n = 0; n < nprocs; n++) {
            if (fabs(fx[n] - x0[n]) >= tolerance) {
                converged = 0;
                break;
            }
        }

        for (int n = 0; n < nprocs; n++)
            x0[n] = fx[n];

        if (converged) break;
    }

    free(fx);
}
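
/*
 * staggeredBalance: Gromacs-like staggered load balancing. For each dimension a
 * sub-communicator gathers the time each rank spent since the last rebalance,
 * normalizes it into a load vector, smooths it with fixedPointIteration and
 * turns it into new cell limits along that dimension: cell sizes are
 * proportional to the inverse load, so ranks that took longer get smaller
 * cells. The limits are scattered back, every rank updates its local box, and
 * the exchange region (grid->cutneigh) grows by the largest boundary shift.
 */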
void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
{
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    int* coord  = grid->coord;
    int* nprocs = grid->nprocs;

    // Elapsed time since the last rebalance
    double time = newTime - grid->Timer;
    grid->Timer = newTime;

    // Store the old box bounds to compare against later for the exchange
    MD_FLOAT lo[3], hi[3];
    for (int dim = 0; dim < 3; dim++) {
        lo[dim] = atom->mybox.lo[dim];
        hi[dim] = atom->mybox.hi[dim];
    }

    // Working storage
    MPI_Comm subComm[3];
    int color[3]    = { 0, 0, 0 };
    int id[3]       = { 0, 0, 0 };
    MD_FLOAT** load = (MD_FLOAT**)malloc(3 * sizeof(MD_FLOAT*));
    for (int dim = 0; dim < 3; dim++)
        load[dim] = (MD_FLOAT*)malloc(nprocs[dim] * sizeof(MD_FLOAT));
    int maxprocs       = MAX(MAX(nprocs[_x], nprocs[_y]), nprocs[_z]);
    MD_FLOAT* cellSize = (MD_FLOAT*)malloc(maxprocs * sizeof(MD_FLOAT));
    // limits: (x0, x1), (x1, x2), ... Inner values are stored twice so a single
    // MPI_Scatter can later hand each rank its (lo, hi) pair
    MD_FLOAT* limits         = (MD_FLOAT*)malloc(2 * maxprocs * sizeof(MD_FLOAT));
    MD_FLOAT t_sum[3]        = { 0, 0, 0 };
    MD_FLOAT recv_buf[2]     = { 0, 0 };    // each rank receives only 2 elements per dimension: lo and hi
    MD_FLOAT balancedLoad[3] = { 0, 0, 0 }; // 1/nprocs
    MD_FLOAT minLoad[3]      = { 0, 0, 0 }; // beta*(1/nprocs)
    MD_FLOAT prd[3]          = { param->xprd, param->yprd, param->zprd };
    MD_FLOAT boundaries[6]   = { 0, 0, 0, 0, 0, 0 }; // xlo,xhi,ylo,yhi,zlo,zhi

    // Create a sub-communicator along each dimension
    for (int dim = 0; dim < 3; dim++) {
        if (dim == _x) {
            color[_x] = (coord[_y] == 0 && coord[_z] == 0) ? 1 : MPI_UNDEFINED;
            id[_x]    = me;
        } else if (dim == _y) {
            color[_y] = coord[_z] == 0 ? coord[_x] : MPI_UNDEFINED;
            id[_y]    = (coord[_y] == 0 && coord[_z] == 0) ? 0 : me;
        } else {
            color[_z] = coord[_y] * nprocs[_x] + coord[_x];
            id[_z]    = coord[_z] == 0 ? 0 : me;
        }
        MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
    }

    // Set the minimum load and the balanced load per dimension
    for (int dim = 0; dim < 3; dim++) {
        balancedLoad[dim] = 1. / nprocs[dim];
        minLoad[dim]      = 0.8 * balancedLoad[dim];
    }

    // Gather and normalize the workload in reverse order (z, y, x) so that the
    // accumulated time of one dimension feeds into the next lower one
    MD_FLOAT myTime = (MD_FLOAT)time; // gather in MD_FLOAT precision to match `type`
    for (int dim = _z; dim >= _x; dim--) {
        if (subComm[dim] != MPI_COMM_NULL) {
            MPI_Gather(&myTime, 1, type, load[dim], 1, type, 0, subComm[dim]);
            if (id[dim] == 0) {
                for (int n = 0; n < nprocs[dim]; n++)
                    t_sum[dim] += load[dim][n];
                for (int n = 0; n < nprocs[dim]; n++)
                    load[dim][n] /= t_sum[dim];
            }
            myTime = t_sum[dim];
        }
        MPI_Barrier(world);
    }

    // Broadcast the new boundaries along each dimension
    for (int dim = 0; dim < 3; dim++) {
        if (subComm[dim] != MPI_COMM_NULL) {
            MPI_Bcast(boundaries, 6, type, 0, subComm[dim]);
            if (id[dim] == 0) {
                fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
                // Cell sizes proportional to the inverse load, scaled to the domain length
                MD_FLOAT inv_sum = 0;
                for (int n = 0; n < nprocs[dim]; n++)
                    inv_sum += (1 / load[dim][n]);
                for (int n = 0; n < nprocs[dim]; n++)
                    cellSize[n] = (prd[dim] / load[dim][n]) * (1. / inv_sum);
                MD_FLOAT sum = 0;
                for (int n = 0; n < nprocs[dim]; n++) {
                    limits[2 * n]     = sum;
                    limits[2 * n + 1] = sum + cellSize[n];
                    sum += cellSize[n];
                }
                limits[2 * nprocs[dim] - 1] = prd[dim];
            }
            MPI_Scatter(limits, 2, type, recv_buf, 2, type, 0, subComm[dim]);
            boundaries[2 * dim]     = recv_buf[0];
            boundaries[2 * dim + 1] = recv_buf[1];
        }
        MPI_Barrier(world);
    }

    atom->mybox.lo[_x] = boundaries[0]; atom->mybox.hi[_x] = boundaries[1];
    atom->mybox.lo[_y] = boundaries[2]; atom->mybox.hi[_y] = boundaries[3];
    atom->mybox.lo[_z] = boundaries[4]; atom->mybox.hi[_z] = boundaries[5];

    MD_FLOAT domain[6] = { boundaries[0], boundaries[2], boundaries[4],
                           boundaries[1], boundaries[3], boundaries[5] };
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);

    // Because the cells change dynamically, the neighbouring exchange region
    // has to grow by the largest boundary displacement
    for (int dim = _x; dim <= _z; dim++) {
        MD_FLOAT dr, dr_max;
        dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]), fabs(hi[dim] - atom->mybox.hi[dim]));
        MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
        grid->cutneigh[dim] = param->cutneigh + dr_max;
    }

    for (int dim = 0; dim < 3; dim++) {
        if (subComm[dim] != MPI_COMM_NULL) {
            MPI_Comm_free(&subComm[dim]);
        }
        free(load[dim]);
    }
    free(load);
    free(cellSize);
    free(limits);
}

// RCB balancing
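/*
 * Recursive coordinate bisection (RCB): the ranks are repeatedly split into
 * two halves, and at every level the domain is cut along one dimension,
 * cycling through the dimensions ordered by extent (largest first). The
 * RCB_Method callback picks the cut position:
 *   - meanBisect:     mean particle coordinate within the group
 *   - meanTimeBisect: mean coordinate with each rank's atoms weighted by the
 *                     time that rank spent since the last rebalance
 */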
MD_FLOAT meanTimeBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
    MD_FLOAT mean = 0, sum = 0, total_sum = 0, weightAtoms = 0, total_weight = 0;
    for (int i = 0; i < atom->Nlocal; i++) {
        sum += atom_pos(i);
    }
    sum *= time;
    weightAtoms = atom->Nlocal * time;
    MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
    MPI_Allreduce(&weightAtoms, &total_weight, 1, type, MPI_SUM, subComm);
    mean = total_sum / total_weight;
    return mean;
}

MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
    int Natoms = 0;
    MD_FLOAT sum = 0, mean = 0, total_sum = 0;
    for (int i = 0; i < atom->Nlocal; i++) {
        sum += atom_pos(i);
    }
    MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
    MPI_Allreduce(&atom->Nlocal, &Natoms, 1, MPI_INT, MPI_SUM, subComm);
    mean = total_sum / Natoms;
    return mean;
}
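
/*
 * nextBisectionLevel performs one level of the RCB tree on subComm: ranks in
 * the lower half keep the region below the bisection, ranks in the upper half
 * keep the region above it, and each rank exchanges the atoms that left its
 * box with the partner of the same index in the other half. With an odd group
 * size the leftover last rank sends its outgoing atoms to rank 0. The branch
 * taken is encoded into *color so the next MPI_Comm_split reproduces the tree.
 */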
void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm,
    int dim, int* color, int ilevel, double time)
{
    int rank, size;
    int branch = 0, i = 0, m = 0;
    int nsend = 0, nrecv = 0, nrecv2 = 0;
    int values_per_atom = 7;
    MD_FLOAT bisection, pos;
    MPI_Request request[2] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL };
    MPI_Comm_rank(subComm, &rank);
    MPI_Comm_size(subComm, &size);
    int odd       = size % 2;
    int extraProc = odd ? size - 1 : size;
    int half      = (int)(0.5 * size);
    int partner   = (rank < half) ? rank + half : rank - half;
    if (odd && rank == extraProc) partner = 0;

    // Apply the bisection
    bisection = method(atom, subComm, dim, time);

    // Define the new boundaries
    if (rank < half) {
        atom->mybox.hi[dim] = bisection;
        branch = 0;
    } else {
        atom->mybox.lo[dim] = bisection;
        branch = 1;
    }

    // Define the new color for the further communication
    *color = (branch << ilevel) | *color;

    // Grow the send buffer if needed
    if (atom->Nlocal >= grid->maxsend) {
        if (grid->buf_send) free(grid->buf_send);
        grid->buf_send = (MD_FLOAT*)malloc(atom->Nlocal * values_per_atom * sizeof(MD_FLOAT));
        grid->maxsend  = atom->Nlocal;
    }

    // Buffer the particles to send
    while (i < atom->Nlocal) {
        pos = atom_pos(i);
        if (pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
            nsend += packExchange(atom, i, &grid->buf_send[nsend]);
            copy(atom, i, atom->Nlocal - 1);
            atom->Nlocal--;
        } else {
            i++;
        }
    }

    // Communicate the number of elements to be sent
    if (rank < extraProc) {
        MPI_Irecv(&nrecv, 1, MPI_INT, partner, 0, subComm, &request[0]);
    }
    if (odd && rank == 0) {
        MPI_Irecv(&nrecv2, 1, MPI_INT, extraProc, 0, subComm, &request[1]);
    }
    MPI_Send(&nsend, 1, MPI_INT, partner, 0, subComm);
    MPI_Waitall(2, request, MPI_STATUSES_IGNORE);

    // Grow the receive buffer if needed
    if (nrecv + nrecv2 >= grid->maxrecv) {
        if (grid->buf_recv) free(grid->buf_recv);
        grid->buf_recv = (MD_FLOAT*)malloc((nrecv + nrecv2) * values_per_atom * sizeof(MD_FLOAT));
        grid->maxrecv  = nrecv + nrecv2;
    }

    // Communicate the elements in the buffer
    request[0] = MPI_REQUEST_NULL;
    request[1] = MPI_REQUEST_NULL;
    if (rank < extraProc) {
        MPI_Irecv(grid->buf_recv, nrecv, type, partner, 0, subComm, &request[0]);
    }
    if (odd && rank == 0) {
        MPI_Irecv(&grid->buf_recv[nrecv], nrecv2, type, extraProc, 0, subComm, &request[1]);
    }
    MPI_Send(grid->buf_send, nsend, type, partner, 0, subComm);
    MPI_Waitall(2, request, MPI_STATUSES_IGNORE);

    // Store the received atoms in the local atom list
    while (m < nrecv + nrecv2) {
        m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
    }
}
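
/*
 * Usage sketch (an assumption, not taken from this file): rebalancing would be
 * triggered periodically from the driver/timestep loop, e.g.
 *
 *     if (balanceMethod == RCB) {        // RCB, STAGGERED: hypothetical flags
 *         rcbBalance(&grid, &atom, &param, meanTimeBisect, 3, currentTime);
 *     } else if (balanceMethod == STAGGERED) {
 *         staggeredBalance(&grid, &atom, &param, currentTime);
 *     }
 *
 * where currentTime comes from the same wall-clock timer that grid->Timer is
 * compared against. Only the signatures of rcbBalance, staggeredBalance and
 * meanTimeBisect above are taken from this file.
 */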
void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
{
    int me, nprocs = 0, ilevel = 0, nboxes = 1;
    int color = 0, size = 0;
    int index;
    MD_FLOAT prd[3];
    MPI_Comm subComm;
    MPI_Comm_size(world, &nprocs);
    MPI_Comm_rank(world, &me);

    // Elapsed time since the last dynamic balance
    double time = newTime - grid->Timer;
    prd[_x] = atom->mybox.xprd = param->xprd;
    prd[_y] = atom->mybox.yprd = param->yprd;
    prd[_z] = atom->mybox.zprd = param->zprd;

    // Sort the dimensions by extent, largest first
    int largerDim[3] = { _x, _y, _z };
    for (int i = 0; i < 2; i++) {
        for (int j = i + 1; j < 3; j++) {
            if (prd[largerDim[j]] > prd[largerDim[i]]) {
                int tmp      = largerDim[j];
                largerDim[j] = largerDim[i];
                largerDim[i] = tmp;
            }
        }
    }

    // Initial partition: every rank starts with the full domain
    atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
    atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
    atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;

    // Recursion tree: split the communicator in half at every level
    while (nboxes < nprocs) {
        index = ilevel % ndim;
        MPI_Comm_split(world, color, me, &subComm);
        MPI_Comm_size(subComm, &size);
        if (size > 1) {
            nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
        }
        MPI_Comm_free(&subComm);
        nboxes = (int)pow(2, ++ilevel);
    }

    // Update the balance timer
    grid->Timer = newTime;

    // Create the global map of all local boxes
    MD_FLOAT domain[6] = { atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z],
                           atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z] };
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);

    // Use the same neighbour cutoff in all dimensions for the exchange communication
    for (int dim = _x; dim <= _z; dim++)
        grid->cutneigh[dim] = param->cutneigh;
}

// Regular grid
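/*
 * cartisian3d builds the initial regular decomposition: MPI_Dims_create picks
 * a process grid for nproc ranks, MPI_Cart_create lays the ranks out on it
 * (periodic in all dimensions, no reordering), and every rank gets an equally
 * sized slice of the simulation box.
 */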
void cartisian3d(Grid* grid, Parameter* param, Box* box)
{
    int me, nproc;
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    int numdim     = 3;
    int reorder    = 0;
    int periods[3] = { 1, 1, 1 };
    int mycoord[3] = { 0, 0, 0 };
    int griddim[3] = { 0, 0, 0 };
    MD_FLOAT len[3];
    MPI_Comm cartesian;

    box->xprd = param->xprd;
    box->yprd = param->yprd;
    box->zprd = param->zprd;

    // Create a cartesian 3d grid
    MPI_Dims_create(nproc, numdim, griddim);
    MPI_Cart_create(world, numdim, griddim, periods, reorder, &cartesian);
    grid->nprocs[_x] = griddim[_x];
    grid->nprocs[_y] = griddim[_y];
    grid->nprocs[_z] = griddim[_z];

    // Coordinates of this rank in the grid
    MPI_Cart_coords(cartesian, me, 3, mycoord);
    grid->coord[_x] = mycoord[_x];
    grid->coord[_y] = mycoord[_y];
    grid->coord[_z] = mycoord[_z];

    // Boundaries of the local box, with the origin at (0,0,0)
    len[_x] = param->xprd / griddim[_x];
    len[_y] = param->yprd / griddim[_y];
    len[_z] = param->zprd / griddim[_z];
    box->lo[_x] = mycoord[_x] * len[_x];
    box->hi[_x] = (mycoord[_x] + 1) * len[_x];
    box->lo[_y] = mycoord[_y] * len[_y];
    box->hi[_y] = (mycoord[_y] + 1) * len[_y];
    box->lo[_z] = mycoord[_z] * len[_z];
    box->hi[_z] = (mycoord[_z] + 1) * len[_z];

    MD_FLOAT domain[6] = { box->lo[_x], box->lo[_y], box->lo[_z],
                           box->hi[_x], box->hi[_y], box->hi[_z] };
    MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
    MPI_Comm_free(&cartesian);

    // Use the same neighbour cutoff in all dimensions for the exchange communication
    for (int dim = _x; dim <= _z; dim++)
        grid->cutneigh[dim] = param->cutneigh;
}

// Other grid functions
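/*
 * grid->map is the global view of all local boxes: 6 values per rank, stored
 * as (xlo, ylo, zlo, xhi, yhi, zhi) and refreshed with MPI_Allgather by
 * cartisian3d, staggeredBalance and rcbBalance.
 */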
void initGrid(Grid* grid)
{
    // Start with a regular grid
    int nprocs;
    MPI_Comm_size(world, &nprocs);
    grid->map_size = 6 * nprocs;
    grid->map = (MD_FLOAT*)allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));
    //========rcb=======
    grid->maxsend  = 0;
    grid->maxrecv  = 0;
    grid->buf_send = NULL;
    grid->buf_recv = NULL;
    //====staggered=====
    grid->Timer = 0.;
}

void setupGrid(Grid* grid, Atom* atom, Parameter* param)
{
    int me;
    MD_FLOAT xlo, ylo, zlo, xhi, yhi, zhi;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    initGrid(grid);

    // Shift the origin to (0,0,0)
    if (param->input_file) {
        for (int i = 0; i < atom->Nlocal; i++) {
            atom_x(i) = atom_x(i) - param->xlo;
            atom_y(i) = atom_y(i) - param->ylo;
            atom_z(i) = atom_z(i) - param->zlo;
        }
    }

    cartisian3d(grid, param, &atom->mybox);

    xlo = atom->mybox.lo[_x]; xhi = atom->mybox.hi[_x];
    ylo = atom->mybox.lo[_y]; yhi = atom->mybox.hi[_y];
    zlo = atom->mybox.lo[_z]; zhi = atom->mybox.hi[_z];

    // Keep only the atoms that fall inside the local box; discard the rest
    int i = 0;
    while (i < atom->Nlocal) {
        if (atom_x(i) >= xlo && atom_x(i) < xhi &&
            atom_y(i) >= ylo && atom_y(i) < yhi &&
            atom_z(i) >= zlo && atom_z(i) < zhi) {
            i++;
        } else {
            copy(atom, i, atom->Nlocal - 1);
            atom->Nlocal--;
        }
    }

    // printGrid(grid);
    if (!param->balance) {
        MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
        printf("Processor:%i, Local atoms:%i, Total atoms:%i\n", me, atom->Nlocal, atom->Natoms);
        MPI_Barrier(world);
    }
}

void printGrid(Grid* grid)
{
    int me, nprocs;
    MPI_Comm_size(world, &nprocs);
    MPI_Comm_rank(world, &me);
    MD_FLOAT* map = grid->map;

    if (me == 0) {
        printf("GRID:\n");
        printf("===================================================================================================\n");
        for (int i = 0; i < nprocs; i++)
            printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n",
                i, map[6 * i], map[6 * i + 3], map[6 * i + 1], map[6 * i + 4],
                map[6 * i + 2], map[6 * i + 5]);
        printf("\n\n");
        // printf("Box processor:%i\n xlo:%.4f\txhi:%.4f\n ylo:%.4f\tyhi:%.4f\n zlo:%.4f\tzhi:%.4f\n",
        //     i, map[6*i], map[6*i+3], map[6*i+1], map[6*i+4], map[6*i+2], map[6*i+5]);
    }
    MPI_Barrier(world);
}