Compare commits

...

2 Commits
main ... MPI

Author SHA1 Message Date
JairoBuitrago
0094c3c4e1 Update neighbor.c 2024-04-15 18:12:27 +02:00
JairoBuitrago
a13a0f3bae Final MPI version 2024-04-15 16:53:25 +02:00
33 changed files with 3567 additions and 624 deletions

View File

@ -17,6 +17,9 @@ include $(MAKE_DIR)/include_ISA.mk
include $(MAKE_DIR)/include_GROMACS.mk
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
ifeq ($(strip $(OPT_SCHEME)),gromacs)
DEFINES += -DGROMACS
endif
ifeq ($(strip $(DATA_LAYOUT)),AOS)
DEFINES += -DAOS
endif

97
common/box.c Normal file
View File

@ -0,0 +1,97 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <parameter.h>
#include <util.h>
#include <box.h>
#include <mpi.h>
int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
{
int pbc = -100;
MD_FLOAT min[3], max[3];
int same = (mybox->id == other->id) ? 1 : 0;
//projections
min[_x] = MAX(mybox->lo[_x], other->lo[_x]); max[_x] = MIN(mybox->hi[_x], other->hi[_x]);
min[_y] = MAX(mybox->lo[_y], other->lo[_y]); max[_y] = MIN(mybox->hi[_y], other->hi[_y]);
min[_z] = MAX(mybox->lo[_z], other->lo[_z]); max[_z] = MIN(mybox->hi[_z], other->hi[_z]);
//Intersection no periodic case
if(!same){
if (dir == 0) max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ cutneigh);
if (dir == 1) min[dim] = MAX(mybox->lo[dim], other->lo[dim]- cutneigh);
if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z])) pbc = 0;
}
//Intersection periodic case
if(pbc < 0)
{
if(dir == 0){
min[dim] = MAX(mybox->lo[dim] , other->lo[dim]- xprd);
max[dim] = MIN(mybox->hi[dim] , other->hi[dim]- xprd + cutneigh);
} else {
min[dim] = MAX(mybox->lo[dim], other->lo[dim]+ xprd - cutneigh);
max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ xprd);
}
if((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z]))
pbc = (dir == 0) ? 1:-1;
}
//storing the cuts
cut->lo[_x] = min[_x]; cut->hi[_x] = max[_x];
cut->lo[_y] = min[_y]; cut->hi[_y] = max[_y];
cut->lo[_z] = min[_z]; cut->hi[_z] = max[_z];
return pbc;
}
int overlapFullBox(Parameter* param, MD_FLOAT *cutneigh ,const Box* mybox, const Box* other)
{
MD_FLOAT min[3], max[3];
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for(int k = -1; k < 2; k++)
{
for(int j = -1; j < 2; j++)
{
for(int i= -1; i < 2; i++)
{
min[_x] = MAX(mybox->lo[_x], other->lo[_x]-cutneigh[_x] + i*xprd);
min[_y] = MAX(mybox->lo[_y], other->lo[_y]-cutneigh[_y] + j*yprd);
min[_z] = MAX(mybox->lo[_z], other->lo[_z]-cutneigh[_z] + k*zprd);
max[_x] = MIN(mybox->hi[_x], other->hi[_x]+cutneigh[_x] + i*xprd);
max[_y] = MIN(mybox->hi[_y], other->hi[_y]+cutneigh[_y] + j*yprd);
max[_z] = MIN(mybox->hi[_z], other->hi[_z]+cutneigh[_z] + k*zprd);
if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z]))
return 1;
}
}
}
return 0;
}
void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
{
if(iswap==2 || iswap==3){
if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
}
if(iswap==4 || iswap==5){
if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
if(me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
if(me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
}
}

556
common/comm.c Normal file
View File

@ -0,0 +1,556 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <comm.h>
#include <allocate.h>
#include <mpi.h>
#include <util.h>
#define NEIGHMIN 6
#define BUFFACTOR 2
#define BUFMIN 1000
#define BUFEXTRA 100
#define world MPI_COMM_WORLD
MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
static inline void allocDynamicBuffers(Comm*);
static inline void freeDynamicBuffers(Comm*);
static inline void freeBuffers(Comm*);
void defineReverseList(Comm* comm){
int dim = 0;
int index = 0;
int me = comm->myproc;
//Set the inverse list
for(int iswap = 0; iswap<6; iswap++){
int dim = comm->swapdim[iswap];
int dir = comm->swapdir[iswap];
int invswap = comm->swap[dim][(dir+1)%2];
for(int ineigh = comm->sendfrom[invswap]; ineigh< comm->sendtill[invswap]; ineigh++)
comm->nrecv[index++] = comm->nsend[ineigh];
comm->recvfrom[iswap] = (iswap == 0) ? 0 : comm->recvtill[iswap-1];
comm->recvtill[iswap] = index;
}
//set if myproc is unique in the swap
for(int iswap = 0; iswap<6; iswap++){
int sizeswap = comm->sendtill[iswap]-comm->sendfrom[iswap];
int index = comm->sendfrom[iswap];
int myneigh = comm->nsend[index];
comm->othersend[iswap] = (sizeswap != 1 || comm->myproc != myneigh) ? 1 : 0;
}
}
void addNeighToExchangeList(Comm* comm, int newneigh){
int numneigh = comm->numneighexch;
if(comm->numneighexch>=comm->maxneighexch){
size_t oldByteSize = comm->maxneighexch*sizeof(int);
comm->maxneighexch *=2;
comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT, comm->maxneighexch * sizeof(int), oldByteSize);
}
// Add the new element to the list
comm->nexch[numneigh] = newneigh;
comm->numneighexch++;
}
//Exported functions
void neighComm(Comm *comm, Parameter* param, Grid *grid)
{
int me = comm->myproc;
int numproc = comm ->numproc;
int PAD = 6; //number of elements for processor in the map
int ineigh = 0;
int sneigh = 0;
MD_FLOAT *map = grid->map;
MD_FLOAT cutneigh = param->cutneigh;
MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
Box mybox, other, cut;
//needed for rebalancing
freeDynamicBuffers(comm);
//Local box
mybox.id = me;
mybox.lo[_x] = map[me*PAD+0]; mybox.hi[_x] = map[me*PAD+3];
mybox.lo[_y] = map[me*PAD+1]; mybox.hi[_y] = map[me*PAD+4];
mybox.lo[_z] = map[me*PAD+2]; mybox.hi[_z] = map[me*PAD+5];
//Check for all possible neighbours only for exchange atoms
comm->numneighexch = 0;
for(int proc = 0; proc <numproc; proc++){
other.id = proc;
other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];
if(proc != me){
int intersection = overlapFullBox(param,grid->cutneigh,&mybox,&other);
if(intersection) addNeighToExchangeList(comm,proc);
}
}
//MAP is stored as follows: xlo,ylo,zlo,xhi,yhi,zhi
for(int iswap = 0; iswap <6; iswap++)
{
int dir = comm->swapdir[iswap];
int dim = comm->swapdim[iswap];
for(int proc = 0; proc < numproc; proc++)
{
//Check for neighbours along dimmensions, for forwardComm, backwardComm and ghostComm
other.id = proc;
other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];
//return if two boxes intersect: -100 not intersection, 0, 1 and -1 intersection for each different pbc.
int pbc = overlapBox(dim,dir,&mybox,&other,&cut,prd[dim],cutneigh);
if(pbc == -100) continue;
expandBox(iswap, &mybox, &other, &cut, cutneigh);
if(ineigh >= comm->maxneigh) {
size_t oldByteSize = comm->maxneigh*sizeof(int);
size_t oldBoxSize = comm->maxneigh*sizeof(Box);
comm->maxneigh = 2*ineigh;
comm->nsend = (int*) reallocate(comm->nsend, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
comm->nrecv = (int*) reallocate(comm->nrecv, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
comm->pbc_x = (int*) reallocate(comm->pbc_x, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
comm->pbc_y = (int*) reallocate(comm->pbc_y, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
comm->pbc_z = (int*) reallocate(comm->pbc_z, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
comm->boxes = (Box*) reallocate(comm->boxes, ALIGNMENT, comm->maxneigh * sizeof(Box), oldBoxSize);
}
comm->boxes[ineigh] = cut;
comm->nsend[ineigh] = proc;
comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0;
comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0;
ineigh++;
}
comm->sendfrom[iswap] = (iswap == 0) ? 0:comm->sendtill[iswap-1];
comm->sendtill[iswap] = ineigh;
comm->numneigh = ineigh;
}
allocDynamicBuffers(comm);
defineReverseList(comm);
}
void initComm(int* argc, char*** argv, Comm* comm)
{
//MPI Initialize
MPI_Init(argc, argv);
MPI_Comm_size(MPI_COMM_WORLD, &(comm->numproc));
MPI_Comm_rank(MPI_COMM_WORLD, &(comm->myproc));
comm->numneigh = 0;
comm->numneighexch = 0;
comm->nrecv=NULL;
comm->nsend=NULL;
comm->nexch=NULL;
comm->pbc_x=NULL;
comm->pbc_y=NULL;
comm->pbc_z=NULL;
comm->boxes=NULL;
comm->atom_send=NULL;
comm->atom_recv=NULL;
comm->off_atom_send=NULL;
comm->off_atom_recv=NULL;
comm->maxsendlist=NULL;
comm->sendlist=NULL;
comm->buf_send=NULL;
comm->buf_recv=NULL;
}
void endComm(Comm* comm)
{
comm->maxneigh = 0;
comm->maxneighexch =0;
comm->maxsend = 0;
comm->maxrecv = 0;
freeBuffers(comm);
MPI_Finalize();
}
void setupComm(Comm* comm, Parameter* param, Grid* grid){
comm->swap[_x][0] = 0; comm->swap[_x][1] =1;
comm->swap[_y][0] = 2; comm->swap[_y][1] =3;
comm->swap[_z][0] = 4; comm->swap[_z][1] =5;
comm->swapdim[0] = comm->swapdim[1] = _x;
comm->swapdim[2] = comm->swapdim[3] = _y;
comm->swapdim[4] = comm->swapdim[5] = _z;
comm->swapdir[0] = comm->swapdir[2] = comm->swapdir[4] = 0;
comm->swapdir[1] = comm->swapdir[3] = comm->swapdir[5] = 1;
for(int i = 0; i<6; i++){
comm->sendfrom[i] = 0;
comm->sendtill[i] = 0;
comm->recvfrom[i] = 0;
comm->recvtill[i] = 0;
}
comm->forwardSize = FORWARD_SIZE; //send coordiantes x,y,z
comm->reverseSize = REVERSE_SIZE; //return forces fx, fy, fz
comm->ghostSize = GHOST_SIZE; //send x,y,z,type;
comm->exchangeSize = EXCHANGE_SIZE; //send x,y,z,vx,vy,vz,type
//Allocate memory for recv buffer and recv buffer
comm->maxsend = BUFMIN;
comm->maxrecv = BUFMIN;
comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT,(comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
comm->maxneighexch = NEIGHMIN;
comm->nexch = (int*) allocate(ALIGNMENT, comm->maxneighexch * sizeof(int));
comm->maxneigh = NEIGHMIN;
comm->nsend = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
comm->nrecv = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
comm->pbc_x = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
comm->pbc_y = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
comm->pbc_z = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
comm->boxes = (Box*) allocate(ALIGNMENT, comm->maxneigh * sizeof(Box));
neighComm(comm, param, grid);
}
void forwardComm(Comm* comm, Atom* atom, int iswap)
{
int nrqst=0, offset=0, nsend=0, nrecv=0;
int pbc[3];
int size = comm->forwardSize;
int maxrqst = comm->numneigh;
MD_FLOAT* buf;
MPI_Request requests[maxrqst];
for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
offset = comm->off_atom_send[ineigh];
pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset*size],pbc);
}
//Receives elements
if(comm->othersend[iswap])
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
offset = comm->off_atom_recv[ineigh]*size;
nrecv = comm->atom_recv[ineigh]*size;
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
}
//Send elements
if(comm->othersend[iswap])
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
offset = comm->off_atom_send[ineigh]*size;
nsend = comm->atom_send[ineigh]*size;
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
}
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
if(comm->othersend[iswap]) buf = comm->buf_recv;
else buf = comm->buf_send;
/* unpack buffer */
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
offset = comm->off_atom_recv[ineigh];
unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset*size]);
}
}
void reverseComm(Comm* comm, Atom* atom, int iswap)
{
int nrqst=0, offset=0, nsend=0, nrecv=0 ;
int size = comm->reverseSize;
int maxrqst = comm->numneigh;
MD_FLOAT* buf;
MPI_Request requests[maxrqst];
for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
offset = comm->off_atom_recv[ineigh];
packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset*size]);
}
//Receives elements
if(comm->othersend[iswap])
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
offset = comm->off_atom_send[ineigh]*size;
nrecv = comm->atom_send[ineigh]*size;
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh],0,world,&requests[nrqst++]);
}
//Send elements
if(comm->othersend[iswap])
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
offset = comm->off_atom_recv[ineigh]*size;
nsend = comm->atom_recv[ineigh]*size;
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nrecv[ineigh],0,world);
}
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
if(comm->othersend[iswap]) buf = comm->buf_recv;
else buf = comm->buf_send;
/* unpack buffer */
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
offset = comm->off_atom_send[ineigh];
unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset*size]);
}
}
void ghostComm(Comm* comm, Atom* atom,int iswap){
MD_FLOAT xlo=0, xhi=0, ylo=0, yhi=0, zlo=0, zhi=0;
MD_FLOAT* buf;
int nrqst=0, nsend=0, nrecv=0, offset=0, ineigh=0, pbc[3];
int all_recv=0, all_send=0, currentSend=0;
int size = comm->ghostSize;
int maxrqrst = comm->numneigh;
MPI_Request requests[maxrqrst];
for(int i = 0; i<maxrqrst; i++)
requests[maxrqrst]=MPI_REQUEST_NULL;
if(iswap%2==0) comm->iterAtom = LOCAL+GHOST;
int iter = 0;
for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
{
Box* tile = &comm->boxes[ineigh];
xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z];
xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];
pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
nsend = 0;
for(int i = 0; i < comm->iterAtom ; i++)
{
if(IsinRegionToSend(i)){
if(nsend >= comm->maxsendlist[ineigh]) growList(comm,ineigh,nsend);
if(currentSend + size >= comm->maxsend) growSend(comm,currentSend);
comm->sendlist[ineigh][nsend++] = i;
currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);
}
}
comm->atom_send[ineigh] = nsend; //#atoms send per neigh
comm->off_atom_send[ineigh] = all_send; //offset atom respect to neighbours in a swap
all_send += nsend; //all atoms send
}
//Receives how many elements to be received.
if(comm->othersend[iswap])
for(nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++)
MPI_Irecv(&comm->atom_recv[ineigh],1,MPI_INT,comm->nrecv[ineigh],0,world,&requests[nrqst++]);
if(!comm->othersend[iswap]) comm->atom_recv[comm->recvfrom[iswap]] = nsend;
//Communicate how many elements to be sent.
if(comm->othersend[iswap])
for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
MPI_Send(&comm->atom_send[ineigh],1,MPI_INT,comm->nsend[ineigh],0,world);
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
//Define offset to store in the recv_buff
for(int ineigh = comm->recvfrom[iswap]; ineigh<comm->recvtill[iswap]; ineigh++){
comm->off_atom_recv[ineigh] = all_recv;
all_recv += comm->atom_recv[ineigh];
}
if(all_recv*size>=comm->maxrecv) growRecv(comm,all_recv*size);
//Receives elements
if(comm->othersend[iswap])
for (nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
offset = comm->off_atom_recv[ineigh]*size;
nrecv = comm->atom_recv[ineigh]*size;
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
}
//Send elements
if(comm->othersend[iswap])
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
offset = comm->off_atom_send[ineigh]*size;
nsend = comm->atom_send[ineigh]*size;
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
}
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
if(comm->othersend[iswap]) buf = comm->buf_recv;
else buf = comm->buf_send;
//unpack elements
comm->firstrecv[iswap] = LOCAL+GHOST;
for(int i = 0; i < all_recv; i++)
unpackGhost(atom, LOCAL+GHOST, &buf[i*size]);
//Increases the buffer if needed
int max_size = MAX(comm->forwardSize,comm->reverseSize);
int max_buf = max_size * MAX(all_recv, all_send);
if(max_buf>=comm->maxrecv) growRecv(comm,max_buf);
if(max_buf>=comm->maxsend) growSend(comm,max_buf);
}
void exchangeComm(Comm* comm, Atom* atom){
MD_FLOAT x,y,z;
MD_FLOAT *lo = atom->mybox.lo;
MD_FLOAT *hi = atom->mybox.hi;
int size = comm->exchangeSize;
int numneigh = comm->numneighexch;
int offset_recv[numneigh];
int size_recv[numneigh];
MPI_Request requests[numneigh];
int i =0, nsend = 0, nrecv = 0;
int nrqst = 0;
int nlocal, offset,m;
/* enforce PBC */
pbc(atom);
if(comm->numneigh == 0) return;
nlocal = atom->Nlocal;
while(i < nlocal) {
if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
if(nsend+size >= comm->maxsend) growSend(comm, nsend);
nsend += packExchange(atom, i, &comm->buf_send[nsend]);
copy(atom, i, nlocal-1);
nlocal--;
} else i++;
}
atom->Nlocal = nlocal;
/* send/recv number of to share atoms with neighbouring procs*/
for(int ineigh = 0; ineigh < numneigh; ineigh++)
MPI_Irecv(&size_recv[ineigh],1,MPI_INT,comm->nexch[ineigh],0,world,&requests[nrqst++]);
for (int ineigh = 0; ineigh < numneigh; ineigh++)
MPI_Send(&nsend,1,MPI_INT,comm->nexch[ineigh],0,world);
MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
//Define offset to store in the recv_buff
for(int ineigh = 0; ineigh<numneigh; ineigh++){
offset_recv[ineigh] = nrecv;
nrecv += size_recv[ineigh];
}
if(nrecv >= comm->maxrecv) growRecv(comm,nrecv);
//Receives elements
nrqst=0;
for (int ineigh = 0; ineigh< numneigh; ineigh++){
offset = offset_recv[ineigh];
MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh],0,world,&requests[nrqst++]);
}
//Send elements
for (int ineigh = 0; ineigh< numneigh; ineigh++)
MPI_Send(comm->buf_send,nsend,type,comm->nexch[ineigh],0,world);
MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
nlocal = atom->Nlocal;
m = 0;
while(m < nrecv) {
x = comm->buf_recv[m + _x];
y = comm->buf_recv[m + _y];
z = comm->buf_recv[m + _z];
if(x >= lo[_x] && x < hi[_x] &&
y >= lo[_y] && y < hi[_y] &&
z >= lo[_z] && z < hi[_z]){
m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
} else {
m += size;
}
}
atom->Nlocal = nlocal;
int all_atoms=0;
MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
if(atom->Natoms!=all_atoms && comm->myproc ==0){
printf("Losing atoms! current atoms:%d expected atoms:%d\n",all_atoms,atom->Natoms);
}
}
//Internal functions
inline void growRecv(Comm* comm, int n)
{
comm -> maxrecv = BUFFACTOR * n;
if(comm->buf_recv) free(comm -> buf_recv);
comm -> buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
}
inline void growSend(Comm* comm, int n)
{
size_t oldByteSize = (comm->maxsend+BUFEXTRA)*sizeof(MD_FLOAT);
comm -> maxsend = BUFFACTOR * n;
comm -> buf_send = (MD_FLOAT*) reallocate(comm->buf_send, ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT), oldByteSize);
}
inline void growList(Comm* comm, int ineigh, int n)
{
size_t oldByteSize = comm->maxsendlist[ineigh]*sizeof(int);
comm->maxsendlist[ineigh] = BUFFACTOR * n;
comm->sendlist[ineigh] = (int*) reallocate(comm->sendlist[ineigh],ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int), oldByteSize);
}
static inline void allocDynamicBuffers(Comm* comm)
{
//Buffers depending on the # of my neighs
int numneigh = comm->numneigh;
comm->atom_send = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
comm->atom_recv = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
comm->off_atom_send = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
comm->off_atom_recv = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
comm->maxsendlist = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
for(int i = 0; i < numneigh; i++)
comm->maxsendlist[i] = BUFMIN;
comm->sendlist = (int**) allocate(ALIGNMENT, numneigh * sizeof(int*));
for(int i = 0; i < numneigh; i++)
comm->sendlist[i] = (int*) allocate(ALIGNMENT, comm->maxsendlist[i] * sizeof(int));
}
static inline void freeDynamicBuffers(Comm* comm)
{
int numneigh =comm->numneigh;
if(comm->atom_send) free(comm->atom_send);
if(comm->atom_recv) free(comm->atom_recv);
if(comm->off_atom_send) free(comm->off_atom_send);
if(comm->off_atom_recv) free(comm->off_atom_recv);
if(comm->maxsendlist) free(comm->maxsendlist);
if(comm->sendlist){
for(int i = 0; i < numneigh; i++)
if(comm->sendlist[i]) free(comm->sendlist[i]);
}
if(comm->sendlist) free(comm->sendlist);
}
static inline void freeBuffers(Comm* comm)
{
if(comm->nrecv) free(comm->nrecv);
if(comm->nsend) free(comm->nsend);
if(comm->nexch) free(comm->nexch);
if(comm->pbc_x) free(comm->pbc_x);
if(comm->pbc_y) free(comm->pbc_y);
if(comm->pbc_z) free(comm->pbc_z);
if(comm->boxes) free(comm->boxes);
if(comm->atom_send) free(comm->atom_send);
if(comm->atom_recv) free(comm->atom_recv);
if(comm->off_atom_send) free(comm->off_atom_send);
if(comm->off_atom_recv) free(comm->off_atom_recv);
if(comm->maxsendlist) free(comm->maxsendlist);
if(comm->sendlist){
for(int i = 0; i < comm->numneigh; i++)
if(comm->sendlist[i]) free(comm->sendlist[i]);
}
if(comm->sendlist) free(comm->sendlist);
if(comm->buf_send) free(comm->buf_send);
if(comm->buf_recv) free(comm->buf_recv);
}

490
common/grid.c Normal file
View File

@ -0,0 +1,490 @@
#include <stdio.h>
#include <grid.h>
#include <mpi.h>
#include <parameter.h>
#include <allocate.h>
#include <util.h>
#include <math.h>
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
//Grommacs Balancing
MD_FLOAT f_normalization(MD_FLOAT* x,MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {
MD_FLOAT sum=0;
for(int n = 0; n<nprocs; n++){
fx[n] = MAX(minx,x[n]);
sum+=fx[n];
}
for(int n = 0; n<nprocs; n++)
fx[n] /= sum;
}
void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
{
MD_FLOAT tolerance = 1e-3;
MD_FLOAT alpha = 0.5;
MD_FLOAT *fx = (MD_FLOAT*) malloc(nprocs*sizeof(MD_FLOAT));
int maxIterations = 100;
for (int i = 0; i < maxIterations; i++) {
int converged = 1;
f_normalization(x0,fx,minx,nprocs);
for(int n=0; n<nprocs; n++)
fx[n]= (1-alpha) * x0[n] + alpha * fx[n];
for (int n=0; n<nprocs; n++) {
if (fabs(fx[n] - x0[n]) >= tolerance) {
converged = 0;
break;
}
}
for (int n=0; n<nprocs; n++)
x0[n] = fx[n];
if(converged){
for(int n = 0; n<nprocs; n++)
return;
}
}
}
void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
{
int me;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int *coord = grid->coord;
int *nprocs = grid ->nprocs;
//Elapsed time since the last rebalance
double time = newTime - grid->Timer;
grid->Timer = newTime;
//store the older dimm to compare later for exchange
MD_FLOAT lo[3], hi[3];
for(int dim = 0; dim< 3; dim++){
lo[dim] = atom->mybox.lo[dim];
hi[dim] = atom->mybox.hi[dim];
}
//Define parameters
MPI_Comm subComm[3];
int color[3] = {0,0,0};
int id[3] = {0,0,0};
MD_FLOAT ** load = (MD_FLOAT**) malloc(3*sizeof(MD_FLOAT*));
for(int dim = 0; dim<3; dim++)
load[dim] = (MD_FLOAT*) malloc(nprocs[dim]*sizeof(MD_FLOAT));
int maxprocs = MAX(MAX(nprocs[_x],nprocs[_y]),nprocs[_z]);
MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs*sizeof(MD_FLOAT));
MD_FLOAT* limits = (MD_FLOAT*) malloc(2*maxprocs*sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... Repeat values in between to perfom MPI_Scatter later
MD_FLOAT t_sum[3] = {0,0,0};
MD_FLOAT recv_buf[2] = {0,0}; //Each proc only receives 2 elments per dimension xlo and xhi
MD_FLOAT balancedLoad[3] = {0,0,0}; //1/nprocs
MD_FLOAT minLoad[3] = {0,0,0}; //beta*(1/nprocs)
MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
MD_FLOAT boundaries[6] ={0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi
//Create sub-communications along each dimension
for(int dim = 0; dim<3; dim++){
if(dim == _x){
color[_x] = (coord[_y] == 0 && coord[_z] ==0) ? 1:MPI_UNDEFINED;
id[_x] = me;
} else if(dim == _y) {
color[_y] = coord[_z] == 0 ? coord[_x]:MPI_UNDEFINED;
id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0:me;
} else {
color[_z]= coord[_y]*nprocs[_x]+coord[_x];
id[_z] = coord[_z] == 0 ? 0 : me;
}
MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
}
//Set the minimum load and the balance load
for(int dim = 0; dim<3; dim++){
balancedLoad[dim] = 1./nprocs[dim];
minLoad[dim] = 0.8*balancedLoad[dim];
}
//set and communicate the workload in reverse order
for(int dim = _z; dim>= _x; dim--)
{
if(subComm[dim] != MPI_COMM_NULL){
MPI_Gather(&time,1,type,load[dim],1,type,0,subComm[dim]);
if(id[dim] == 0)
{
for(int n=0; n<nprocs[dim]; n++)
t_sum[dim] += load[dim][n];
for(int n=0; n<nprocs[dim]; n++)
load[dim][n] /= t_sum[dim];
}
time =t_sum[dim];
}
MPI_Barrier(world);
}
//Brodacast the new boundaries along dimensions
for(int dim=0; dim<3; dim++){
if(subComm[dim] != MPI_COMM_NULL){
MPI_Bcast(boundaries,6,type,0,subComm[dim]);
if(id[dim] == 0) {
fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
MD_FLOAT inv_sum=0;
for(int n=0; n<nprocs[dim];n++)
inv_sum +=(1/load[dim][n]);
for(int n=0; n<nprocs[dim];n++)
cellSize[n] = (prd[dim]/load[dim][n])*(1./inv_sum);
MD_FLOAT sum=0;
for(int n=0; n<nprocs[dim]; n++){
limits[2*n] = sum;
limits[2*n+1] = sum+cellSize[n];
sum+= cellSize[n];
}
limits[2*nprocs[dim]-1] = prd[dim];
}
MPI_Scatter(limits,2,type,recv_buf,2,type,0,subComm[dim]);
boundaries[2*dim] = recv_buf[0];
boundaries[2*dim+1] = recv_buf[1];
}
MPI_Barrier(world);
}
atom->mybox.lo[_x]=boundaries[0]; atom->mybox.hi[_x]=boundaries[1];
atom->mybox.lo[_y]=boundaries[2]; atom->mybox.hi[_y]=boundaries[3];
atom->mybox.lo[_z]=boundaries[4]; atom->mybox.hi[_z]=boundaries[5];
MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
//because cells change dynamically, It is required to increase the neighbouring exchange region
for(int dim =_x; dim<=_z; dim++){
MD_FLOAT dr,dr_max;
int n = grid->nprocs[dim];
MD_FLOAT maxdelta = 0.2*prd[dim];
dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]),fabs(hi[dim] - atom->mybox.hi[dim]));
MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
grid->cutneigh[dim] = param->cutneigh+dr_max;
}
for(int dim=0; dim<3; dim++) {
if(subComm[dim] != MPI_COMM_NULL){
MPI_Comm_free(&subComm[dim]);
}
free(load[dim]);
}
free(load);
free(limits);
}
//RCB Balancing
MD_FLOAT meanTimeBisect(Atom *atom, MPI_Comm subComm, int dim, double time)
{
MD_FLOAT mean=0, sum=0, total_sum=0, weightAtoms= 0, total_weight=0;
for(int i=0; i<atom->Nlocal; i++){
sum += atom_pos(i);
}
sum*=time;
weightAtoms = atom->Nlocal*time;
MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
MPI_Allreduce(&weightAtoms, &total_weight, 1, type, MPI_SUM, subComm);
mean = total_sum/total_weight;
return mean;
}
MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
{
int Natoms = 0;
MD_FLOAT sum=0, mean=0, total_sum=0;
for(int i=0; i<atom->Nlocal; i++){
sum += atom_pos(i);
}
MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
MPI_Allreduce(&atom->Nlocal, &Natoms, 1, MPI_INT, MPI_SUM, subComm);
mean = total_sum/Natoms;
return mean;
}
void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim ,int* color, int ilevel, double time)
{
int rank, size;
int branch = 0, i = 0, m = 0;
int nsend = 0, nrecv = 0, nrecv2 = 0;
int values_per_atom = 7;
MD_FLOAT bisection, pos;
MPI_Request request[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
MPI_Comm_rank(subComm,&rank);
MPI_Comm_size(subComm,&size);
int odd = size%2;
int extraProc = odd ? size-1:size;
int half = (int) (0.5*size);
int partner = (rank<half) ? rank+half:rank-half;
if(odd && rank == extraProc) partner = 0;
//Apply the bisection
bisection = method(atom,subComm,dim,time);
//Define the new boundaries
if(rank<half){
atom->mybox.hi[dim] = bisection;
branch = 0;
} else {
atom->mybox.lo[dim] = bisection;
branch = 1;
}
//Define new color for the further communicaton
*color = (branch << ilevel) | *color;
//Grow the send buffer
if(atom->Nlocal>=grid->maxsend){
if(grid->buf_send) free(grid->buf_send);
grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal*values_per_atom* sizeof(MD_FLOAT));
grid->maxsend = atom->Nlocal;
}
//buffer particles to send
while(i < atom->Nlocal) {
pos = atom_pos(i);
if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
nsend += packExchange(atom, i, &grid->buf_send[nsend]);
copy(atom, i, atom->Nlocal-1);
atom->Nlocal--;
} else i++;
}
//Communicate the number of elements to be sent
if(rank < extraProc){
MPI_Irecv(&nrecv,1,MPI_INT,partner,0,subComm,&request[0]);
}
if(odd && rank == 0){
MPI_Irecv(&nrecv2,1,MPI_INT,extraProc,0,subComm,&request[1]);
}
MPI_Send(&nsend,1,MPI_INT,partner,0,subComm);
MPI_Waitall(2,request,MPI_STATUS_IGNORE);
//Grow the recv buffer
if(nrecv+nrecv2>=grid->maxrecv){
if(grid->buf_recv) free(grid->buf_recv);
grid->buf_recv = (MD_FLOAT*) malloc((nrecv+nrecv2)*values_per_atom*sizeof(MD_FLOAT));
grid->maxrecv = nrecv+nrecv2;
}
//communicate elements in the buffer
request[0] = MPI_REQUEST_NULL;
request[1] = MPI_REQUEST_NULL;
if(rank < extraProc){
MPI_Irecv(grid->buf_recv,nrecv,type,partner,0,subComm,&request[0]);
}
if(odd && rank == 0){
MPI_Irecv(&grid->buf_recv[nrecv],nrecv2,type,extraProc,0,subComm,&request[1]);
}
MPI_Send (grid->buf_send,nsend,type,partner,0,subComm);
MPI_Waitall(2,request,MPI_STATUS_IGNORE);
//store atoms in atom list
while(m < nrecv+nrecv2){
m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
}
}
void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
{
int me, nprocs=0, ilevel=0, nboxes=1;
int color = 0, size =0;
int index, prd[3];
MPI_Comm subComm;
MPI_Comm_size(world, &nprocs);
MPI_Comm_rank(world, &me);
//set the elapsed time since the last dynamic balance
double time = newTime - grid->Timer;
prd[_x] = atom->mybox.xprd = param->xprd;
prd[_y] = atom->mybox.yprd = param->yprd;
prd[_z] = atom->mybox.zprd = param->zprd;
//Sort by larger dimension
int largerDim[3] ={_x, _y, _z};
for(int i = 0; i< 2; i++){
for(int j = i+1; j<3; j++)
{
if(prd[largerDim[j]]>prd[largerDim[i]]){
MD_FLOAT tmp = largerDim[j];
largerDim[j] = largerDim[i];
largerDim[i] = tmp;
}
}
}
//Initial Partition
atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;
//Recursion tree
while(nboxes<nprocs)
{
index = ilevel%ndim;
MPI_Comm_split(world, color, me, &subComm);
MPI_Comm_size(subComm,&size);
if(size > 1){
nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
}
MPI_Comm_free(&subComm);
nboxes = pow(2,++ilevel);
}
//Set the new timer grid
grid->Timer = newTime;
//Creating the global map
MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
//Define the same cutneighbour in all dimensions for the exchange communication
for(int dim =_x; dim<=_z; dim++)
grid->cutneigh[dim] = param->cutneigh;
}
//Regular grid
void cartisian3d(Grid* grid, Parameter* param, Box* box)
{
int me, nproc;
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int numdim=3;
int reorder=0;
int periods[3]={1,1,1};
int mycoord[3]={0,0,0};
int griddim[3]={0,0,0};
MD_FLOAT len[3];
MPI_Comm cartesian;
box->xprd = param->xprd;
box->yprd = param->yprd;
box->zprd = param->zprd;
//Creates a cartesian 3d grid
MPI_Dims_create(nproc, numdim, griddim);
MPI_Cart_create(world,numdim,griddim,periods,reorder,&cartesian);
grid->nprocs[_x] = griddim[_x];
grid->nprocs[_y] = griddim[_y];
grid->nprocs[_z] = griddim[_z];
//Coordinates position in the grid
MPI_Cart_coords(cartesian,me,3,mycoord);
grid->coord[_x] = mycoord[_x];
grid->coord[_y] = mycoord[_y];
grid->coord[_z] = mycoord[_z];
//boundaries of my local box, with origin in (0,0,0).
len[_x] = param->xprd / griddim[_x];
len[_y] = param->yprd / griddim[_y];
len[_z] = param->zprd / griddim[_z];
box->lo[_x] = mycoord[_x] * len[_x];
box->hi[_x] = (mycoord[_x] + 1) * len[_x];
box->lo[_y] = mycoord[_y] * len[_y];
box->hi[_y] = (mycoord[_y] + 1) * len[_y];
box->lo[_z] = mycoord[_z] * len[_z];
box->hi[_z] = (mycoord[_z] + 1) * len[_z];
MD_FLOAT domain[6] = {box->lo[_x], box->lo[_y], box->lo[_z], box->hi[_x], box->hi[_y], box->hi[_z]};
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
MPI_Comm_free(&cartesian);
//Define the same cutneighbour in all dimensions for the exchange communication
for(int dim =_x; dim<=_z; dim++)
grid->cutneigh[dim] = param->cutneigh;
}
//Other Functions from the grid
void initGrid(Grid* grid)
{ //start with regular grid
int nprocs;
MPI_Comm_size(world, &nprocs);
grid->map_size = 6 * nprocs;
grid->map = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));
//========rcb=======
grid->maxsend = 0;
grid->maxrecv = 0;
grid->buf_send = NULL;
grid->buf_recv = NULL;
//====staggered=====
grid->Timer = 0.;
}
void setupGrid(Grid* grid, Atom* atom, Parameter* param)
{
int me;
MD_FLOAT xlo, ylo, zlo, xhi, yhi, zhi;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
initGrid(grid);
//Set the origin at (0,0,0)
if(param->input_file){
for(int i=0; i<atom->Nlocal; i++){
atom_x(i) = atom_x(i) - param->xlo;
atom_y(i) = atom_y(i) - param->ylo;
atom_z(i) = atom_z(i) - param->zlo;
}
}
cartisian3d(grid, param, &atom->mybox);
xlo = atom->mybox.lo[_x]; xhi = atom->mybox.hi[_x];
ylo = atom->mybox.lo[_y]; yhi = atom->mybox.hi[_y];
zlo = atom->mybox.lo[_z]; zhi = atom->mybox.hi[_z];
int i = 0;
while(i < atom->Nlocal)
{
if(atom_x(i) >= xlo && atom_x(i)< xhi &&
atom_y(i) >= ylo && atom_y(i)< yhi &&
atom_z(i) >= zlo && atom_z(i)< zhi)
{
i++;
} else {
copy(atom, i, atom->Nlocal-1);
atom->Nlocal--;
}
}
//printGrid(grid);
if(!param->balance){
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
MPI_Barrier(world);
}
}
void printGrid(Grid* grid)
{
int me, nprocs;
MPI_Comm_size(world, &nprocs);
MPI_Comm_rank(world, &me);
MD_FLOAT* map = grid->map;
if(me==0)
{
printf("GRID:\n");
printf("===================================================================================================\n");
for(int i=0; i<nprocs; i++)
printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
printf("\n\n");
//printf("Box processor:%i\n xlo:%.4f\txhi:%.4f\n ylo:%.4f\tyhi:%.4f\n zlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
}
MPI_Barrier(world);
}

22
common/includes/box.h Normal file
View File

@ -0,0 +1,22 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __BOX_H_
#define __BOX_H_
typedef struct {
int id;
MD_FLOAT xprd, yprd, zprd; //Domain Dimension
MD_FLOAT lo[3]; //smallest coordinate of my subdomain
MD_FLOAT hi[3]; //Highest coordinate of my subdomain
} Box;
int overlapBox(int, int , const Box*, const Box* , Box* , MD_FLOAT , MD_FLOAT);
int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
void expandBox(int , const Box*, const Box* , Box* , MD_FLOAT);
#endif

104
common/includes/comm.h Normal file
View File

@ -0,0 +1,104 @@
#include <atom.h>
#include <parameter.h>
#include <box.h>
#include <grid.h>
#ifndef COMM_H
#define COMM_H
#ifdef GROMACS
#define FORWARD_SIZE (3*CLUSTER_N)
#define REVERSE_SIZE (3*CLUSTER_N)
#define GHOST_SIZE (4*CLUSTER_N+10)
#define EXCHANGE_SIZE 7
#define JFAC MAX(1, CLUSTER_N / CLUSTER_M)
#define LOCAL atom->Nclusters_local / JFAC
#define GHOST atom->Nclusters_ghost
#define IsinRegionToSend(cj) \
((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo) && \
(atom->jclusters[(cj)].bbminx < xhi || atom->jclusters[(cj)].bbmaxx < xhi) && \
(atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo) && \
(atom->jclusters[(cj)].bbminy < yhi || atom->jclusters[(cj)].bbmaxy < yhi) && \
(atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo) && \
(atom->jclusters[(cj)].bbminz < zhi || atom->jclusters[(cj)].bbmaxz < zhi))
#else
#define FORWARD_SIZE 3
#define REVERSE_SIZE 3
#define GHOST_SIZE 4
#define EXCHANGE_SIZE 7
#define LOCAL atom->Nlocal
#define GHOST atom->Nghost
#define IsinRegionToSend(i) \
((atom_x((i)) >= xlo && atom_x((i)) < xhi) && \
(atom_y((i)) >= ylo && atom_y((i)) < yhi) && \
(atom_z((i)) >= zlo && atom_z((i)) < zhi))
#endif
typedef struct {
int myproc; // my proc ID
int numproc; // # of processors
int numneigh; // # of all my neighs along all swaps
int maxneigh; // Buffer size for my neighs
int sendfrom[6]; //return the lowest neigh index to send in each swap
int sendtill[6]; //return the highest neigh index to send in each swao
int recvfrom[6]; //return the lowest neigh index to recv in each swap
int recvtill[6]; //return the highest neigh index to recv in each swap
int* nsend; // neigh whose I want to send
int* nrecv; // neigh whose I want to recv
int* pbc_x; // if pbc in x
int* pbc_y; // if pbc in y
int* pbc_z; // if pbc in z
int* atom_send, *atom_recv; // # of atoms to send/recv for each of my neighs
int* off_atom_send; // atom offset to send, inside of a swap
int* off_atom_recv; // atom offset to recv, inside of a swap
int* nexch; //procs to exchange
int numneighexch; //# of neighbours to exchange
int maxneighexch; //max buff size to store neighbours
int numswap; // # of swaps to perform, it is 6
int swapdim[6]; // dimension of the swap (_x, _y or _z)
int swapdir[6]; // direction of the swap 0 or 1
int swap[3][2]; // given a dim and dir, knows the swap
int othersend[6]; // Determine if a proc interact with more procs in a given swap
int firstrecv[6]; // where to put 1st recv atom in each swap
int** sendlist; // list of atoms to send in each swap
int* maxsendlist; // max # of atoms send in each list-swap
int maxsend; // max elements in buff sender
int maxrecv; // max elements in buff receiver
MD_FLOAT* buf_send; // sender buffer for all comm
MD_FLOAT* buf_recv; // receicer buffer for all comm
int forwardSize; // # of paramaters per atom in forward comm.
int reverseSize; // # of parameters per atom in reverse
int exchangeSize; // # of parameters per atom in exchange
int ghostSize; // # of parameters per atom in ghost list
int iterAtom; //last atom to iterate in each swap.
Box* boxes; // Boundaries to be sent to other procs as ghost.
} Comm;
void initComm(int*, char***, Comm*); //Init MPI
void endComm(Comm*); //End MPI
void setupComm(Comm*,Parameter*,Grid*); //Creates a 3d grid or rcb grid
void neighComm(Comm*,Parameter*,Grid*); //Find neighbours within cut-off and defines ghost regions
void forwardComm(Comm*,Atom*,int); //Send info in one direction
void reverseComm(Comm*,Atom*,int); //Return info after forward communication
void exchangeComm(Comm*,Atom*); //Exchange info between procs
void ghostComm(Comm*, Atom*,int); //Build the ghost neighbours to send during next forwards
void growSend(Comm*,int); //Grows the size of the buffer sender
void growRecv(Comm*,int); //Grows the size of the buffer receiver
void growList(Comm*, int, int); //Grows the size of the list to send
#endif

51
common/includes/grid.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#include <box.h>
#include <atom.h>
#include <mpi.h>
#ifndef __MAP_H_
#define __MAP_H_
#define world MPI_COMM_WORLD
#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))
enum {RCB=1, meanTimeRCB, Staggered};
typedef struct {
int balance_every;
int map_size;
MD_FLOAT* map;
//===Param for Staggerd balance
int nprocs[3];
int coord[3];
MD_FLOAT cutneigh[3];
double Timer;
//===Param for RCB balance
MD_FLOAT* buf_send;
MD_FLOAT* buf_recv;
int maxsend;
int maxrecv;
} Grid;
typedef MD_FLOAT(*RCB_Method)(Atom*,MPI_Comm,int,double);
void setupGrid(Grid*, Atom*, Parameter*);
void cartisian3d(Grid*, Parameter*, Box*);
void rcbBalance(Grid*, Atom*, Parameter* ,RCB_Method, int, double);
void staggeredBalance(Grid*, Atom*, Parameter*, double);
void printGrid(Grid*);
//rcb methods
MD_FLOAT meanBisect(Atom* , MPI_Comm, int, double);
MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
#endif

View File

@ -53,6 +53,10 @@ typedef struct {
MD_FLOAT k_dn;
MD_FLOAT gx, gy, gz;
MD_FLOAT reflect_x, reflect_y, reflect_z;
//MPI implementation
int balance;
int method;
int balance_every;
} Parameter;
void initParameter(Parameter*);

View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <math.h>
#include <comm.h>
#include <atom.h>
#include <timing.h>
#include <parameter.h>
#include <util.h>
//static void addDummyCluster(Atom*);
double forward(Comm* comm, Atom *atom, Parameter* param){
double S, E;
S = getTimeStamp();
if(param->method == halfShell){
for(int iswap = 0; iswap < 5; iswap++)
forwardComm(comm, atom, iswap);
} else if(param->method == eightShell){
for(int iswap = 0; iswap < 6; iswap+=2)
forwardComm(comm, atom, iswap);
} else {
for(int iswap = 0; iswap < 6; iswap++)
forwardComm(comm, atom, iswap);
}
E = getTimeStamp();
return E-S;
}
double reverse(Comm* comm, Atom *atom, Parameter* param){
double S, E;
S = getTimeStamp();
if(param->method == halfShell){
for(int iswap = 4; iswap >= 0; iswap--)
reverseComm(comm, atom, iswap);
} else if(param->method == eightShell){
for(int iswap = 4; iswap >= 0; iswap-=2)
reverseComm(comm, atom, iswap);
} else if(param->method == halfStencil){
for(int iswap = 5; iswap >= 0; iswap--)
reverseComm(comm, atom, iswap);
} else { } //Full Shell Reverse does nothing
E = getTimeStamp();
return E-S;
}
void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
{
#ifdef GROMACS
atom->Nclusters_ghost = 0;
#endif
atom->Nghost = 0;
if(param->method == halfShell){
for(int iswap=0; iswap<5; iswap++)
ghostComm(comm,atom,iswap);
} else if(param->method == eightShell){
for(int iswap = 0; iswap<6; iswap+=2)
ghostComm(comm, atom,iswap);
} else {
for(int iswap=0; iswap<6; iswap++)
ghostComm(comm,atom,iswap);
}
}

View File

@ -9,9 +9,15 @@
typedef enum {
TOTAL = 0,
NEIGH,
FORCE,
NEIGH,
FORWARD,
REVERSE,
UPDATE,
BALANCE,
SETUP,
REST,
NUMTIMER
} timertype;
} timerComm;
#endif

View File

@ -4,6 +4,8 @@
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <math.h>
#ifndef __UTIL_H_
#define __UTIL_H_
@ -35,6 +37,13 @@
# define PRECISION_STRING "double"
#endif
#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
#define Equal(a,b) (fabs((a)-(b))<1e-9)
enum {_x=0, _y, _z};
enum {fullShell=0, halfShell, eightShell, halfStencil};
extern double myrandom(int*);
extern void random_reset(int *seed, int ibase, double *coord);
extern int str2ff(const char *string);

View File

@ -11,6 +11,7 @@
#include <atom.h>
#include <parameter.h>
#include <util.h>
#include <mpi.h>
void initParameter(Parameter *param) {
param->input_file = NULL;
@ -54,13 +55,17 @@ void initParameter(Parameter *param) {
param->reflect_x = 0.0;
param->reflect_y = 0.0;
param->reflect_z = 0.0;
//MPI
param->balance = 0;
param->method = 0;
param->balance_every =param->reneigh_every;
}
void readParameter(Parameter *param, const char *filename) {
FILE *fp = fopen(filename, "r");
char line[MAXLINE];
int i;
if(!fp) {
fprintf(stderr, "Could not open parameter file: %s\n", filename);
exit(-1);
@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
line[i] = '\0';
char *tok = strtok(line, " ");
char *val = strtok(NULL, " ");
char *tok = strtok(line, "\t ");
char *val = strtok(NULL, "\t ");
#define PARSE_PARAM(p,f) if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
PARSE_INT(x_out_every);
PARSE_INT(v_out_every);
PARSE_INT(half_neigh);
PARSE_INT(method);
PARSE_INT(balance);
PARSE_INT(balance_every);
}
}
// Update dtforce
param->dtforce = 0.5 * param->dt;
// Update sigma6 parameter
MD_FLOAT s2 = param->sigma * param->sigma;
param->sigma6 = s2 * s2 * s2;
//Update balance parameter, 10 could be change
param->balance_every *=param->reneigh_every;
fclose(fp);
}
@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
printf("\tSkin: %e\n", param->skin);
printf("\tHalf neighbor lists: %d\n", param->half_neigh);
printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
// ================ New MPI features =============
char str[20];
strcpy(str, (param->method == 1) ? "Half Shell" :
(param->method == 2) ? "Eight Shell" :
(param->method == 3) ? "Half Stencil":
"Full Shell");
printf("\tMethod: %s\n", str);
strcpy(str, (param->balance == 1) ? "mean RCB" :
(param->balance == 2) ? "mean Time RCB" :
(param->balance == 3) ? "Staggered" :
"cartisian");
printf("\tPartition: %s\n", str);
if(param->balance)
printf("\tRebalancing every (timesteps): %d\n",param->balance_every);
}

View File

@ -10,6 +10,7 @@
#include <thermo.h>
#include <util.h>
#include <mpi.h>
static int *steparr;
static MD_FLOAT *tmparr;
@ -24,6 +25,7 @@ static MD_FLOAT t_act;
static MD_FLOAT p_act;
static MD_FLOAT e_act;
static int mstat;
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
/* exported subroutines */
void setupThermo(Parameter *param, int natoms)
@ -53,57 +55,73 @@ void setupThermo(Parameter *param, int natoms)
void computeThermo(int iflag, Parameter *param, Atom *atom)
{
MD_FLOAT t = 0.0, p;
MD_FLOAT t_sum = 0.0, t = 0.0, p;
int me;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
for(int i = 0; i < atom->Nlocal; i++) {
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
}
t = t * t_scale;
p = (t * dof_boltz) * p_scale;
int istep = iflag;
MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0 ,MPI_COMM_WORLD);
if(me == 0)
{
t = t_sum * t_scale;
p = (t * dof_boltz) * p_scale;
int istep = iflag;
if(iflag == -1){
istep = param->ntimes;
}
if(iflag == 0){
mstat = 0;
}
if(iflag == -1){
istep = param->ntimes;
}
if(iflag == 0){
mstat = 0;
}
steparr[mstat] = istep;
tmparr[mstat] = t;
prsarr[mstat] = p;
mstat++;
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
steparr[mstat] = istep;
tmparr[mstat] = t;
prsarr[mstat] = p;
mstat++;
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
}
}
void adjustThermo(Parameter *param, Atom *atom)
{
/* zero center-of-mass motion */
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
MD_FLOAT v_sum[3], vtot[3];
for(int i = 0; i < atom->Nlocal; i++) {
vxtot += atom_vx(i);
vytot += atom_vy(i);
vztot += atom_vz(i);
}
vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;
vxtot = vxtot / atom->Natoms;
vytot = vytot / atom->Natoms;
vztot = vztot / atom->Natoms;
MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);
vxtot = v_sum[0] / atom->Natoms;
vytot = v_sum[1] / atom->Natoms;
vztot = v_sum[2] / atom->Natoms;
for(int i = 0; i < atom->Nlocal; i++) {
atom_vx(i) -= vxtot;
atom_vy(i) -= vytot;
atom_vz(i) -= vztot;
}
t_act = 0;
MD_FLOAT t = 0.0;
MD_FLOAT t_sum = 0.0;
for(int i = 0; i < atom->Nlocal; i++) {
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
}
MPI_Allreduce(&t, &t_sum, 1,type, MPI_SUM,MPI_COMM_WORLD);
t = t_sum;
t *= t_scale;
MD_FLOAT factor = sqrt(param->temp / t);

View File

@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <util.h>
#include <math.h>
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
#define IA 16807
@ -86,6 +87,7 @@ int get_cuda_num_threads() {
void readline(char *line, FILE *fp) {
if(fgets(line, MAXLINE, fp) == NULL) {
printf("error %i\n",errno);
if(errno != 0) {
perror("readline()");
exit(-1);

View File

@ -1,15 +1,15 @@
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
TAG ?= ICC
TAG ?= MPIICC
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
ISA ?= AVX512
# Optimization scheme (lammps/gromacs/clusters_per_bin)
OPT_SCHEME ?= lammps
OPT_SCHEME ?= gromacs
# Enable likwid (true or false)
ENABLE_LIKWID ?= true
ENABLE_LIKWID ?= false
# SP or DP
DATA_TYPE ?= DP
# AOS or SOA
DATA_LAYOUT ?= AOS
DATA_LAYOUT ?= SOA
# Assembly syntax to generate (ATT/INTEL)
ASM_SYNTAX ?= ATT
# Debug
@ -24,13 +24,13 @@ MEM_TRACER ?= false
# Trace indexes and distances for gather-md (true or false)
INDEX_TRACER ?= false
# Compute statistics
COMPUTE_STATS ?= true
COMPUTE_STATS ?= false
# Configurations for lammps optimization scheme
# Use omp simd pragma when running with half neighbor-lists
ENABLE_OMP_SIMD ?= true
ENABLE_OMP_SIMD ?= false
# Use kernel with explicit SIMD intrinsics
USE_SIMD_KERNEL ?= false
USE_SIMD_KERNEL ?= true
# Configurations for gromacs optimization scheme
# Use reference version
@ -38,7 +38,7 @@ USE_REFERENCE_VERSION ?= false
# Enable XTC output
XTC_OUTPUT ?= false
# Check if cj is local when decreasing reaction force
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= true
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= false
# Configurations for CUDA
# Use CUDA host memory to optimize transfers

View File

@ -12,7 +12,8 @@
#include <atom.h>
#include <allocate.h>
#include <util.h>
#include <mpi.h>
void initAtom(Atom *atom) {
atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
@ -27,6 +28,7 @@ void initAtom(Atom *atom) {
atom->Nclusters = 0;
atom->Nclusters_local = 0;
atom->Nclusters_ghost = 0;
atom->NmaxGhost = 0; //Temporal
atom->Nclusters_max = 0;
atom->type = NULL;
atom->ntypes = 0;
@ -37,10 +39,19 @@ void initAtom(Atom *atom) {
atom->iclusters = NULL;
atom->jclusters = NULL;
atom->icluster_bin = NULL;
atom->PBCx = NULL;
atom->PBCy = NULL;
atom->PBCz = NULL;
initMasks(atom);
//MPI
Box *mybox = &(atom->mybox);
mybox->xprd = mybox->yprd = mybox->zprd = 0;
mybox->lo[0] = mybox->lo[1] = mybox->lo[2] = 0;
mybox->hi[0] = mybox->hi[1] = mybox->hi[2] = 0;
}
void createAtom(Atom *atom, Parameter *param) {
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
@ -90,7 +101,7 @@ void createAtom(Atom *atom, Parameter *param) {
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1;
for(m = 0; m < 5; m++) { myrandom(&n); }
vxtmp = myrandom(&n);
@ -128,22 +139,26 @@ int type_str2int(const char *type) {
}
int readAtom(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int len = strlen(param->input_file);
if(strncmp(&param->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
if(strncmp(&param->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
if(strncmp(&param->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
exit(-1);
return -1;
}
int readAtom_pdb(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int read_atoms = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@ -153,11 +168,11 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
char *item = strtok(line, " ");
if(strncmp(item, "CRYST1", 6) == 0) {
param->xlo = 0.0;
param->xhi = atof(strtok(NULL, " "));
param->xhi = atof(strtok(NULL, "\t "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
@ -166,23 +181,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
char *label;
int atom_id, comp_id;
MD_FLOAT occupancy, charge;
atom_id = atoi(strtok(NULL, " ")) - 1;
atom_id = atoi(strtok(NULL, "\t ")) - 1;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
label = strtok(NULL, " ");
comp_id = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
label = strtok(NULL, "\t ");
comp_id = atoi(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom->vx[atom_id] = 0.0;
atom->vy[atom_id] = 0.0;
atom->vz[atom_id] = 0.0;
occupancy = atof(strtok(NULL, " "));
charge = atof(strtok(NULL, " "));
occupancy = atof(strtok(NULL, "\t "));
charge = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
@ -194,14 +209,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
strncmp(item, "ENDMDL", 6) == 0) {
// Do nothing
} else {
fprintf(stderr, "Invalid item: %s\n", item);
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
}
if(!read_atoms) {
fprintf(stderr, "Input error: No atoms read!\n");
if(me==0) fprintf(stderr, "Input error: No atoms read!\n");
exit(-1);
return -1;
}
@ -217,12 +232,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_gro(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
char desc[MAXLINE];
@ -231,7 +249,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
int i = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@ -241,25 +259,25 @@ int readAtom_gro(Atom* atom, Parameter* param) {
desc[i] = '\0';
readline(line, fp);
atoms_to_read = atoi(strtok(line, " "));
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
if(me==0) fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
while(!feof(fp) && read_atoms < atoms_to_read) {
readline(line, fp);
char *label = strtok(line, " ");
int type = type_str2int(strtok(NULL, " "));
int atom_id = atoi(strtok(NULL, " ")) - 1;
char *label = strtok(line, "\t ");
int type = type_str2int(strtok(NULL, "\t "));
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
atom_id = read_atoms;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type;
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = atof(strtok(NULL, " "));
atom->vy[atom_id] = atof(strtok(NULL, " "));
atom->vz[atom_id] = atof(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
@ -269,18 +287,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
if(!feof(fp)) {
readline(line, fp);
param->xlo = 0.0;
param->xhi = atof(strtok(line, " "));
param->xhi = atof(strtok(line, "\t "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
}
if(read_atoms != atoms_to_read) {
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
if(me==0) fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
exit(-1);
return -1;
}
@ -296,12 +314,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
@ -310,7 +330,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
int ts = -1;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@ -333,47 +353,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
}
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
readline(line, fp);
param->xlo = atof(strtok(line, " "));
param->xhi = atof(strtok(NULL, " "));
param->xlo = atof(strtok(line, "\t "));
param->xhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
readline(line, fp);
param->ylo = atof(strtok(line, " "));
param->yhi = atof(strtok(NULL, " "));
param->ylo = atof(strtok(line, "\t "));
param->yhi = atof(strtok(NULL, "\t "));
param->yprd = param->yhi - param->ylo;
readline(line, fp);
param->zlo = atof(strtok(line, " "));
param->zhi = atof(strtok(NULL, " "));
param->zlo = atof(strtok(line, "\t "));
param->zhi = atof(strtok(NULL, "\t "));
param->zprd = param->zhi - param->zlo;
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
for(int i = 0; i < natoms; i++) {
readline(line, fp);
atom_id = atoi(strtok(line, " ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = atof(strtok(NULL, " "));
atom->vy[atom_id] = atof(strtok(NULL, " "));
atom->vz[atom_id] = atof(strtok(NULL, " "));
atom_id = atoi(strtok(line, "\t ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
read_atoms++;
}
} else {
fprintf(stderr, "Invalid item: %s\n", item);
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
} else {
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
if(me==0) fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
exit(-1);
return -1;
}
}
if(ts < 0 || !natoms || !read_atoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
if(me==0) fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
@ -389,7 +409,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
fclose(fp);
return natoms;
}
@ -530,3 +550,249 @@ void growClusters(Atom *atom) {
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
}
/* MPI added*/
void growPbc(Atom* atom) {
int nold = atom->NmaxGhost;
atom->NmaxGhost += DELTA;
if (atom->PBCx || atom->PBCy || atom->PBCz){
atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
} else {
atom->PBCx = (int*) malloc(atom->NmaxGhost * sizeof(int));
atom->PBCy = (int*) malloc(atom->NmaxGhost * sizeof(int));
atom->PBCz = (int*) malloc(atom->NmaxGhost * sizeof(int));
}
}
void packForward(Atom* atom, int nc, int* list, MD_FLOAT* buf, int* pbc)
{
for(int i = 0; i < nc; i++) {
int cj = list[i];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
buf[3*(displ+cjj)+0] = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
buf[3*(displ+cjj)+1] = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
buf[3*(displ+cjj)+2] = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
buf[3*(displ+cjj)+0] = -1; //x
buf[3*(displ+cjj)+1] = -1; //y
buf[3*(displ+cjj)+2] = -1; //z
}
}
}
void unpackForward(Atom* atom, int nc, int c0, MD_FLOAT* buf)
{
for(int i = 0; i < nc; i++) {
int cj = c0+i;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
if(cj_x[CL_X_OFFSET + cjj]<INFINITY) cj_x[CL_X_OFFSET + cjj] = buf[3*(displ+cjj)+0];
if(cj_x[CL_Y_OFFSET + cjj]<INFINITY) cj_x[CL_Y_OFFSET + cjj] = buf[3*(displ+cjj)+1];
if(cj_x[CL_Z_OFFSET + cjj]<INFINITY) cj_x[CL_Z_OFFSET + cjj] = buf[3*(displ+cjj)+2];
}
}
}
int packGhost(Atom* atom, int cj, MD_FLOAT* buf, int* pbc)
{
//#of elements per cluster natoms,x0,y0,z0,type_0, . . ,xn,yn,zn,type_n,bbminx,bbmaxxy,bbminy,bbmaxy,bbminz,bbmaxz
//count = 4*N_CLUSTER+7, if N_CLUSTER =4 => count = 23 value/cluster + trackpbc[x] + trackpbc[y] + trackpbc[z]
int m = 0;
if(atom->jclusters[cj].natoms > 0) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
buf[m++] = atom->jclusters[cj].natoms;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
MD_FLOAT xtmp = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
MD_FLOAT ytmp = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
MD_FLOAT ztmp = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
buf[m++] = xtmp;
buf[m++] = ytmp;
buf[m++] = ztmp;
buf[m++]= atom->cl_type[cj_sca_base + cjj];
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
buf[m++] = -1; //x
buf[m++] = -1; //y
buf[m++] = -1; //z
buf[m++] = -1; //type
}
buf[m++] = bbminx;
buf[m++] = bbmaxx;
buf[m++] = bbminy;
buf[m++] = bbmaxy;
buf[m++] = bbminz;
buf[m++] = bbmaxz;
//TODO: check atom->ncj
int ghostId = cj-atom->ncj;
//check for ghost particles
buf[m++] = (cj-atom->ncj>=0) ? pbc[_x]+atom->PBCx[ghostId]:pbc[_x];
buf[m++] = (cj-atom->ncj>=0) ? pbc[_y]+atom->PBCy[ghostId]:pbc[_y];
buf[m++] = (cj-atom->ncj>=0) ? pbc[_z]+atom->PBCz[ghostId]:pbc[_z];
}
return m;
}
int unpackGhost(Atom* atom, int cj, MD_FLOAT* buf)
{
int m = 0;
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
if(cj*jfac>=atom->Nclusters_max) growClusters(atom);
if(atom->Nclusters_ghost>=atom->NmaxGhost) growPbc(atom);
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
atom->jclusters[cj].natoms = buf[m++];
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
cj_x[CL_X_OFFSET + cjj] = buf[m++];
cj_x[CL_Y_OFFSET + cjj] = buf[m++];
cj_x[CL_Z_OFFSET + cjj] = buf[m++];
atom->cl_type[cj_sca_base + cjj] = buf[m++];
atom->Nghost++;
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
cj_x[CL_X_OFFSET + cjj] = INFINITY;
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
atom->cl_type[cj_sca_base + cjj] = -1;
m+=4;
}
atom->jclusters[cj].bbminx = buf[m++];
atom->jclusters[cj].bbmaxx = buf[m++];
atom->jclusters[cj].bbminy = buf[m++];
atom->jclusters[cj].bbmaxy = buf[m++];
atom->jclusters[cj].bbminz = buf[m++];
atom->jclusters[cj].bbmaxz = buf[m++];
atom->PBCx[atom->Nclusters_ghost] = buf[m++];
atom->PBCy[atom->Nclusters_ghost] = buf[m++];
atom->PBCz[atom->Nclusters_ghost] = buf[m++];
atom->Nclusters_ghost++;
}
void packReverse(Atom* atom, int nc, int c0, MD_FLOAT* buf)
{
for(int i = 0; i < nc; i++) {
int cj = c0+i;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
buf[3*(displ+cjj)+0] = cj_f[CL_X_OFFSET + cjj];
buf[3*(displ+cjj)+1] = cj_f[CL_Y_OFFSET + cjj];
buf[3*(displ+cjj)+2] = cj_f[CL_Z_OFFSET + cjj];
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
buf[3*(displ+cjj)+0] = -1; //x
buf[3*(displ+cjj)+1] = -1; //y
buf[3*(displ+cjj)+2] = -1; //z
}
}
}
void unpackReverse(Atom* atom, int nc, int* list, MD_FLOAT* buf)
{
for(int i = 0; i < nc; i++) {
int cj = list[i];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] += buf[3*(displ+cjj)+0];
cj_f[CL_Y_OFFSET + cjj] += buf[3*(displ+cjj)+1];
cj_f[CL_Z_OFFSET + cjj] += buf[3*(displ+cjj)+2];
}
}
}
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
{
int m = 0;
buf[m++] = atom_x(i);
buf[m++] = atom_y(i);
buf[m++] = atom_z(i);
buf[m++] = atom_vx(i);
buf[m++] = atom_vy(i);
buf[m++] = atom_vz(i);
buf[m++] = atom->type[i];
return m;
}
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
{
while(i >= atom->Nmax) growAtom(atom);
int m = 0;
atom_x(i) = buf[m++];
atom_y(i) = buf[m++];
atom_z(i) = buf[m++];
atom_vx(i) = buf[m++];
atom_vy(i) = buf[m++];
atom_vz(i) = buf[m++];
atom->type[i] = buf[m++];
return m;
}
void pbc(Atom* atom)
{
for(int i = 0; i < atom->Nlocal; i++) {
MD_FLOAT xprd = atom->mybox.xprd;
MD_FLOAT yprd = atom->mybox.yprd;
MD_FLOAT zprd = atom->mybox.zprd;
if(atom_x(i) < 0.0) atom_x(i) += xprd;
if(atom_y(i) < 0.0) atom_y(i) += yprd;
if(atom_z(i) < 0.0) atom_z(i) +=zprd;
if(atom_x(i) >= xprd) atom_x(i) -=xprd;
if(atom_y(i) >= yprd) atom_y(i) -=yprd;
if(atom_z(i) >= zprd) atom_z(i) -=zprd;
}
}
void copy(Atom* atom, int i, int j)
{
atom_x(i) = atom_x(j);
atom_y(i) = atom_y(j);
atom_z(i) = atom_z(j);
atom_vx(i) = atom_vx(j);
atom_vy(i) = atom_vy(j);
atom_vz(i) = atom_vz(j);
atom->type[i] = atom->type[j];
}

View File

@ -14,8 +14,9 @@
#include <stats.h>
#include <util.h>
#include <simd.h>
#include <math.h>
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
/*
static inline void gmx_load_simd_2xnn_interactions(
int excl,
@ -49,7 +50,6 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
@ -60,13 +60,23 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
}
}
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] = 0.0;
cj_f[CL_Y_OFFSET + cjj] = 0.0;
cj_f[CL_Z_OFFSET + cjj] = 0.0;
}
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
int ci_cj1 = CJ1_FROM_CI(ci);
@ -75,14 +85,14 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
int numneighs = neighbor->numneigh[ci];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int any = 0;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cii = 0; cii < CLUSTER_M; cii++) {
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
@ -103,6 +113,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
(ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
#endif
if(cond) {
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
@ -113,12 +124,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
if(neighbor->half_neigh) {
if(neighbor->half_neigh || param->method) {
cj_f[CL_X_OFFSET + cjj] -= delx * force;
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
}
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
@ -129,13 +139,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
}
}
}
if(any != 0) {
addStat(stats->clusters_within_cutoff, 1);
} else {
addStat(stats->clusters_outside_cutoff, 1);
}
ci_f[CL_X_OFFSET + cii] += fix;
ci_f[CL_Y_OFFSET + cii] += fiy;
ci_f[CL_Z_OFFSET + cii] += fiz;
@ -146,7 +154,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("force");
}
@ -168,7 +176,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
for(int ci = 0; ci < atom->Nclusters_local+atom->Nclusters_ghost; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
@ -178,6 +186,16 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
}
}
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] = 0.0;
cj_f[CL_Y_OFFSET + cjj] = 0.0;
cj_f[CL_Z_OFFSET + cjj] = 0.0;
}
}
double S = getTimeStamp();
#pragma omp parallel
@ -213,7 +231,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
#endif
*/
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -322,7 +340,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
fiz2 += tz2;
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal)|| param->method) {
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
}
#else
@ -373,7 +391,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
fiz2 += tz2;
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
}
#else
@ -389,7 +407,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("force");
}
@ -427,7 +445,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -562,7 +580,6 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
if(neighbor->half_neigh) {
return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
}
return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
}
@ -589,13 +606,23 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
}
}
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] = 0.0;
cj_f[CL_Y_OFFSET + cjj] = 0.0;
cj_f[CL_Z_OFFSET + cjj] = 0.0;
}
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -726,7 +753,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
fiz3 = simd_add(fiz3, tz3);
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
@ -811,7 +838,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
fiz3 = simd_add(fiz3, tz3);
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
@ -831,7 +858,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("force");
}
@ -869,7 +896,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -1070,3 +1097,120 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
return computeForceLJ_4xn_full(param, atom, neighbor, stats);
}
//Routine for eight shell method
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("computeForceLJ begin\n");
int Nshell = neighbor->Nshell;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int ci = 0; ci < Nshell; ci++) {
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
int numneighs = neighbor->numNeighShell[ci];
int cs = neighbor->listshell[ci];
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int css = 0; css < CLUSTER_N; css++) {
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
cj_f[CL_X_OFFSET + cjj] -= delx * force;
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
}
}
cs_f[CL_X_OFFSET + css] += fix;
cs_f[CL_Y_OFFSET + css] += fiy;
cs_f[CL_Z_OFFSET + css] += fiz;
}
}
// addStat(stats->calculated_forces, 1);
// addStat(stats->num_neighs, numneighs);
// addStat(stats->force_iters, (long long int)((double)numneighs));
}
}
/*
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
int Nshell = neighbor->Nshell;
Pair* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int ci = 0; ci < Nshell; ci++) {
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
int numneighs = neighbor->numNeighShell[ci];
int cs = neighbor->listshell[ci].cluster;
int css = neighbor->listshell[ci].atom;
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k].cluster;
int cjj = neighs[k].atom;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
cj_f[CL_X_OFFSET + cjj] -= delx * force;
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
cs_f[CL_X_OFFSET + css] += delx * force;
cs_f[CL_Y_OFFSET + css] += delx * force;
cs_f[CL_Z_OFFSET + css] += delx * force;
}
}
}
}
*/

View File

@ -5,6 +5,7 @@
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#include <box.h>
#ifndef __ATOM_H_
#define __ATOM_H_
@ -102,7 +103,7 @@ typedef struct {
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max, NmaxGhost,ncj;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
int *border_map;
@ -112,6 +113,7 @@ typedef struct {
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
//track the movement of a particle along boundaries
int *PBCx, *PBCy, *PBCz;
// Data in cluster format
MD_FLOAT *cl_x;
@ -128,6 +130,9 @@ typedef struct {
unsigned int masks_2xnn_fn[8];
unsigned int masks_4xn_hn[16];
unsigned int masks_4xn_fn[16];
//Info Subdomain
Box mybox;
} Atom;
extern void initAtom(Atom*);
@ -140,6 +145,18 @@ extern int readAtom_dmp(Atom*, Parameter*);
extern void growAtom(Atom*);
extern void growClusters(Atom*);
int packGhost(Atom*, int, MD_FLOAT* , int*);
int unpackGhost(Atom*, int, MD_FLOAT*);
int packExchange(Atom*, int, MD_FLOAT*);
int unpackExchange(Atom*, int, MD_FLOAT*);
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
void unpackForward(Atom*, int, int, MD_FLOAT*);
void packReverse(Atom* , int , int , MD_FLOAT*);
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
void pbc(Atom*);
void copy(Atom*, int, int);
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]

View File

@ -9,10 +9,13 @@
#include <atom.h>
#include <parameter.h>
#include <util.h>
#include <timers.h>
#include <timing.h>
#include <simd.h>
/*
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
@ -32,9 +35,9 @@ void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
}
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
@ -46,6 +49,56 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
}
}
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
}
*/
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
MD_SIMD_FLOAT dt = simd_broadcast(param->dt);
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
MD_SIMD_FLOAT x_vector = simd_fma(vx_vector, dt, simd_load(&ci_x[CL_X_OFFSET]));
MD_SIMD_FLOAT y_vector = simd_fma(vy_vector, dt, simd_load(&ci_x[CL_Y_OFFSET]));
MD_SIMD_FLOAT z_vector = simd_fma(vz_vector, dt, simd_load(&ci_x[CL_Z_OFFSET]));
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
simd_store(&ci_x[CL_X_OFFSET], x_vector);
simd_store(&ci_x[CL_Y_OFFSET], y_vector);
simd_store(&ci_x[CL_Z_OFFSET], z_vector);
}
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
}
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
}
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
}
@ -54,3 +107,6 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
void cudaInitialIntegrate(Parameter*, Atom*);
void cudaFinalIntegrate(Parameter*, Atom*);
#endif

View File

@ -25,6 +25,11 @@
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
typedef struct {
int cluster;
int atom;
} Pair;
typedef struct {
int every;
int ncalls;
@ -34,8 +39,20 @@ typedef struct {
int half_neigh;
int* neighbors;
unsigned int* neighbors_imask;
//MPI
/*
int Nshell; //# of atoms in listShell(Cluster here cover all possible ghost interactions)
int *numNeighShell; //# of neighs for each atom in listShell
Pair *neighshell; //list of neighs for each atom in listShell
Pair *listshell; //Atoms to compute the force
*/
int Nshell; //# of cluster in listShell(Cluster here cover all possible ghost interactions)
int *numNeighShell; //# of neighs for each atom in listShell
int *neighshell; //list of neighs for each atom in listShell
int *listshell; //Atoms to compute the force
} Neighbor;
extern void initNeighbor(Neighbor*, Parameter*);
extern void setupNeighbor(Parameter*, Atom*);
extern void binatoms(Atom*);

View File

@ -5,6 +5,8 @@
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <comm.h>
#include <parameter.h>
#ifndef __VTK_H_
#define __VTK_H_
@ -13,4 +15,5 @@ extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int t
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
#endif

View File

@ -5,9 +5,7 @@
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <omp.h>
//--
#include <likwid-marker.h>
//--
@ -26,6 +24,10 @@
#include <util.h>
#include <vtk.h>
#include <xtc.h>
#include <comm.h>
#include <grid.h>
#include <shell_methods.h>
#include <mpi.h>
#define HLINE "----------------------------------------------------------------------------\n"
@ -42,17 +44,55 @@ extern void copyDataFromCUDADevice(Atom *atom);
extern void cudaDeviceFree();
#endif
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time)
{
double S, E;
int dims = 3; //TODO: Adjust to do in 3d and 2d
S = getTimeStamp();
if(param->balance == RCB) {
rcbBalance(grid, atom, param, meanBisect,dims,0);
neighComm(comm, param, grid);
}else if(param->balance == meanTimeRCB){
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
neighComm(comm, param, grid);
}else if(param->balance == Staggered) {
staggeredBalance(grid, atom, param, time);
neighComm(comm, param, grid);
exchangeComm(comm,atom);
}else { } //Do nothing
//printGrid(grid);
E = getTimeStamp();
return E-S;
}
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
{
double E,S,time;
int me;
MPI_Comm_rank(world,&me);
S = getTimeStamp();
if(param->balance == meanTimeRCB || param->balance == RCB){
rcbBalance(grid, atom, param, meanBisect,3,0);
neighComm(comm, param, grid);
}
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
MPI_Barrier(world);
E = getTimeStamp();
return E-S;
}
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
initAtom(atom);
initPbc(atom);
//initPbc(atom);
initStats(stats);
initNeighbor(neighbor, param);
if(param->input_file == NULL) {
@ -60,13 +100,18 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
} else {
readAtom(atom, param);
}
setupGrid(grid,atom,param);
setupNeighbor(param, atom);
setupComm(comm, param, grid);
if(param->balance){
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
}
setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); }
buildClusters(atom);
defineJClusters(atom);
setupPbc(atom, param);
//setupPbc(atom, param);
ghostNeighbor(comm, atom, param); //change
binClusters(atom);
buildNeighbor(atom, neighbor);
initDevice(atom, neighbor);
@ -74,15 +119,15 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
return E-S;
}
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
updateSingleAtoms(atom);
updateAtomsPbc(atom, param);
//updateAtomsPbc(atom, param);
buildClusters(atom);
defineJClusters(atom);
setupPbc(atom, param);
//setupPbc(atom, param);
ghostNeighbor(comm, atom, param);
binClusters(atom);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
@ -90,15 +135,13 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
return E-S;
}
void printAtomState(Atom *atom) {
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
/* int nall = atom->Nlocal + atom->Nghost; */
/* for (int i=0; i<nall; i++) { */
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
/* } */
double updateAtoms(Comm* comm, Atom* atom){
double S,E;
S = getTimeStamp();
updateSingleAtoms(atom);
exchangeComm(comm, atom);
E = getTimeStamp();
return E-S;
}
int main(int argc, char** argv) {
@ -108,7 +151,8 @@ int main(int argc, char** argv) {
Neighbor neighbor;
Stats stats;
Parameter param;
Comm comm;
Grid grid;
LIKWID_MARKER_INIT;
#pragma omp parallel
{
@ -116,7 +160,7 @@ int main(int argc, char** argv) {
//LIKWID_MARKER_REGISTER("reneighbour");
//LIKWID_MARKER_REGISTER("pbc");
}
initComm(&argc, &argv, &comm); //change
initParameter(&param);
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
@ -158,6 +202,24 @@ int main(int argc, char** argv) {
param.half_neigh = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-method") == 0)) {
param.method = atoi(argv[++i]);
if (param.method>2 || param.method< 0){
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
endComm(&comm);
exit(0);
}
continue;
}
if((strcmp(argv[i], "-bal") == 0)) {
param.balance = atoi(argv[++i]);
if (param.balance>3 || param.balance< 0){
if(comm.myproc == 0) fprintf(stderr, "Load balance does not exist!\n");
endComm(&comm);
exit(0);
}
continue;
}
if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
param.mass = atof(argv[++i]);
continue;
@ -188,6 +250,7 @@ int main(int argc, char** argv) {
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
//TODO: add the shell and ac print options
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
@ -205,98 +268,101 @@ int main(int argc, char** argv) {
exit(EXIT_SUCCESS);
}
}
if(param.balance>0 && param.method == 1){
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
endComm(&comm);
exit(0);
}
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param);
printf(HLINE);
printf("step\ttemp\t\tpressure\n");
timer[SETUP]=setup(&param, &eam, &atom, &neighbor, &stats, &comm, &grid);
if(comm.myproc == 0) printParameter(&param);
if(comm.myproc == 0) printf(HLINE);
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
#ifdef CUDA_TARGET
copyDataToCUDADevice(&atom);
#endif
if(param.force_field == FF_EAM) {
timer[FORCE] = computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] = computeForceLJ(&param, &atom, &neighbor, &stats);
}
timer[NEIGH] = 0.0;
timer[NEIGH] = 0.0;
timer[FORWARD] = 0.0;
timer[UPDATE] = 0.0;
timer[BALANCE] = 0.0;
timer[REVERSE] = reverse(&comm, &atom, &param);
MPI_Barrier(world);
timer[TOTAL] = getTimeStamp();
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, 0);
//write_data_to_vtk_file(param.vtk_file, &comm ,&atom, 0);
printvtk(param.vtk_file, &comm, &atom, &param, 0);
}
//TODO: modify xct
if(param.xtc_file != NULL) {
xtc_init(param.xtc_file, &atom, 0);
}
for(int n = 0; n < param.ntimes; n++) {
double forceTime=0.0;
double commTime=0.0;
for(int n = 0; n < param.ntimes; n++) {
initialIntegrate(&param, &atom);
if((n + 1) % param.reneigh_every) {
if(!((n + 1) % param.prune_every)) {
timer[FORWARD]+=forward(&comm, &atom, &param);
if(!((n + 1) % param.prune_every)){
pruneNeighbor(&param, &atom, &neighbor);
}
updatePbc(&atom, &param, 0);
} else {
#ifdef CUDA_TARGET
copyDataFromCUDADevice(&atom);
#endif
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
timer[UPDATE] +=updateAtoms(&comm,&atom);
if(param.balance && !((n+1)%param.balance_every))
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , &param, timer[FORCE]);
timer[NEIGH] += reneighbour(&comm, &param, &atom, &neighbor);
#ifdef CUDA_TARGET
copyDataToCUDADevice(&atom);
isReneighboured = 1;
#endif
}
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
if(param.force_field == FF_EAM) {
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats);
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] += computeForceLJ(&param, &atom, &neighbor, &stats);
}
}
timer[REVERSE] += reverse(&comm, &atom, &param);
finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
computeThermo(n + 1, &param, &atom);
}
int write_pos = !((n + 1) % param.x_out_every);
int write_vel = !((n + 1) % param.v_out_every);
if(write_pos || write_vel) {
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
printvtk(param.vtk_file, &comm, &atom, &param, n+1);
}
//TODO: xtc file
if(param.xtc_file != NULL) {
xtc_write(&atom, n + 1, write_pos, write_vel);
}
}
}
}
#ifdef CUDA_TARGET
copyDataFromCUDADevice(&atom);
#endif
MPI_Barrier(world);
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
updateSingleAtoms(&atom);
updateAtoms(&comm,&atom);
computeThermo(-1, &param, &atom);
//TODO:
if(param.xtc_file != NULL) {
xtc_end();
}
@ -304,41 +370,35 @@ int main(int argc, char** argv) {
#ifdef CUDA_TARGET
cudaDeviceFree();
#endif
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
printf(HLINE);
double mint[NUMTIMER];
double maxt[NUMTIMER];
double sumt[NUMTIMER];
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
int Nghost;
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel
#pragma omp master
{
omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
}
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
if(comm.myproc == 0){
int n = comm.numproc;
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
printf(HLINE);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
displayStatistics(&atom, &param, &stats, timer);
#endif
#endif
}
endComm(&comm);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

View File

@ -7,15 +7,15 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <util.h>
#include <mpi.h>
#define SMALL 1.0e-6
#define FACTOR 0.999
#define eps 1.0e-9
static MD_FLOAT xprd, yprd, zprd;
static MD_FLOAT bininvx, bininvy;
static int mbinxlo, mbinylo;
@ -34,9 +34,16 @@ static int nmax;
static int nstencil; // # of bins in stencil
static int* stencil; // stencil list of bin offsets
static MD_FLOAT binsizex, binsizey;
int me; //rank
int method; // method
int shellMethod; //If shell method exist
static int coord2bin(MD_FLOAT, MD_FLOAT);
static MD_FLOAT bindist(int, int);
//static int ghostZone(Atom*, int);
static int halfZoneCluster(Atom*,int);
static int ghostClusterinRange(Atom*, int, int, MD_FLOAT);
static void neighborGhost(Atom*, Neighbor*);
/* exported subroutines */
void initNeighbor(Neighbor *neighbor, Parameter *param) {
@ -53,12 +60,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
bincount = NULL;
bin_clusters = NULL;
bin_nclusters = NULL;
neighbor->half_neigh = param->half_neigh;
neighbor->maxneighs = 100;
neighbor->maxneighs = 200;
neighbor->numneigh = NULL;
neighbor->numneigh_masked = NULL;
neighbor->neighbors = NULL;
neighbor->neighbors_imask = NULL;
//MPI
shellMethod = 0;
method = param->method;
if(method == halfShell || method == eightShell){
param->half_neigh = 1;
shellMethod = 1;
}
me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
neighbor->half_neigh = param->half_neigh;
neighbor->Nshell = 0;
neighbor->numNeighShell = NULL;
neighbor->neighshell = NULL;
neighbor->listshell = NULL;
}
void setupNeighbor(Parameter *param, Atom *atom) {
@ -77,7 +97,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Natoms)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
@ -146,6 +166,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
}
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
MD_FLOAT dm = MAX(dl, dh);
@ -163,6 +184,7 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d2 += dm0 * dm0;
return d2;
}
@ -225,7 +247,6 @@ static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("buildNeighbor start\n");
/* extend atom arrays if necessary */
if(atom->Nclusters_local > nmax) {
nmax = atom->Nclusters_local;
@ -249,7 +270,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
while(resize) {
int new_maxneighs = neighbor->maxneighs;
resize = 0;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj1 = CJ1_FROM_CI(ci);
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
@ -262,14 +282,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
int cj, m = -1;
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
const int c = bin_nclusters[jbin];
if(c > 0) {
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
@ -279,6 +297,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
if(neighbor->half_neigh && ci_cj1 > cj) {
continue;
}
jbb_zmin = atom->jclusters[cj].bbminz;
jbb_zmax = atom->jclusters[cj].bbmaxz;
dl = ibb_zmin - jbb_zmax;
@ -287,7 +306,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
dm0 = MAX(dm, 0.0);
d_bb_sq = dm0 * dm0;
} while(m + 1 < c && d_bb_sq > cutneighsq);
jbb_xmin = atom->jclusters[cj].bbminx;
jbb_xmax = atom->jclusters[cj].bbmaxx;
jbb_ymin = atom->jclusters[cj].bbminy;
@ -357,7 +375,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
jbb_zmin = atom->jclusters[cj].bbminz;
jbb_zmax = atom->jclusters[cj].bbmaxz;
}
}
}
}
}
@ -383,14 +401,15 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
if(resize) {
neighbor->maxneighs = new_maxneighs * 1.2;
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
fprintf(stdout, "RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
free(neighbor->neighbors);
free(neighbor->neighbors_imask);
neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
}
}
if(method == eightShell) neighborGhost(atom, neighbor);
/*
DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
for(int ci = 0; ci < 6; ci++) {
@ -511,46 +530,44 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
int ix, iy;
if(xin >= xprd) {
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * bininvx) - mbinxlo;
ix = (int)((xin+eps) * bininvx) - mbinxlo;
} else {
ix = (int)(xin * bininvx) - mbinxlo - 1;
ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
if(yin >= yprd) {
iy = (int)(((yin+eps) - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * bininvy) - mbinylo;
iy = (int)((yin+eps) * bininvy) - mbinylo;
} else {
iy = (int)(yin * bininvy) - mbinylo - 1;
iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
}
return (iy * mbinx + ix + 1);
}
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
if(xin >= xprd) {
*ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
*ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
*ix = (int)(xin * bininvx) - mbinxlo;
*ix = (int)((xin+eps) * bininvx) - mbinxlo;
} else {
*ix = (int)(xin * bininvx) - mbinxlo - 1;
*ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
*iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
if(yin >= yprd) {
*iy = (int)((yin + eps - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
*iy = (int)(yin * bininvy) - mbinylo;
*iy = (int)((yin+eps) * bininvy) - mbinylo;
} else {
*iy = (int)(yin * bininvy) - mbinylo - 1;
*iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
}
}
void binAtoms(Atom *atom) {
DEBUG_MESSAGE("binAtoms start\n");
int resize = 1;
while(resize > 0) {
resize = 0;
@ -567,7 +584,7 @@ void binAtoms(Atom *atom) {
resize = 1;
}
}
if(resize) {
free(bins);
atoms_per_bin *= 2;
@ -615,8 +632,7 @@ void buildClusters(Atom *atom) {
/* bin local atoms */
binAtoms(atom);
sortAtomsByZCoord(atom);
sortAtomsByZCoord(atom);
for(int bin = 0; bin < mbins; bin++) {
int c = bincount[bin];
int ac = 0;
@ -688,6 +704,9 @@ void buildClusters(Atom *atom) {
void defineJClusters(Atom *atom) {
DEBUG_MESSAGE("defineJClusters start\n");
const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
atom->ncj = atom->Nclusters_local / jfac;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int cj0 = CJ0_FROM_CI(ci);
@ -830,12 +849,11 @@ void binClusters(Atom *atom) {
}
}
}
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
const int cj = ncj + cg;
int ix = -1, iy = -1;
MD_FLOAT xtmp, ytmp;
if(shellMethod == halfShell && !halfZoneCluster(atom, cj)) continue;
if(atom->jclusters[cj].natoms > 0) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
@ -846,6 +864,7 @@ void binClusters(Atom *atom) {
coord2bin2D(xtmp, ytmp, &ix, &iy);
ix = MAX(MIN(ix, mbinx - 1), 0);
iy = MAX(MIN(iy, mbiny - 1), 0);
for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
int nix, niy;
xtmp = cj_x[CL_X_OFFSET + cjj];
@ -853,7 +872,7 @@ void binClusters(Atom *atom) {
coord2bin2D(xtmp, ytmp, &nix, &niy);
nix = MAX(MIN(nix, mbinx - 1), 0);
niy = MAX(MIN(niy, mbiny - 1), 0);
// Always put the cluster on the bin of its innermost atom so
// the cluster should be closer to local clusters
if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; }
@ -861,7 +880,6 @@ void binClusters(Atom *atom) {
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
}
int bin = iy * mbinx + ix + 1;
int c = bin_nclusters[bin];
if(c < clusters_per_bin) {
@ -883,25 +901,21 @@ void binClusters(Atom *atom) {
break;
}
}
if(!inserted) {
bin_clusters[bin * clusters_per_bin + c] = cj;
}
bin_nclusters[bin]++;
} else {
resize = 1;
}
}
}
if(resize) {
free(bin_clusters);
clusters_per_bin *= 2;
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
}
}
/*
DEBUG_MESSAGE("bin_nclusters\n");
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
@ -919,7 +933,6 @@ void updateSingleAtoms(Atom *atom) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
@ -928,12 +941,174 @@ void updateSingleAtoms(Atom *atom) {
atom->vy[Natom] = ci_v[CL_Y_OFFSET + cii];
atom->vz[Natom] = ci_v[CL_Z_OFFSET + cii];
Natom++;
}
}
}
if(Natom != atom->Nlocal) {
fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
}
DEBUG_MESSAGE("updateSingleAtoms stop\n");
}
//MPI Shell Methods
static int eightZoneCluster(Atom* atom, int cj)
{
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
int zone = 0;
MD_FLOAT *hi = atom->mybox.hi;
if (atom->jclusters[cj].bbminx +eps >=hi[_x]){
zone += 1;
}
if (atom->jclusters[cj].bbminy +eps >=hi[_y]){
zone += 2;
}
if (atom->jclusters[cj].bbminz +eps >=hi[_z]){
zone += 4;
}
return zoneMapping[zone];
}
static int halfZoneCluster(Atom* atom, int cj)
{
MD_FLOAT *hi = atom->mybox.hi;
MD_FLOAT *lo = atom->mybox.lo;
if(atom->jclusters[cj].bbmaxx < lo[_x] && atom->jclusters[cj].bbmaxy < hi[_y] &&
atom->jclusters[cj].bbmaxz < hi[_z]){
return 0;
} else if(atom->jclusters[cj].bbmaxy < lo[_y] && atom->jclusters[cj].bbmaxz < hi[_z]){
return 0;
} else if(atom->jclusters[cj].bbmaxz < lo[_z]){
return 0;
} else {
return 1;
}
}
int BoxGhostDistance(Atom *atom, int ci, int cj) {
MD_FLOAT dl = atom->jclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->jclusters[ci].bbmaxx;
MD_FLOAT dm = MAX(dl, dh);
MD_FLOAT dm0 = MAX(dm, 0.0);
MD_FLOAT dx2 = dm0 * dm0;
dl = atom->jclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
dh = atom->jclusters[cj].bbminy - atom->jclusters[ci].bbmaxy;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
MD_FLOAT dy2 = dm0 * dm0;
dl = atom->jclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
dh = atom->jclusters[cj].bbminz - atom->jclusters[ci].bbmaxz;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
MD_FLOAT dz2 = dm0 * dm0;
return dx2 > cutneighsq ? 0 : dy2 > cutneighsq ? 0 : dz2 > cutneighsq ? 0 : 1;
}
static int ghostClusterinRange(Atom *atom, int cs, int cg, MD_FLOAT rsq) {
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
MD_FLOAT *cg_x = &atom->cl_x[cj_vec_base];
for(int cii = 0; cii < atom->jclusters[cs].natoms; cii++) {
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
MD_FLOAT delx = cs_x[CL_X_OFFSET + cii] - cg_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = cs_x[CL_Y_OFFSET + cii] - cg_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = cs_x[CL_Z_OFFSET + cii] - cg_x[CL_Z_OFFSET + cjj];
if(delx * delx + dely * dely + delz * delz < rsq) {
return 1;
}
}
}
return 0;
}
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
int Nshell=0;
int Ncluster_local = atom->Nclusters_local;
int Nclusterghost = atom->Nclusters_ghost;
if(neighbor->listshell) free(neighbor->listshell);
neighbor->listshell = (int*) malloc(Nclusterghost * sizeof(int));
int* listzone = (int*) malloc(8 * Nclusterghost * sizeof(int));
int countCluster[8] = {0,0,0,0,0,0,0,0};
//Selecting ghost atoms for interaction and putting them into regions
for(int cg = atom->ncj; cg < atom->ncj+Nclusterghost; cg++) {
int czone = eightZoneCluster(atom,cg);
int *list = &listzone[Nclusterghost*czone];
int n = countCluster[czone];
list[n] = cg;
countCluster[czone]++;
//It is only necessary to find neighbour particles for 3 regions
//if(czone == 1 || czone == 2 || czone == 3)
//neighbor->listshell[Nshell++] = cg;
}
for(int zone = 1; zone<=3; zone++){
int *list = &listzone[Nclusterghost*zone];
for(int n=0; n<countCluster[zone]; n++)
neighbor->listshell[Nshell++] = list[n];
}
neighbor->Nshell = Nshell;
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
if(neighbor->neighshell) free(neighbor->neighshell);
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
int resize = 1;
while(resize)
{
resize = 0;
for(int ic = 0; ic < Nshell; ic++) {
int *neighshell = &(neighbor->neighshell[ic*neighbor->maxneighs]);
int n = 0;
int icluster = neighbor->listshell[ic];
int iczone = eightZoneCluster(atom, icluster);
for(int jczone=0; jczone<8; jczone++){
if(jczone <=iczone) continue;
if(iczone == 1 && (jczone==5||jczone==6||jczone==7)) continue;
if(iczone == 2 && (jczone==4||jczone==6||jczone==7)) continue;
if(iczone == 3 && (jczone==4||jczone==5||jczone==7)) continue;
int Ncluster = countCluster[jczone];
int* loc_zone = &listzone[jczone * Nclusterghost];
for(int k = 0; k < Ncluster ; k++) {
int jcluster = loc_zone[k];
if(BoxGhostDistance(atom, icluster, jcluster))
{
if(ghostClusterinRange(atom, icluster, jcluster, cutneighsq))
neighshell[n++] = jcluster;
}
}
}
neighbor->numNeighShell[ic] = n;
if(n >= neighbor->maxneighs){
resize = 1;
neighbor->maxneighs = n * 1.2;
fprintf(stdout, "RESIZE EIGHT SHELL %d, PROC %d\n", neighbor->maxneighs,me);
break;
}
}
if(resize) {
free(neighbor->neighshell);
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
}
}
free(listzone);
}

View File

@ -9,6 +9,11 @@
#include <atom.h>
#include <vtk.h>
#include <mpi.h>
#include <string.h>
static MPI_File _fh;
static inline void flushBuffer(char*);
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
write_local_atoms_to_vtk_file(filename, atom, timestep);
@ -188,3 +193,128 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
fclose(fp);
return 0;
}
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
{
char msg[256];
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
if(_fh == MPI_FILE_NULL) {
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
if (comm->myproc==0){
sprintf(msg, "# vtk DataFile Version 2.0\n");
sprintf(msg, "%sParticle data\n",msg);
sprintf(msg, "%sASCII\n",msg);
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
flushBuffer(msg);
}
}
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
{
if (_fh == MPI_FILE_NULL) {
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
return -1;
}
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
int extrabuff = 100;
int sizebuff = sizeline*atom->Nlocal+extrabuff;
int mysize = 0;
char* msg = (char*) malloc(sizebuff);
sprintf(msg, "");
for(int i = 0; i < atom->Nlocal; i++){
if(mysize+extrabuff >= sizebuff){
sizebuff*= 1.5;
msg = (char*) realloc(msg, sizebuff);
}
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
mysize = strlen(msg);
}
int gatherSize[comm->numproc];
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
int offset=0;
int globalSize = 0;
for(int i = 0; i < comm->myproc; i++)
offset+= gatherSize[i];
for(int i = 0; i < comm->numproc; i++)
globalSize+= gatherSize[i];
MPI_Offset displ;
MPI_Datatype FileType;
int GlobalSize[] = {globalSize};
int LocalSize[] = {mysize};
int Start[] = {offset};
if(LocalSize[0]>0){
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
} else {
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
}
MPI_Type_commit(&FileType);
MPI_File_get_size(_fh, &displ);
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
MPI_Barrier(MPI_COMM_WORLD);
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
if (comm->myproc==0){
sprintf(msg, "\n\n");
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1 %d\n", msg, i);
flushBuffer(msg);
sprintf(msg, "\n\n");
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1\n",msg);
flushBuffer(msg);
sprintf(msg, "\n\n");
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
sprintf(msg, "%sSCALARS mass double\n",msg);
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1.0\n",msg);
sprintf(msg, "%s\n\n",msg);
flushBuffer(msg);
}
}
void vtkClose()
{
MPI_File_close(&_fh);
_fh=MPI_FILE_NULL;
}
//TODO: print ghost and cluster using MPI
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
{
if(comm->numproc == 1)
{
write_data_to_vtk_file(filename, atom, timestep);
return;
}
vtkOpen(filename, comm, atom, timestep);
vtkVector(comm, atom, param);
vtkClose();
}
static inline void flushBuffer(char* msg){
MPI_Offset displ;
MPI_File_get_size(_fh, &displ);
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
}

32
include_MPIICC.mk Normal file
View File

@ -0,0 +1,32 @@
CC = mpiicc
LINKER = $(CC)
OPENMP = #-qopenmp
PROFILE = #-profile-functions -g -pg
ifeq ($(ISA),AVX512)
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) #-g -debug
endif
ifeq ($(ISA),AVX2)
OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
#OPTS = -Ofast -xAVX2 $(PROFILE)
#OPTS = -Ofast -march=core-avx2 $(PROFILE)
endif
ifeq ($(ISA),AVX)
OPTS = -Ofast -xAVX $(PROFILE)
endif
ifeq ($(ISA),SSE)
OPTS = -Ofast -xSSE4.2 $(PROFILE)
endif
#OPTS = -Ofast -no-vec $(PROFILE)
#OPTS = -Ofast -xHost $(PROFILE)
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
ASFLAGS = #-masm=intel
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE -DNO_ZMM_INTRIN
INCLUDES =
LIBS = -lm

View File

@ -9,10 +9,12 @@
#include <string.h>
#include <math.h>
#include <parameter.h>
#include <atom.h>
#include <allocate.h>
#include <device.h>
#include <util.h>
#include <mpi.h>
#define DELTA 20000
@ -21,10 +23,10 @@
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
void initAtom(Atom *atom) {
void initAtom(Atom *atom){
atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
@ -41,6 +43,7 @@ void initAtom(Atom *atom) {
atom->radius = NULL;
atom->av = NULL;
atom->r = NULL;
atom->border_map = NULL;
DeviceAtom *d_atom = &(atom->d_atom);
d_atom->x = NULL; d_atom->y = NULL; d_atom->z = NULL;
@ -52,12 +55,19 @@ void initAtom(Atom *atom) {
d_atom->sigma6 = NULL;
d_atom->cutforcesq = NULL;
d_atom->cutneighsq = NULL;
//MPI
Box *mybox = &(atom->mybox);
mybox->xprd = mybox->yprd = mybox->zprd = 0;
mybox->lo[_x] = mybox->lo[_y] = mybox->lo[_z] = 0;
mybox->hi[_x] = mybox->hi[_y] = mybox->hi[_z] = 0;
}
void createAtom(Atom *atom, Parameter *param) {
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
MD_FLOAT xlo = 0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0; MD_FLOAT zhi = param->zprd;
atom->Natoms = 4 * param->nx * param->ny * param->nz;
atom->Nlocal = 0;
atom->ntypes = param->ntypes;
@ -107,15 +117,15 @@ void createAtom(Atom *atom, Parameter *param) {
xtmp = 0.5 * alat * i;
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
if( xtmp >= xlo && xtmp < xhi &&
ytmp >= ylo && ytmp < yhi &&
ztmp >= zlo && ztmp < zhi ) {
n = k * (2 * param->ny) * (2 * param->nx) +
j * (2 * param->nx) +
i + 1;
for(m = 0; m < 5; m++) {
myrandom(&n);
}
@ -131,7 +141,7 @@ void createAtom(Atom *atom, Parameter *param) {
}
vztmp = myrandom(&n);
if(atom->Nlocal == atom->Nmax) {
while(atom->Nlocal >= atom->Nmax) {
growAtom(atom);
}
@ -163,38 +173,42 @@ int type_str2int(const char *type) {
return -1;
}
int readAtom(Atom* atom, Parameter* param) {
int readAtom(Atom *atom, Parameter *param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int len = strlen(param->input_file);
if(strncmp(&param->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
if(strncmp(&param->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
if(strncmp(&param->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
if(strncmp(&param->input_file[len - 3], ".in", 3) == 0) { return readAtom_in(atom, param); }
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
exit(-1);
return -1;
}
int readAtom_pdb(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int read_atoms = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
while(!feof(fp)) {
readline(line, fp);
char *item = strtok(line, " ");
char *item = strtok(line, "\t ");
if(strncmp(item, "CRYST1", 6) == 0) {
param->xlo = 0.0;
param->xhi = atof(strtok(NULL, " "));
param->xhi = atof(strtok(NULL, "\t "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
@ -203,23 +217,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
char *label;
int atom_id, comp_id;
MD_FLOAT occupancy, charge;
atom_id = atoi(strtok(NULL, " ")) - 1;
atom_id = atoi(strtok(NULL, "\t ")) - 1;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
label = strtok(NULL, " ");
comp_id = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
label = strtok(NULL, "\t ");
comp_id = atoi(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom_vx(atom_id) = 0.0;
atom_vy(atom_id) = 0.0;
atom_vz(atom_id) = 0.0;
occupancy = atof(strtok(NULL, " "));
charge = atof(strtok(NULL, " "));
occupancy = atof(strtok(NULL, "\t "));
charge = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
@ -231,14 +245,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
strncmp(item, "ENDMDL", 6) == 0) {
// Do nothing
} else {
fprintf(stderr, "Invalid item: %s\n", item);
if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
}
if(!read_atoms) {
fprintf(stderr, "Input error: No atoms read!\n");
if(me==0)fprintf(stderr, "Input error: No atoms read!\n");
exit(-1);
return -1;
}
@ -254,12 +268,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_gro(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
char desc[MAXLINE];
@ -268,7 +285,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
int i = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@ -277,26 +294,26 @@ int readAtom_gro(Atom* atom, Parameter* param) {
for(i = 0; desc[i] != '\n'; i++);
desc[i] = '\0';
readline(line, fp);
atoms_to_read = atoi(strtok(line, " "));
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
atoms_to_read = atoi(strtok(line, "\t "));
if(me==0)fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
while(!feof(fp) && read_atoms < atoms_to_read) {
readline(line, fp);
char *label = strtok(line, " ");
int type = type_str2int(strtok(NULL, " "));
int atom_id = atoi(strtok(NULL, " ")) - 1;
char *label = strtok(line, "\t ");
int type = type_str2int(strtok(NULL, "\t "));
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
atom_id = read_atoms;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type;
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = atof(strtok(NULL, " "));
atom_vy(atom_id) = atof(strtok(NULL, " "));
atom_vz(atom_id) = atof(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
@ -306,18 +323,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
if(!feof(fp)) {
readline(line, fp);
param->xlo = 0.0;
param->xhi = atof(strtok(line, " "));
param->xhi = atof(strtok(line, "\t "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
}
if(read_atoms != atoms_to_read) {
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
if(me==0)fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
exit(-1);
return -1;
}
@ -333,12 +350,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
@ -347,7 +366,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
int ts = -1;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@ -370,47 +389,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
}
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
readline(line, fp);
param->xlo = atof(strtok(line, " "));
param->xhi = atof(strtok(NULL, " "));
param->xlo = atof(strtok(line, "\t "));
param->xhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
readline(line, fp);
param->ylo = atof(strtok(line, " "));
param->yhi = atof(strtok(NULL, " "));
param->ylo = atof(strtok(line, "\t "));
param->yhi = atof(strtok(NULL, "\t "));
param->yprd = param->yhi - param->ylo;
readline(line, fp);
param->zlo = atof(strtok(line, " "));
param->zhi = atof(strtok(NULL, " "));
param->zlo = atof(strtok(line, "\t "));
param->zhi = atof(strtok(NULL, "\t "));
param->zprd = param->zhi - param->zlo;
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
for(int i = 0; i < natoms; i++) {
readline(line, fp);
atom_id = atoi(strtok(line, " ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = atof(strtok(NULL, " "));
atom_vy(atom_id) = atof(strtok(NULL, " "));
atom_vz(atom_id) = atof(strtok(NULL, " "));
atom_id = atoi(strtok(line, "\t ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
read_atoms++;
}
} else {
fprintf(stderr, "Invalid item: %s\n", item);
if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
} else {
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
if(me==0)fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
exit(-1);
return -1;
}
}
if(ts < 0 || !natoms || !read_atoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
@ -426,30 +445,34 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
return natoms;
}
int readAtom_in(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
int atom_id = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
readline(line, fp);
natoms = atoi(strtok(line, " "));
param->xlo = atof(strtok(NULL, " "));
param->xhi = atof(strtok(NULL, " "));
param->ylo = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, " "));
param->zlo = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, " "));
natoms = atoi(strtok(line, "\t "));
param->xlo = atof(strtok(NULL, "\t "));
param->xhi = atof(strtok(NULL, "\t "));
param->ylo = atof(strtok(NULL, "\t "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = atof(strtok(NULL, "\t "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
atom->Natoms = natoms;
atom->Nlocal = natoms;
atom->ntypes = 1;
@ -462,27 +485,26 @@ int readAtom_in(Atom* atom, Parameter* param) {
readline(line, fp);
// TODO: store mass per atom
char *s_mass = strtok(line, " ");
char *s_mass = strtok(line, "\t ");
if(strncmp(s_mass, "inf", 3) == 0) {
// Set atom's mass to INFINITY
} else {
param->mass = atof(s_mass);
}
atom->radius[atom_id] = atof(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom_vx(atom_id) = atof(strtok(NULL, " "));
atom_vy(atom_id) = atof(strtok(NULL, " "));
atom_vz(atom_id) = atof(strtok(NULL, " "));
atom->radius[atom_id] = atof(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
atom->type[atom_id] = 0;
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
atom_id++;
}
if(!natoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
@ -498,25 +520,10 @@ int readAtom_in(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
return natoms;
}
void writeAtom(Atom *atom, Parameter *param) {
FILE *fp = fopen(param->write_atom_file, "w");
for(int i = 0; i < atom->Nlocal; i++) {
fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
atom->type[i], 1.0,
atom_x(i), atom_y(i), atom_z(i),
atom_vx(i), atom_vy(i), atom_vz(i));
}
fclose(fp);
fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
param->write_atom_file, param->xprd, param->yprd, param->zprd);
}
void growAtom(Atom *atom) {
DeviceAtom *d_atom = &(atom->d_atom);
int nold = atom->Nmax;
@ -545,7 +552,125 @@ void growAtom(Atom *atom) {
REALLOC(type, int, atom->Nmax * sizeof(int), nold * sizeof(int));
// DEM
atom->radius = (MD_FLOAT *) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->radius = (MD_FLOAT*) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
atom->r = (MD_FLOAT*) reallocate(atom->r, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
}
/* MPI added*/
void packForward(Atom* atom, int n ,int* list, MD_FLOAT* buf, int* pbc)
{
int i, j;
for(i = 0; i < n; i++) {
j = list[i];
buf_x(i) = atom_x(j) + pbc[0] * atom->mybox.xprd;
buf_y(i) = atom_y(j) + pbc[1] * atom->mybox.yprd;
buf_z(i) = atom_z(j) + pbc[2] * atom->mybox.zprd;
}
}
void unpackForward(Atom* atom, int n, int first, MD_FLOAT* buf)
{
for(int i = 0; i < n; i++) {
atom_x((first + i)) = buf_x(i);
atom_y((first + i)) = buf_y(i);
atom_z((first + i)) = buf_z(i);
}
}
int packGhost(Atom* atom, int i, MD_FLOAT* buf, int* pbc)
{
int m = 0;
buf[m++] = atom_x(i) + pbc[_x] * atom->mybox.xprd;
buf[m++] = atom_y(i) + pbc[_y] * atom->mybox.yprd;
buf[m++] = atom_z(i) + pbc[_z] * atom->mybox.zprd;
buf[m++] = atom->type[i];
return m;
}
int unpackGhost(Atom* atom, int i, MD_FLOAT* buf)
{
while (i>=atom->Nmax) growAtom(atom);
int m = 0;
atom_x(i) = buf[m++];
atom_y(i) = buf[m++];
atom_z(i) = buf[m++];
atom->type[i] = buf[m++];
atom->Nghost++;
return m;
}
void packReverse(Atom* atom, int n, int first, MD_FLOAT* buf)
{
for(int i = 0; i < n; i++) {
buf_x(i) = atom_fx(first + i);
buf_y(i) = atom_fy(first + i);
buf_z(i) = atom_fz(first + i);
}
}
void unpackReverse(Atom* atom, int n, int* list, MD_FLOAT* buf)
{
int i, j;
for(i = 0; i < n; i++) {
j = list[i];
atom_fx(j) += buf_x(i);
atom_fy(j) += buf_y(i);
atom_fz(j) += buf_z(i);
}
}
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
{
int m = 0;
buf[m++] = atom_x(i);
buf[m++] = atom_y(i);
buf[m++] = atom_z(i);
buf[m++] = atom_vx(i);
buf[m++] = atom_vy(i);
buf[m++] = atom_vz(i);
buf[m++] = atom->type[i];
return m;
}
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
{
while(i >= atom->Nmax) growAtom(atom);
int m = 0;
atom_x(i) = buf[m++];
atom_y(i) = buf[m++];
atom_z(i) = buf[m++];
atom_vx(i) = buf[m++];
atom_vy(i) = buf[m++];
atom_vz(i) = buf[m++];
atom->type[i] = buf[m++];
return m;
}
void pbc(Atom* atom)
{
for(int i = 0; i < atom->Nlocal; i++) {
MD_FLOAT xprd = atom->mybox.xprd;
MD_FLOAT yprd = atom->mybox.yprd;
MD_FLOAT zprd = atom->mybox.zprd;
if(atom_x(i) < 0.0) atom_x(i) += xprd;
if(atom_y(i) < 0.0) atom_y(i) += yprd;
if(atom_z(i) < 0.0) atom_z(i)+= zprd;
if(atom_x(i) >= xprd) atom_x(i) -= xprd;
if(atom_y(i) >= yprd) atom_y(i) -= yprd;
if(atom_z(i) >= zprd) atom_z(i) -= zprd;
}
}
void copy(Atom* atom, int i, int j)
{
atom_x(i) = atom_x(j);
atom_y(i) = atom_y(j);
atom_z(i) = atom_z(j);
atom_vx(i) = atom_vx(j);
atom_vy(i) = atom_vy(j);
atom_vz(i) = atom_vz(j);
atom->type[i] = atom->type[j];
}

View File

@ -13,13 +13,17 @@
#include <parameter.h>
#include <stats.h>
#include <timing.h>
#include <mpi.h>
#include <util.h>
#ifdef __SIMD_KERNEL__
#include <simd.h>
#endif
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal;
int Nghost = atom->Nghost;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
@ -41,21 +45,21 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
MD_FLOAT fix = 0.0;
MD_FLOAT fiy = 0.0;
MD_FLOAT fiz = 0.0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
@ -70,26 +74,25 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
#endif
}
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
#ifdef USE_REFERENCE_VERSION
if(numneighs % VECTOR_WIDTH > 0) {
addStat(stats->atoms_outside_cutoff, VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
@ -102,13 +105,13 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp();
return E-S;
}
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal;
int Nghost = atom->Nghost;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
@ -119,19 +122,18 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for(int i = 0; i < Nlocal; i++) {
for(int i = 0; i < Nlocal+Nghost; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("forceLJ-halfneigh");
#pragma omp for schedule(runtime)
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
@ -172,16 +174,14 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
// We do not need to update forces for ghost atoms
if(j < Nlocal) {
// We need to update forces for ghost atoms if shell_method or half stencil is requiered
if((param->half_neigh && j<Nlocal) || param->method){
atom_fx(j) -= delx * force;
atom_fy(j) -= dely * force;
atom_fz(j) -= delz * force;
}
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
@ -190,6 +190,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("forceLJ-halfneigh");
}
@ -227,7 +228,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
@ -276,3 +277,58 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
double E = getTimeStamp();
return E-S;
}
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
int Nshell = neighbor->Nshell;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for(int i = 0; i < Nshell; i++) {
neighs = &(neighbor->neighshell[i * neighbor->maxneighs]);
int numneigh = neighbor->numNeighShell[i];
int iatom = neighbor->listshell[i];
MD_FLOAT xtmp = atom_x(iatom);
MD_FLOAT ytmp = atom_y(iatom);
MD_FLOAT ztmp = atom_z(iatom);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
for(int k = 0; k < numneigh; k++) {
int jatom = neighs[k];
MD_FLOAT delx = xtmp - atom_x(jatom);
MD_FLOAT dely = ytmp - atom_y(jatom);
MD_FLOAT delz = ztmp - atom_z(jatom);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
atom_fx(jatom) -= delx * force;
atom_fy(jatom) -= dely * force;
atom_fz(jatom) -= delz * force;
}
}
atom_fx(iatom) += fix;
atom_fy(iatom) += fiy;
atom_fz(iatom) += fiz;
}
}

View File

@ -4,8 +4,9 @@
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#include <box.h>
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
@ -56,6 +57,8 @@ typedef struct {
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
//TODO: insert the id number
//MD_FLOAT *Atom_id;
// DEM
MD_FLOAT *radius;
@ -64,6 +67,9 @@ typedef struct {
// Device data
DeviceAtom d_atom;
//Info Subdomain
Box mybox;
} Atom;
extern void initAtom(Atom*);
@ -73,9 +79,19 @@ extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern int readAtom_in(Atom*, Parameter*);
extern void writeAtom(Atom*, Parameter*);
extern void growAtom(Atom*);
int packGhost(Atom*, int, MD_FLOAT*, int*);
int unpackGhost(Atom*, int, MD_FLOAT*);
int packExchange(Atom*, int, MD_FLOAT*);
int unpackExchange(Atom*, int, MD_FLOAT*);
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
void unpackForward(Atom*, int, int, MD_FLOAT*);
void packReverse(Atom* , int , int , MD_FLOAT*);
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
void pbc(Atom*);
void copy(Atom*, int, int);
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]
@ -100,4 +116,8 @@ extern void growAtom(Atom*);
# define atom_fz(i) atom->fz[i]
#endif
# define buf_x(i) buf[3*(i)]
# define buf_y(i) buf[3*(i)+1]
# define buf_z(i) buf[3*(i)+2]
#endif

View File

@ -20,9 +20,14 @@ typedef struct {
int ncalls;
int maxneighs;
int half_neigh;
int half_stencil;
int *neighbors;
int *numneigh;
//MPI
int Nshell; //# of atoms in listShell
int *numNeighShell; //# of neighs for each atom in listShell
int *neighshell; //list of neighs for each atom in listShell
int *listshell; //Atoms to compute the force
// Device data
DeviceNeighbor d_neighbor;
} Neighbor;

View File

@ -5,8 +5,11 @@
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <comm.h>
#include <parameter.h>
#ifndef __VTK_H_
#define __VTK_H_
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
#endif

View File

@ -11,10 +11,7 @@
#include <limits.h>
#include <math.h>
#include <float.h>
#include <omp.h>
#include <likwid-marker.h>
#include <allocate.h>
#include <atom.h>
#include <device.h>
@ -24,13 +21,19 @@
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <pbc.h>
#include <stats.h>
#include <timers.h>
#include <util.h>
#include <vtk.h>
#include <comm.h>
#include <grid.h>
#include <shell_methods.h>
#include <mpi.h>
#define HLINE "----------------------------------------------------------------------------\n"
#ifdef CUDA_TARGET
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
#endif
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
@ -38,69 +41,6 @@ extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
#ifdef CUDA_TARGET
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
#endif
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
initAtom(atom);
initPbc(atom);
initStats(stats);
initNeighbor(neighbor, param);
if(param->input_file == NULL) {
createAtom(atom, param);
} else {
readAtom(atom, param);
}
setupNeighbor(param);
setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); }
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
#endif
setupPbc(atom, param);
initDevice(atom, neighbor);
updatePbc(atom, param, true);
buildNeighbor(atom, neighbor);
E = getTimeStamp();
return E-S;
}
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
updateAtomsPbc(atom, param);
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
#endif
setupPbc(atom, param);
updatePbc(atom, param, true);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp();
return E-S;
}
void printAtomState(Atom *atom) {
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
// int nall = atom->Nlocal + atom->Nghost;
// for (int i=0; i<nall; i++) {
// printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]);
// }
}
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(param->force_field == FF_EAM) {
return computeForceEam(eam, param, atom, neighbor, stats);
@ -113,7 +53,7 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
}
}
if(param->half_neigh) {
if(param->half_neigh || param->method) {
return computeForceLJHalfNeigh(param, atom, neighbor, stats);
}
@ -124,6 +64,102 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
#endif
}
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time){
double S, E;
int dims = 3; //TODO: Adjust to do in 3d and 2d
S = getTimeStamp();
if(param->balance == RCB) {
rcbBalance(grid, atom, param, meanBisect,dims,0);
neighComm(comm, param, grid);
}else if(param->balance == meanTimeRCB){
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
neighComm(comm, param, grid);
}else if(param->balance == Staggered) {
staggeredBalance(grid, atom, param, time);
neighComm(comm, param, grid);
exchangeComm(comm,atom);
}else { } //Do nothing
//printGrid(grid);
E = getTimeStamp();
return E-S;
}
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
{
double E,S,time;
int me;
MPI_Comm_rank(world,&me);
S = getTimeStamp();
if(param->balance == meanTimeRCB || param->balance == RCB){
rcbBalance(grid, atom, param, meanBisect,3,0);
neighComm(comm, param, grid);
}
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
MPI_Barrier(world);
E = getTimeStamp();
return E-S;
}
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
initAtom(atom);
initStats(stats);
initNeighbor(neighbor, param);
if(param->input_file == NULL) {
createAtom(atom, param);
} else {
readAtom(atom, param);
}
setupGrid(grid,atom,param);
setupNeighbor(param);
setupComm(comm, param, grid);
if(param->balance){
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
}
setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); }
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
#endif
initDevice(atom, neighbor);
ghostNeighbor(comm, atom, param);
buildNeighbor(atom, neighbor);
E = getTimeStamp();
return E-S;
}
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
#endif
ghostNeighbor(comm, atom, param);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp();
return E-S;
}
double updateAtoms(Comm* comm, Atom* atom){
double S,E;
S = getTimeStamp();
exchangeComm(comm, atom);
E = getTimeStamp();
return E-S;
}
void writeInput(Parameter *param, Atom *atom) {
FILE *fpin = fopen("input.in", "w");
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
@ -142,18 +178,19 @@ int main(int argc, char** argv) {
Neighbor neighbor;
Stats stats;
Parameter param;
Comm comm;
Grid grid;
LIKWID_MARKER_INIT;
#pragma omp parallel
{
LIKWID_MARKER_REGISTER("force");
//LIKWID_MARKER_REGISTER("reneighbour");
//LIKWID_MARKER_REGISTER("pbc");
}
}
initComm(&argc, &argv, &comm);
initParameter(&param);
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
if((strcmp(argv[i], "-p") == 0)) {
readParameter(&param, argv[++i]);
continue;
}
@ -191,6 +228,24 @@ int main(int argc, char** argv) {
if((strcmp(argv[i], "-half") == 0)) {
param.half_neigh = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-method") == 0)) {
param.method = atoi(argv[++i]);
if (param.method>3 || param.method< 0){
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
endComm(&comm);
exit(0);
}
continue;
}
if((strcmp(argv[i], "-bal") == 0)) {
param.balance = atoi(argv[++i]);
if (param.balance>3 || param.balance< 0){
if(comm.myproc == 0) fprintf(stderr, "Load Balance does not exist!\n");
endComm(&comm);
exit(0);
}
continue;
}
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
param.cutforce = atof(argv[++i]);
@ -208,71 +263,71 @@ int main(int argc, char** argv) {
param.vtk_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-w") == 0)) {
param.write_atom_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-p / --params <string>: file to read parameters from (can be specified more than once)\n");
printf("-f <string>: force field (lj, eam or dem), default lj\n");
printf("-i <string>: input file with atom positions (dump)\n");
printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-half <int>: use half (1) or full (0) neighbor lists\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("-w <file>: write input atoms to file\n");
printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n");
printf(HLINE);
exit(EXIT_SUCCESS);
if(comm.myproc ==0 ){
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
printf("-f <string>: force field (lj, eam or dem), default lj\n");
printf("-i <string>: input file with atom positions (dump)\n");
printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n");
printf(HLINE);
}
exit(EXIT_SUCCESS);
}
}
if(param.balance>0 && param.method == 1){
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
endComm(&comm);
exit(0);
}
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param);
printf(HLINE);
printf("step\ttemp\t\tpressure\n");
timer[SETUP]=setup(&param, &eam, &atom, &neighbor, &stats, &comm, &grid);
if(comm.myproc == 0)printParameter(&param);
if(comm.myproc == 0)printf(HLINE);
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
traceAddresses(&param, &atom, &neighbor, n + 1);// TODO: trace adress
#endif
if(param.write_atom_file != NULL) {
writeAtom(&atom, &param);
}
//writeInput(&param, &atom);
timer[FORCE] = computeForce(&eam, &param, &atom, &neighbor, &stats);
timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp();
timer[FORCE] = computeForce(&eam, &param, &atom, &neighbor, &stats);
timer[NEIGH] = 0.0;
timer[FORWARD] = 0.0;
timer[UPDATE] = 0.0;
timer[BALANCE] = 0.0;
timer[REVERSE] = reverse(&comm, &atom, &param);
MPI_Barrier(world);
timer[TOTAL] = getTimeStamp();
if(param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
}
printvtk(param.vtk_file, &comm, &atom, &param, 0);
}
for(int n = 0; n < param.ntimes; n++) {
bool reneigh = (n + 1) % param.reneigh_every == 0;
initialIntegrate(reneigh, &param, &atom);
if((n + 1) % param.reneigh_every) {
updatePbc(&atom, &param, false);
if(reneigh) {
timer[UPDATE] +=updateAtoms(&comm,&atom);
if(param.balance && !((n+1)%param.balance_every))
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , &param, timer[FORCE]);
timer[NEIGH] += reneighbour(&comm, &param, &atom, &neighbor);
} else {
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
}
timer[FORWARD] += forward(&comm, &atom, &param);
}
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
timer[FORCE] += computeForce(&eam, &param, &atom, &neighbor, &stats);
timer[REVERSE] += reverse(&comm, &atom, &param);
finalIntegrate(reneigh, &param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
#ifdef CUDA_TARGET
memcpyFromGPU(atom.x, atom.d_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3);
@ -281,47 +336,42 @@ int main(int argc, char** argv) {
}
if(param.vtk_file != NULL) {
write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
}
printvtk(param.vtk_file, &comm, &atom ,&param, n+1);
}
}
MPI_Barrier(world);
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
computeThermo(-1, &param, &atom);
double mint[NUMTIMER];
double maxt[NUMTIMER];
double sumt[NUMTIMER];
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
int Nghost;
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
printf(HLINE);
int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel
#pragma omp master
{
omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
}
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
if(comm.myproc == 0){
int n = comm.numproc;
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
printf(HLINE);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
displayStatistics(&atom, &param, &stats, timer);
#endif
}
endComm(&comm);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

View File

@ -11,27 +11,39 @@
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <util.h>
#include <mpi.h>
#define SMALL 1.0e-6
#define FACTOR 0.999
MD_FLOAT xprd, yprd, zprd;
MD_FLOAT bininvx, bininvy, bininvz;
int mbinxlo, mbinylo, mbinzlo;
int pad_x, pad_y, pad_z;
int nbinx, nbiny, nbinz;
int mbinx, mbiny, mbinz; // n bins in x, y, z
int mbinx, mbiny, mbinz; // m bins in x, y, z
int *bincount;
int *bins;
int mbins; //total number of bins
int atoms_per_bin; // max atoms per bin
int mbins; //total number of bins
int atoms_per_bin; // max atoms per bin
MD_FLOAT cutneigh;
MD_FLOAT cutneighsq; // neighbor cutoff squared
MD_FLOAT cutneighsq; // neighbor cutoff squared
int nmax;
int nstencil; // # of bins in stencil
int* stencil; // stencil list of bin offsets
int nstencil; // # of bins in stencil
int* stencil; // stencil list of bin offsets
MD_FLOAT binsizex, binsizey, binsizez;
int me; //rank
int method; // method
int half_stencil; //If half stencil exist
int shellMethod; //If shell method exist
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
static MD_FLOAT bindist(int, int, int);
static int ghostZone(Atom*, int);
static int eightZone(Atom*, int);
static int halfZone(Atom*, int);
static void neighborGhost(Atom*, Neighbor*);
static inline int interaction(Atom* atom, int i, int j);
/* exported subroutines */
void initNeighbor(Neighbor *neighbor, Parameter *param) {
@ -51,7 +63,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
neighbor->maxneighs = 100;
neighbor->numneigh = NULL;
neighbor->neighbors = NULL;
//========== MPI =============
shellMethod = 0;
half_stencil = 0;
method = param->method;
if(method == halfShell || method == eightShell){
param->half_neigh = 1;
shellMethod = 1;
}
if(method == halfStencil){
param->half_neigh = 0;
half_stencil = 1;
}
me = 0;
MPI_Comm_rank(MPI_COMM_WORLD,&me);
neighbor->half_neigh = param->half_neigh;
neighbor->Nshell = 0;
neighbor->numNeighShell = NULL;
neighbor->neighshell = NULL;
neighbor->listshell = NULL;
}
void setupNeighbor(Parameter* param) {
@ -64,7 +94,6 @@ void setupNeighbor(Parameter* param) {
yprd = param->yprd;
zprd = param->zprd;
}
// TODO: update lo and hi for standard case and use them here instead
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
@ -93,54 +122,48 @@ void setupNeighbor(Parameter* param) {
bininvy = 1.0 / binsizey;
bininvz = 1.0 / binsizez;
}
coord = xlo - cutneigh - SMALL * xprd;
mbinxlo = (int) (coord * bininvx);
if (coord < 0.0) { mbinxlo = mbinxlo - 1; }
coord = xhi + cutneigh + SMALL * xprd;
mbinxhi = (int) (coord * bininvx);
coord = ylo - cutneigh - SMALL * yprd;
mbinylo = (int) (coord * bininvy);
if (coord < 0.0) { mbinylo = mbinylo - 1; }
coord = yhi + cutneigh + SMALL * yprd;
mbinyhi = (int) (coord * bininvy);
coord = zlo - cutneigh - SMALL * zprd;
mbinzlo = (int) (coord * bininvz);
if (coord < 0.0) { mbinzlo = mbinzlo - 1; }
coord = zhi + cutneigh + SMALL * zprd;
mbinzhi = (int) (coord * bininvz);
mbinxlo = mbinxlo - 1;
mbinxhi = mbinxhi + 1;
mbinx = mbinxhi - mbinxlo + 1;
mbinylo = mbinylo - 1;
mbinyhi = mbinyhi + 1;
mbiny = mbinyhi - mbinylo + 1;
mbinzlo = mbinzlo - 1;
mbinzhi = mbinzhi + 1;
mbinz = mbinzhi - mbinzlo + 1;
pad_x = (int)(cutneigh*bininvx);
while(pad_x * binsizex < FACTOR * cutneigh) pad_x++;
pad_y = (int)(cutneigh*bininvy);
while(pad_y * binsizey < FACTOR * cutneigh) pad_y++;
pad_z = (int)(cutneigh*bininvz);
while(pad_z * binsizez < FACTOR * cutneigh) pad_z++;
nextx = (int) (cutneigh * bininvx);
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
if(nextx * binsizex < FACTOR * cutneigh){
nextx++;
pad_x++;
}
nexty = (int) (cutneigh * bininvy);
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
if(nexty * binsizey < FACTOR * cutneigh){
nexty++;
pad_y++;
}
nextz = (int) (cutneigh * bininvz);
if(nextz * binsizez < FACTOR * cutneigh) nextz++;
if(nextz * binsizez < FACTOR * cutneigh){
nextz++;
pad_z++;
}
mbinx = nbinx+4*pad_x;
mbiny = nbiny+4*pad_y;
mbinz = nbinz+4*pad_z;
if (stencil) { free(stencil); }
stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
nstencil = 0;
int kstart = -nextz;
int jstart = -nexty;
int istart = -nextx;
int ibin = 0;
for(int k = kstart; k <= nextz; k++) {
for(int j = -nexty; j <= nexty; j++) {
for(int i = -nextx; i <= nextx; i++) {
if(bindist(i, j, k) < cutneighsq) {
stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
for(int j = jstart; j <= nexty; j++) {
for(int i = istart; i <= nextx; i++) {
if(bindist(i, j, k) < cutneighsq) {
int jbin = k * mbiny * mbinx + j * mbinx + i;
if(ibin>jbin && half_stencil) continue;
stencil[nstencil++] = jbin;
}
}
}
@ -154,8 +177,7 @@ void setupNeighbor(Parameter* param) {
}
void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
int nall = atom->Nlocal + atom->Nghost;
int nall = atom->Nlocal + atom->Nghost;
/* extend atom arrays if necessary */
if(nall > nmax) {
nmax = nall;
@ -164,16 +186,13 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
}
/* bin local & ghost atoms */
binatoms(atom);
int resize = 1;
/* loop over each atom, storing neighbors */
while(resize) {
int new_maxneighs = neighbor->maxneighs;
resize = 0;
for(int i = 0; i < atom->Nlocal; i++) {
int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
int n = 0;
@ -184,21 +203,22 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
#ifdef EXPLICIT_TYPES
int type_i = atom->type[i];
#endif
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin];
for(int m = 0; m < bincount[jbin]; m++) {
for(int m = 0; m < bincount[jbin]; m++) {
int j = loc_bin[m];
if((j == i) || (neighbor->half_neigh && (j < i))) {
continue;
}
if((j==i) || (neighbor->half_neigh && (j<i)))
continue;
if(half_stencil && ibin==jbin && !interaction(atom,i,j))
continue;
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
int type_j = atom->type[j];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
@ -210,8 +230,8 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
}
}
}
neighbor->numneigh[i] = n;
if(n >= neighbor->maxneighs) {
resize = 1;
@ -220,14 +240,15 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
}
}
}
if(resize) {
printf("RESIZE %d\n", neighbor->maxneighs);
printf("RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
neighbor->maxneighs = new_maxneighs * 1.2;
free(neighbor->neighbors);
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
}
}
if(method == eightShell) neighborGhost(atom, neighbor);
}
/* internal subroutines */
@ -257,44 +278,28 @@ MD_FLOAT bindist(int i, int j, int k) {
} else {
delz = (k + 1) * binsizez;
}
return (delx * delx + dely * dely + delz * delz);
}
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
int ix, iy, iz;
if(xin >= xprd) {
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * bininvx) - mbinxlo;
} else {
ix = (int)(xin * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * bininvy) - mbinylo;
} else {
iy = (int)(yin * bininvy) - mbinylo - 1;
}
if(zin >= zprd) {
iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
} else if(zin >= 0.0) {
iz = (int)(zin * bininvz) - mbinzlo;
} else {
iz = (int)(zin * bininvz) - mbinzlo - 1;
}
return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
int ix, iy, iz;
MD_FLOAT eps = 1e-9;
MD_FLOAT xlo=0.0; MD_FLOAT ylo=0.0; MD_FLOAT zlo=0.0;
xlo = fabs(xlo - pad_x*binsizex)+eps;
ylo = fabs(ylo - pad_y*binsizey)+eps;
zlo = fabs(zlo - pad_z*binsizez)+eps;
ix = (int) ((xin + xlo)*bininvx);
iy = (int) ((yin + ylo)*bininvy);
iz = (int) ((zin + zlo)*bininvz);
return (iz * mbiny * mbinx + iy * mbinx + ix);
//return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
}
void binatoms(Atom *atom) {
void binatoms(Atom *atom) {
int nall = atom->Nlocal + atom->Nghost;
int resize = 1;
while(resize > 0) {
resize = 0;
@ -304,7 +309,7 @@ void binatoms(Atom *atom) {
for(int i = 0; i < nall; i++) {
int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
if(shellMethod && !ghostZone(atom, i)) continue;
if(bincount[ibin] < atoms_per_bin) {
int ac = bincount[ibin]++;
bins[ibin * atoms_per_bin + ac] = i;
@ -325,11 +330,9 @@ void sortAtom(Atom* atom) {
binatoms(atom);
int Nmax = atom->Nmax;
int* binpos = bincount;
for(int i = 1; i < mbins; i++) {
binpos[i] += binpos[i - 1];
}
#ifdef AOS
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
@ -367,7 +370,6 @@ void sortAtom(Atom* atom) {
#endif
}
}
free(atom->x);
free(atom->vx);
atom->x = new_x;
@ -383,3 +385,158 @@ void sortAtom(Atom* atom) {
atom->vz = new_vz;
#endif
}
/* internal subroutines
Added with MPI*/
static int ghostZone(Atom* atom, int i){
if(i<atom->Nlocal) return 1;
else if(method == halfShell) return halfZone(atom,i);
else if(method == eightShell) return eightZone(atom,i);
else return 0;
}
static int eightZone(Atom* atom, int i)
{
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
MD_FLOAT *hi = atom->mybox.hi;
int zone = 0;
if(BigOrEqual(atom_x(i),hi[_x])) {
zone += 1;
}
if(BigOrEqual(atom_y(i),hi[_y])) {
zone += 2;
}
if(BigOrEqual(atom_z(i),hi[_z])) {
zone += 4;
}
return zoneMapping[zone];
}
static int halfZone(Atom* atom, int i)
{
MD_FLOAT *hi = atom->mybox.hi;
MD_FLOAT *lo = atom->mybox.lo;
if(atom_x(i)<lo[_x] && atom_y(i)<hi[_y] && atom_z(i)<hi[_z]){
return 0;
} else if(atom_y(i)<lo[_y] && atom_z(i)<hi[_z]){
return 0;
} else if(atom_z(i)<lo[_z]){
return 0;
} else {
return 1;
}
}
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
int Nshell=0;
int Nlocal = atom->Nlocal;
int Nghost = atom->Nghost;
if(neighbor->listshell) free(neighbor->listshell);
neighbor->listshell = (int*) malloc(Nghost * sizeof(int));
int* listzone = (int*) malloc(8 * Nghost * sizeof(int));
int countAtoms[8] = {0,0,0,0,0,0,0,0};
//Selecting ghost atoms for interaction
for(int i = Nlocal; i < Nlocal+Nghost; i++) {
int izone = ghostZone(atom,i);
int *list = &listzone[Nghost*izone];
int n = countAtoms[izone];
list[n] = i;
countAtoms[izone]++;
}
for(int zone = 1; zone<=3; zone++){
int *list = &listzone[Nghost*zone];
for(int n=0; n<countAtoms[zone]; n++)
neighbor->listshell[Nshell++] = list[n];
}
neighbor->Nshell = Nshell;
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
if(neighbor->neighshell) free(neighbor->neighshell);
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
int resize = 1;
while(resize)
{
resize = 0;
for(int i = 0; i < Nshell; i++) {
int *neighshell = &(neighbor->neighshell[i*neighbor->maxneighs]);
int n = 0;
int iatom = neighbor->listshell[i];
int izone = ghostZone(atom, iatom);
MD_FLOAT xtmp = atom_x(iatom);
MD_FLOAT ytmp = atom_y(iatom);
MD_FLOAT ztmp = atom_z(iatom);
int ibin = coord2bin(xtmp, ytmp, ztmp);
#ifdef EXPLICIT_TYPES
int type_i = atom->type[iatom];
#endif
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int* loc_bin = &bins[jbin * atoms_per_bin];
for(int m = 0; m < bincount[jbin]; m++) {
int jatom = loc_bin[m];
int jzone = ghostZone(atom,jatom);
if(jzone <=izone) continue;
if(izone == 1 && (jzone==5||jzone==6||jzone==7)) continue;
if(izone == 2 && (jzone==4||jzone==6||jzone==7)) continue;
if(izone == 3 && (jzone==4||jzone==5||jzone==7)) continue;
MD_FLOAT delx = xtmp - atom_x(jatom);
MD_FLOAT dely = ytmp - atom_y(jatom);
MD_FLOAT delz = ztmp - atom_z(jatom);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
int type_j = atom->type[jatom];
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
#else
const MD_FLOAT cutoff = cutneighsq;
#endif
if(rsq <= cutoff) {
neighshell[n++] = jatom;
}
}
}
neighbor->numNeighShell[i] = n;
if(n >= neighbor->maxneighs){
resize = 1;
neighbor->maxneighs = n * 1.2;
break;
}
}
if(resize) {
free(neighbor->neighshell);
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
}
}
free(listzone);
}
static inline int interaction(Atom* atom, int i, int j) {
if(i<j && j<atom->Nlocal) {
return 1;
} else if( atom_z(j)>atom_z(i) && j>=atom->Nlocal) {
return 1;
} else if(Equal(atom_z(j),atom_z(i)) && atom_y(j)<atom_y(i) && j>=atom->Nlocal){
return 1;
} else if(Equal(atom_z(j),atom_z(i)) && Equal(atom_y(j),atom_y(i)) && atom_x(j)<atom_x(i) && j>=atom->Nlocal){
return 1;
} else {
return 0;
}
}

View File

@ -6,8 +6,12 @@
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vtk.h>
#include <mpi.h>
#include <atom.h>
static MPI_File _fh;
static inline void flushBuffer(char*);
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
@ -18,12 +22,12 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
}
@ -48,3 +52,168 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
fclose(fp);
return 0;
}
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
{
char msg[256];
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
if(_fh == MPI_FILE_NULL) {
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
if (comm->myproc==0){
sprintf(msg, "# vtk DataFile Version 2.0\n");
sprintf(msg, "%sParticle data\n",msg);
sprintf(msg, "%sASCII\n",msg);
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
flushBuffer(msg);
}
}
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
{
if (_fh == MPI_FILE_NULL) {
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
return -1;
}
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
int extrabuff = 100;
int sizebuff = sizeline*atom->Nlocal+extrabuff;
int mysize = 0;
char* msg = (char*) malloc(sizebuff);
sprintf(msg, "");
for(int i = 0; i < atom->Nlocal; i++){
if(mysize+extrabuff >= sizebuff){
sizebuff*= 1.5;
msg = (char*) realloc(msg, sizebuff);
}
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
mysize = strlen(msg);
}
int gatherSize[comm->numproc];
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
int offset=0;
int globalSize = 0;
for(int i = 0; i < comm->myproc; i++)
offset+= gatherSize[i];
for(int i = 0; i < comm->numproc; i++)
globalSize+= gatherSize[i];
MPI_Offset displ;
MPI_Datatype FileType;
int GlobalSize[] = {globalSize};
int LocalSize[] = {mysize};
int Start[] = {offset};
if(LocalSize[0]>0){
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
} else {
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
}
MPI_Type_commit(&FileType);
MPI_File_get_size(_fh, &displ);
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
MPI_Barrier(MPI_COMM_WORLD);
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
if (comm->myproc==0){
sprintf(msg, "\n\n");
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1 %d\n", msg, i);
flushBuffer(msg);
sprintf(msg, "\n\n");
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1\n",msg);
flushBuffer(msg);
sprintf(msg, "\n\n");
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
sprintf(msg, "%sSCALARS mass double\n",msg);
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1.0\n",msg);
sprintf(msg, "%s\n\n",msg);
flushBuffer(msg);
}
}
void vtkClose()
{
MPI_File_close(&_fh);
_fh=MPI_FILE_NULL;
}
int printGhost(const char* filename, Atom* atom, int timestep, int me) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d_ghost%i.vtk", filename, timestep,me);
FILE* fp = fopen(timestep_filename, "wb");
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
fprintf(fp, "POINTS %d double\n", atom->Nghost);
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
}
fprintf(fp, "\n\n");
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
fprintf(fp, "1 %d\n", i);
}
fprintf(fp, "\n\n");
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
fprintf(fp, "1\n");
}
fprintf(fp, "\n\n");
fprintf(fp, "POINT_DATA %d\n", atom->Nghost);
fprintf(fp, "SCALARS mass double\n");
fprintf(fp, "LOOKUP_TABLE default\n");
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; i++) {
fprintf(fp, "1.0\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
{
if(comm->numproc == 1)
{
write_atoms_to_vtk_file(filename, atom, timestep);
return;
}
vtkOpen(filename, comm, atom, timestep);
vtkVector(comm, atom, param);
vtkClose();
//printGhost(filename, atom, timestep, comm->myproc);
}
static inline void flushBuffer(char* msg){
MPI_Offset displ;
MPI_File_get_size(_fh, &displ);
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
}