Final MPI version
This commit is contained in:
parent
a6a269703d
commit
a13a0f3bae
3
Makefile
3
Makefile
@ -17,6 +17,9 @@ include $(MAKE_DIR)/include_ISA.mk
|
||||
include $(MAKE_DIR)/include_GROMACS.mk
|
||||
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
|
||||
|
||||
ifeq ($(strip $(OPT_SCHEME)),gromacs)
|
||||
DEFINES += -DGROMACS
|
||||
endif
|
||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||
DEFINES += -DAOS
|
||||
endif
|
||||
|
97
common/box.c
Normal file
97
common/box.c
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
#include <box.h>
|
||||
#include <mpi.h>
|
||||
|
||||
int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
|
||||
{
|
||||
int pbc = -100;
|
||||
MD_FLOAT min[3], max[3];
|
||||
int same = (mybox->id == other->id) ? 1 : 0;
|
||||
|
||||
//projections
|
||||
min[_x] = MAX(mybox->lo[_x], other->lo[_x]); max[_x] = MIN(mybox->hi[_x], other->hi[_x]);
|
||||
min[_y] = MAX(mybox->lo[_y], other->lo[_y]); max[_y] = MIN(mybox->hi[_y], other->hi[_y]);
|
||||
min[_z] = MAX(mybox->lo[_z], other->lo[_z]); max[_z] = MIN(mybox->hi[_z], other->hi[_z]);
|
||||
|
||||
//Intersection no periodic case
|
||||
if(!same){
|
||||
if (dir == 0) max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ cutneigh);
|
||||
if (dir == 1) min[dim] = MAX(mybox->lo[dim], other->lo[dim]- cutneigh);
|
||||
if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z])) pbc = 0;
|
||||
}
|
||||
|
||||
//Intersection periodic case
|
||||
if(pbc < 0)
|
||||
{
|
||||
if(dir == 0){
|
||||
min[dim] = MAX(mybox->lo[dim] , other->lo[dim]- xprd);
|
||||
max[dim] = MIN(mybox->hi[dim] , other->hi[dim]- xprd + cutneigh);
|
||||
|
||||
} else {
|
||||
min[dim] = MAX(mybox->lo[dim], other->lo[dim]+ xprd - cutneigh);
|
||||
max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ xprd);
|
||||
|
||||
}
|
||||
if((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z]))
|
||||
pbc = (dir == 0) ? 1:-1;
|
||||
}
|
||||
|
||||
//storing the cuts
|
||||
cut->lo[_x] = min[_x]; cut->hi[_x] = max[_x];
|
||||
cut->lo[_y] = min[_y]; cut->hi[_y] = max[_y];
|
||||
cut->lo[_z] = min[_z]; cut->hi[_z] = max[_z];
|
||||
|
||||
return pbc;
|
||||
}
|
||||
|
||||
int overlapFullBox(Parameter* param, MD_FLOAT *cutneigh ,const Box* mybox, const Box* other)
|
||||
{
|
||||
MD_FLOAT min[3], max[3];
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for(int k = -1; k < 2; k++)
|
||||
{
|
||||
for(int j = -1; j < 2; j++)
|
||||
{
|
||||
for(int i= -1; i < 2; i++)
|
||||
{
|
||||
min[_x] = MAX(mybox->lo[_x], other->lo[_x]-cutneigh[_x] + i*xprd);
|
||||
min[_y] = MAX(mybox->lo[_y], other->lo[_y]-cutneigh[_y] + j*yprd);
|
||||
min[_z] = MAX(mybox->lo[_z], other->lo[_z]-cutneigh[_z] + k*zprd);
|
||||
max[_x] = MIN(mybox->hi[_x], other->hi[_x]+cutneigh[_x] + i*xprd);
|
||||
max[_y] = MIN(mybox->hi[_y], other->hi[_y]+cutneigh[_y] + j*yprd);
|
||||
max[_z] = MIN(mybox->hi[_z], other->hi[_z]+cutneigh[_z] + k*zprd);
|
||||
if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z]))
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
|
||||
{
|
||||
if(iswap==2 || iswap==3){
|
||||
if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
|
||||
if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
|
||||
}
|
||||
|
||||
if(iswap==4 || iswap==5){
|
||||
if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
|
||||
if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
|
||||
if(me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
|
||||
if(me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
|
||||
}
|
||||
}
|
||||
|
556
common/comm.c
Normal file
556
common/comm.c
Normal file
@ -0,0 +1,556 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <comm.h>
|
||||
#include <allocate.h>
|
||||
#include <mpi.h>
|
||||
#include <util.h>
|
||||
|
||||
#define NEIGHMIN 6
|
||||
#define BUFFACTOR 2
|
||||
#define BUFMIN 1000
|
||||
#define BUFEXTRA 100
|
||||
#define world MPI_COMM_WORLD
|
||||
|
||||
MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
|
||||
static inline void allocDynamicBuffers(Comm*);
|
||||
static inline void freeDynamicBuffers(Comm*);
|
||||
static inline void freeBuffers(Comm*);
|
||||
|
||||
void defineReverseList(Comm* comm){
|
||||
int dim = 0;
|
||||
int index = 0;
|
||||
int me = comm->myproc;
|
||||
|
||||
//Set the inverse list
|
||||
for(int iswap = 0; iswap<6; iswap++){
|
||||
int dim = comm->swapdim[iswap];
|
||||
int dir = comm->swapdir[iswap];
|
||||
int invswap = comm->swap[dim][(dir+1)%2];
|
||||
|
||||
for(int ineigh = comm->sendfrom[invswap]; ineigh< comm->sendtill[invswap]; ineigh++)
|
||||
comm->nrecv[index++] = comm->nsend[ineigh];
|
||||
|
||||
comm->recvfrom[iswap] = (iswap == 0) ? 0 : comm->recvtill[iswap-1];
|
||||
comm->recvtill[iswap] = index;
|
||||
}
|
||||
|
||||
//set if myproc is unique in the swap
|
||||
for(int iswap = 0; iswap<6; iswap++){
|
||||
int sizeswap = comm->sendtill[iswap]-comm->sendfrom[iswap];
|
||||
int index = comm->sendfrom[iswap];
|
||||
int myneigh = comm->nsend[index];
|
||||
comm->othersend[iswap] = (sizeswap != 1 || comm->myproc != myneigh) ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void addNeighToExchangeList(Comm* comm, int newneigh){
|
||||
|
||||
int numneigh = comm->numneighexch;
|
||||
|
||||
if(comm->numneighexch>=comm->maxneighexch){
|
||||
size_t oldByteSize = comm->maxneighexch*sizeof(int);
|
||||
comm->maxneighexch *=2;
|
||||
comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT, comm->maxneighexch * sizeof(int), oldByteSize);
|
||||
}
|
||||
|
||||
// Add the new element to the list
|
||||
comm->nexch[numneigh] = newneigh;
|
||||
comm->numneighexch++;
|
||||
}
|
||||
|
||||
//Exported functions
|
||||
void neighComm(Comm *comm, Parameter* param, Grid *grid)
|
||||
{
|
||||
int me = comm->myproc;
|
||||
int numproc = comm ->numproc;
|
||||
int PAD = 6; //number of elements for processor in the map
|
||||
int ineigh = 0;
|
||||
int sneigh = 0;
|
||||
MD_FLOAT *map = grid->map;
|
||||
MD_FLOAT cutneigh = param->cutneigh;
|
||||
MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
|
||||
Box mybox, other, cut;
|
||||
|
||||
//needed for rebalancing
|
||||
freeDynamicBuffers(comm);
|
||||
|
||||
//Local box
|
||||
mybox.id = me;
|
||||
mybox.lo[_x] = map[me*PAD+0]; mybox.hi[_x] = map[me*PAD+3];
|
||||
mybox.lo[_y] = map[me*PAD+1]; mybox.hi[_y] = map[me*PAD+4];
|
||||
mybox.lo[_z] = map[me*PAD+2]; mybox.hi[_z] = map[me*PAD+5];
|
||||
|
||||
//Check for all possible neighbours only for exchange atoms
|
||||
comm->numneighexch = 0;
|
||||
for(int proc = 0; proc <numproc; proc++){
|
||||
other.id = proc;
|
||||
other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
|
||||
other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
|
||||
other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];
|
||||
|
||||
if(proc != me){
|
||||
int intersection = overlapFullBox(param,grid->cutneigh,&mybox,&other);
|
||||
if(intersection) addNeighToExchangeList(comm,proc);
|
||||
}
|
||||
}
|
||||
|
||||
//MAP is stored as follows: xlo,ylo,zlo,xhi,yhi,zhi
|
||||
for(int iswap = 0; iswap <6; iswap++)
|
||||
{
|
||||
int dir = comm->swapdir[iswap];
|
||||
int dim = comm->swapdim[iswap];
|
||||
|
||||
for(int proc = 0; proc < numproc; proc++)
|
||||
{
|
||||
//Check for neighbours along dimmensions, for forwardComm, backwardComm and ghostComm
|
||||
other.id = proc;
|
||||
other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
|
||||
other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
|
||||
other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];
|
||||
|
||||
//return if two boxes intersect: -100 not intersection, 0, 1 and -1 intersection for each different pbc.
|
||||
int pbc = overlapBox(dim,dir,&mybox,&other,&cut,prd[dim],cutneigh);
|
||||
if(pbc == -100) continue;
|
||||
|
||||
expandBox(iswap, &mybox, &other, &cut, cutneigh);
|
||||
|
||||
if(ineigh >= comm->maxneigh) {
|
||||
size_t oldByteSize = comm->maxneigh*sizeof(int);
|
||||
size_t oldBoxSize = comm->maxneigh*sizeof(Box);
|
||||
comm->maxneigh = 2*ineigh;
|
||||
comm->nsend = (int*) reallocate(comm->nsend, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->nrecv = (int*) reallocate(comm->nrecv, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->pbc_x = (int*) reallocate(comm->pbc_x, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->pbc_y = (int*) reallocate(comm->pbc_y, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->pbc_z = (int*) reallocate(comm->pbc_z, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->boxes = (Box*) reallocate(comm->boxes, ALIGNMENT, comm->maxneigh * sizeof(Box), oldBoxSize);
|
||||
}
|
||||
|
||||
comm->boxes[ineigh] = cut;
|
||||
comm->nsend[ineigh] = proc;
|
||||
comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
|
||||
comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0;
|
||||
comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0;
|
||||
ineigh++;
|
||||
}
|
||||
|
||||
comm->sendfrom[iswap] = (iswap == 0) ? 0:comm->sendtill[iswap-1];
|
||||
comm->sendtill[iswap] = ineigh;
|
||||
comm->numneigh = ineigh;
|
||||
}
|
||||
|
||||
allocDynamicBuffers(comm);
|
||||
defineReverseList(comm);
|
||||
}
|
||||
|
||||
void initComm(int* argc, char*** argv, Comm* comm)
|
||||
{
|
||||
//MPI Initialize
|
||||
MPI_Init(argc, argv);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &(comm->numproc));
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &(comm->myproc));
|
||||
comm->numneigh = 0;
|
||||
comm->numneighexch = 0;
|
||||
comm->nrecv=NULL;
|
||||
comm->nsend=NULL;
|
||||
comm->nexch=NULL;
|
||||
comm->pbc_x=NULL;
|
||||
comm->pbc_y=NULL;
|
||||
comm->pbc_z=NULL;
|
||||
comm->boxes=NULL;
|
||||
comm->atom_send=NULL;
|
||||
comm->atom_recv=NULL;
|
||||
comm->off_atom_send=NULL;
|
||||
comm->off_atom_recv=NULL;
|
||||
comm->maxsendlist=NULL;
|
||||
comm->sendlist=NULL;
|
||||
comm->buf_send=NULL;
|
||||
comm->buf_recv=NULL;
|
||||
}
|
||||
|
||||
void endComm(Comm* comm)
|
||||
{
|
||||
comm->maxneigh = 0;
|
||||
comm->maxneighexch =0;
|
||||
comm->maxsend = 0;
|
||||
comm->maxrecv = 0;
|
||||
freeBuffers(comm);
|
||||
MPI_Finalize();
|
||||
}
|
||||
|
||||
void setupComm(Comm* comm, Parameter* param, Grid* grid){
|
||||
|
||||
comm->swap[_x][0] = 0; comm->swap[_x][1] =1;
|
||||
comm->swap[_y][0] = 2; comm->swap[_y][1] =3;
|
||||
comm->swap[_z][0] = 4; comm->swap[_z][1] =5;
|
||||
|
||||
comm->swapdim[0] = comm->swapdim[1] = _x;
|
||||
comm->swapdim[2] = comm->swapdim[3] = _y;
|
||||
comm->swapdim[4] = comm->swapdim[5] = _z;
|
||||
|
||||
comm->swapdir[0] = comm->swapdir[2] = comm->swapdir[4] = 0;
|
||||
comm->swapdir[1] = comm->swapdir[3] = comm->swapdir[5] = 1;
|
||||
|
||||
for(int i = 0; i<6; i++){
|
||||
comm->sendfrom[i] = 0;
|
||||
comm->sendtill[i] = 0;
|
||||
comm->recvfrom[i] = 0;
|
||||
comm->recvtill[i] = 0;
|
||||
}
|
||||
|
||||
comm->forwardSize = FORWARD_SIZE; //send coordiantes x,y,z
|
||||
comm->reverseSize = REVERSE_SIZE; //return forces fx, fy, fz
|
||||
comm->ghostSize = GHOST_SIZE; //send x,y,z,type;
|
||||
comm->exchangeSize = EXCHANGE_SIZE; //send x,y,z,vx,vy,vz,type
|
||||
|
||||
//Allocate memory for recv buffer and recv buffer
|
||||
comm->maxsend = BUFMIN;
|
||||
comm->maxrecv = BUFMIN;
|
||||
comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT,(comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
|
||||
comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
|
||||
|
||||
comm->maxneighexch = NEIGHMIN;
|
||||
comm->nexch = (int*) allocate(ALIGNMENT, comm->maxneighexch * sizeof(int));
|
||||
|
||||
comm->maxneigh = NEIGHMIN;
|
||||
comm->nsend = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->nrecv = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->pbc_x = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->pbc_y = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->pbc_z = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->boxes = (Box*) allocate(ALIGNMENT, comm->maxneigh * sizeof(Box));
|
||||
|
||||
neighComm(comm, param, grid);
|
||||
}
|
||||
|
||||
void forwardComm(Comm* comm, Atom* atom, int iswap)
|
||||
{
|
||||
int nrqst=0, offset=0, nsend=0, nrecv=0;
|
||||
int pbc[3];
|
||||
int size = comm->forwardSize;
|
||||
int maxrqst = comm->numneigh;
|
||||
MD_FLOAT* buf;
|
||||
MPI_Request requests[maxrqst];
|
||||
|
||||
for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh];
|
||||
pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
|
||||
packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset*size],pbc);
|
||||
}
|
||||
|
||||
//Receives elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh]*size;
|
||||
nrecv = comm->atom_recv[ineigh]*size;
|
||||
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
|
||||
//Send elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh]*size;
|
||||
nsend = comm->atom_send[ineigh]*size;
|
||||
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
|
||||
}
|
||||
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
if(comm->othersend[iswap]) buf = comm->buf_recv;
|
||||
else buf = comm->buf_send;
|
||||
|
||||
/* unpack buffer */
|
||||
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh];
|
||||
unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset*size]);
|
||||
}
|
||||
}
|
||||
|
||||
void reverseComm(Comm* comm, Atom* atom, int iswap)
|
||||
{
|
||||
int nrqst=0, offset=0, nsend=0, nrecv=0 ;
|
||||
int size = comm->reverseSize;
|
||||
int maxrqst = comm->numneigh;
|
||||
MD_FLOAT* buf;
|
||||
MPI_Request requests[maxrqst];
|
||||
|
||||
for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh];
|
||||
packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset*size]);
|
||||
}
|
||||
//Receives elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh]*size;
|
||||
nrecv = comm->atom_send[ineigh]*size;
|
||||
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
//Send elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh]*size;
|
||||
nsend = comm->atom_recv[ineigh]*size;
|
||||
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nrecv[ineigh],0,world);
|
||||
}
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
if(comm->othersend[iswap]) buf = comm->buf_recv;
|
||||
else buf = comm->buf_send;
|
||||
|
||||
/* unpack buffer */
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh];
|
||||
unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset*size]);
|
||||
}
|
||||
}
|
||||
|
||||
void ghostComm(Comm* comm, Atom* atom,int iswap){
|
||||
|
||||
MD_FLOAT xlo=0, xhi=0, ylo=0, yhi=0, zlo=0, zhi=0;
|
||||
MD_FLOAT* buf;
|
||||
int nrqst=0, nsend=0, nrecv=0, offset=0, ineigh=0, pbc[3];
|
||||
int all_recv=0, all_send=0, currentSend=0;
|
||||
int size = comm->ghostSize;
|
||||
int maxrqrst = comm->numneigh;
|
||||
MPI_Request requests[maxrqrst];
|
||||
for(int i = 0; i<maxrqrst; i++)
|
||||
requests[maxrqrst]=MPI_REQUEST_NULL;
|
||||
if(iswap%2==0) comm->iterAtom = LOCAL+GHOST;
|
||||
int iter = 0;
|
||||
for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
|
||||
{
|
||||
Box* tile = &comm->boxes[ineigh];
|
||||
|
||||
xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z];
|
||||
xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];
|
||||
pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
|
||||
nsend = 0;
|
||||
|
||||
for(int i = 0; i < comm->iterAtom ; i++)
|
||||
{
|
||||
if(IsinRegionToSend(i)){
|
||||
if(nsend >= comm->maxsendlist[ineigh]) growList(comm,ineigh,nsend);
|
||||
if(currentSend + size >= comm->maxsend) growSend(comm,currentSend);
|
||||
comm->sendlist[ineigh][nsend++] = i;
|
||||
currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);
|
||||
}
|
||||
}
|
||||
comm->atom_send[ineigh] = nsend; //#atoms send per neigh
|
||||
comm->off_atom_send[ineigh] = all_send; //offset atom respect to neighbours in a swap
|
||||
all_send += nsend; //all atoms send
|
||||
}
|
||||
//Receives how many elements to be received.
|
||||
if(comm->othersend[iswap])
|
||||
for(nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++)
|
||||
MPI_Irecv(&comm->atom_recv[ineigh],1,MPI_INT,comm->nrecv[ineigh],0,world,&requests[nrqst++]);
|
||||
|
||||
if(!comm->othersend[iswap]) comm->atom_recv[comm->recvfrom[iswap]] = nsend;
|
||||
|
||||
//Communicate how many elements to be sent.
|
||||
if(comm->othersend[iswap])
|
||||
for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
|
||||
MPI_Send(&comm->atom_send[ineigh],1,MPI_INT,comm->nsend[ineigh],0,world);
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
//Define offset to store in the recv_buff
|
||||
for(int ineigh = comm->recvfrom[iswap]; ineigh<comm->recvtill[iswap]; ineigh++){
|
||||
comm->off_atom_recv[ineigh] = all_recv;
|
||||
all_recv += comm->atom_recv[ineigh];
|
||||
}
|
||||
|
||||
if(all_recv*size>=comm->maxrecv) growRecv(comm,all_recv*size);
|
||||
|
||||
//Receives elements
|
||||
if(comm->othersend[iswap])
|
||||
for (nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh]*size;
|
||||
nrecv = comm->atom_recv[ineigh]*size;
|
||||
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
//Send elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh]*size;
|
||||
nsend = comm->atom_send[ineigh]*size;
|
||||
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
|
||||
}
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
if(comm->othersend[iswap]) buf = comm->buf_recv;
|
||||
else buf = comm->buf_send;
|
||||
//unpack elements
|
||||
comm->firstrecv[iswap] = LOCAL+GHOST;
|
||||
for(int i = 0; i < all_recv; i++)
|
||||
unpackGhost(atom, LOCAL+GHOST, &buf[i*size]);
|
||||
|
||||
//Increases the buffer if needed
|
||||
int max_size = MAX(comm->forwardSize,comm->reverseSize);
|
||||
int max_buf = max_size * MAX(all_recv, all_send);
|
||||
if(max_buf>=comm->maxrecv) growRecv(comm,max_buf);
|
||||
if(max_buf>=comm->maxsend) growSend(comm,max_buf);
|
||||
}
|
||||
|
||||
void exchangeComm(Comm* comm, Atom* atom){
|
||||
|
||||
MD_FLOAT x,y,z;
|
||||
MD_FLOAT *lo = atom->mybox.lo;
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
int size = comm->exchangeSize;
|
||||
int numneigh = comm->numneighexch;
|
||||
int offset_recv[numneigh];
|
||||
int size_recv[numneigh];
|
||||
MPI_Request requests[numneigh];
|
||||
int i =0, nsend = 0, nrecv = 0;
|
||||
int nrqst = 0;
|
||||
int nlocal, offset,m;
|
||||
|
||||
/* enforce PBC */
|
||||
pbc(atom);
|
||||
|
||||
if(comm->numneigh == 0) return;
|
||||
|
||||
nlocal = atom->Nlocal;
|
||||
while(i < nlocal) {
|
||||
if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
|
||||
atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
|
||||
atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
|
||||
if(nsend+size >= comm->maxsend) growSend(comm, nsend);
|
||||
nsend += packExchange(atom, i, &comm->buf_send[nsend]);
|
||||
copy(atom, i, nlocal-1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
atom->Nlocal = nlocal;
|
||||
|
||||
/* send/recv number of to share atoms with neighbouring procs*/
|
||||
for(int ineigh = 0; ineigh < numneigh; ineigh++)
|
||||
MPI_Irecv(&size_recv[ineigh],1,MPI_INT,comm->nexch[ineigh],0,world,&requests[nrqst++]);
|
||||
|
||||
for (int ineigh = 0; ineigh < numneigh; ineigh++)
|
||||
MPI_Send(&nsend,1,MPI_INT,comm->nexch[ineigh],0,world);
|
||||
MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
//Define offset to store in the recv_buff
|
||||
for(int ineigh = 0; ineigh<numneigh; ineigh++){
|
||||
offset_recv[ineigh] = nrecv;
|
||||
nrecv += size_recv[ineigh];
|
||||
}
|
||||
|
||||
if(nrecv >= comm->maxrecv) growRecv(comm,nrecv);
|
||||
|
||||
//Receives elements
|
||||
nrqst=0;
|
||||
for (int ineigh = 0; ineigh< numneigh; ineigh++){
|
||||
offset = offset_recv[ineigh];
|
||||
MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
//Send elements
|
||||
for (int ineigh = 0; ineigh< numneigh; ineigh++)
|
||||
MPI_Send(comm->buf_send,nsend,type,comm->nexch[ineigh],0,world);
|
||||
MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
nlocal = atom->Nlocal;
|
||||
m = 0;
|
||||
while(m < nrecv) {
|
||||
x = comm->buf_recv[m + _x];
|
||||
y = comm->buf_recv[m + _y];
|
||||
z = comm->buf_recv[m + _z];
|
||||
|
||||
if(x >= lo[_x] && x < hi[_x] &&
|
||||
y >= lo[_y] && y < hi[_y] &&
|
||||
z >= lo[_z] && z < hi[_z]){
|
||||
m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
|
||||
} else {
|
||||
m += size;
|
||||
}
|
||||
}
|
||||
atom->Nlocal = nlocal;
|
||||
|
||||
int all_atoms=0;
|
||||
MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
|
||||
if(atom->Natoms!=all_atoms && comm->myproc ==0){
|
||||
printf("Losing atoms! current atoms:%d expected atoms:%d\n",all_atoms,atom->Natoms);
|
||||
}
|
||||
}
|
||||
|
||||
//Internal functions
|
||||
|
||||
inline void growRecv(Comm* comm, int n)
|
||||
{
|
||||
comm -> maxrecv = BUFFACTOR * n;
|
||||
if(comm->buf_recv) free(comm -> buf_recv);
|
||||
comm -> buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
|
||||
}
|
||||
|
||||
inline void growSend(Comm* comm, int n)
|
||||
{
|
||||
size_t oldByteSize = (comm->maxsend+BUFEXTRA)*sizeof(MD_FLOAT);
|
||||
comm -> maxsend = BUFFACTOR * n;
|
||||
comm -> buf_send = (MD_FLOAT*) reallocate(comm->buf_send, ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT), oldByteSize);
|
||||
}
|
||||
|
||||
inline void growList(Comm* comm, int ineigh, int n)
|
||||
{
|
||||
size_t oldByteSize = comm->maxsendlist[ineigh]*sizeof(int);
|
||||
comm->maxsendlist[ineigh] = BUFFACTOR * n;
|
||||
comm->sendlist[ineigh] = (int*) reallocate(comm->sendlist[ineigh],ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int), oldByteSize);
|
||||
}
|
||||
|
||||
static inline void allocDynamicBuffers(Comm* comm)
|
||||
{
|
||||
//Buffers depending on the # of my neighs
|
||||
int numneigh = comm->numneigh;
|
||||
comm->atom_send = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
|
||||
comm->atom_recv = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
|
||||
comm->off_atom_send = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
|
||||
comm->off_atom_recv = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
|
||||
comm->maxsendlist = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
|
||||
|
||||
for(int i = 0; i < numneigh; i++)
|
||||
comm->maxsendlist[i] = BUFMIN;
|
||||
|
||||
comm->sendlist = (int**) allocate(ALIGNMENT, numneigh * sizeof(int*));
|
||||
for(int i = 0; i < numneigh; i++)
|
||||
comm->sendlist[i] = (int*) allocate(ALIGNMENT, comm->maxsendlist[i] * sizeof(int));
|
||||
}
|
||||
|
||||
static inline void freeDynamicBuffers(Comm* comm)
|
||||
{
|
||||
int numneigh =comm->numneigh;
|
||||
|
||||
if(comm->atom_send) free(comm->atom_send);
|
||||
if(comm->atom_recv) free(comm->atom_recv);
|
||||
if(comm->off_atom_send) free(comm->off_atom_send);
|
||||
if(comm->off_atom_recv) free(comm->off_atom_recv);
|
||||
if(comm->maxsendlist) free(comm->maxsendlist);
|
||||
if(comm->sendlist){
|
||||
for(int i = 0; i < numneigh; i++)
|
||||
if(comm->sendlist[i]) free(comm->sendlist[i]);
|
||||
}
|
||||
if(comm->sendlist) free(comm->sendlist);
|
||||
}
|
||||
|
||||
static inline void freeBuffers(Comm* comm)
|
||||
{
|
||||
if(comm->nrecv) free(comm->nrecv);
|
||||
if(comm->nsend) free(comm->nsend);
|
||||
if(comm->nexch) free(comm->nexch);
|
||||
if(comm->pbc_x) free(comm->pbc_x);
|
||||
if(comm->pbc_y) free(comm->pbc_y);
|
||||
if(comm->pbc_z) free(comm->pbc_z);
|
||||
if(comm->boxes) free(comm->boxes);
|
||||
if(comm->atom_send) free(comm->atom_send);
|
||||
if(comm->atom_recv) free(comm->atom_recv);
|
||||
if(comm->off_atom_send) free(comm->off_atom_send);
|
||||
if(comm->off_atom_recv) free(comm->off_atom_recv);
|
||||
if(comm->maxsendlist) free(comm->maxsendlist);
|
||||
|
||||
if(comm->sendlist){
|
||||
for(int i = 0; i < comm->numneigh; i++)
|
||||
if(comm->sendlist[i]) free(comm->sendlist[i]);
|
||||
}
|
||||
if(comm->sendlist) free(comm->sendlist);
|
||||
|
||||
if(comm->buf_send) free(comm->buf_send);
|
||||
if(comm->buf_recv) free(comm->buf_recv);
|
||||
}
|
490
common/grid.c
Normal file
490
common/grid.c
Normal file
@ -0,0 +1,490 @@
|
||||
#include <stdio.h>
|
||||
#include <grid.h>
|
||||
#include <mpi.h>
|
||||
#include <parameter.h>
|
||||
#include <allocate.h>
|
||||
#include <util.h>
|
||||
#include <math.h>
|
||||
|
||||
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
|
||||
|
||||
//Grommacs Balancing
|
||||
MD_FLOAT f_normalization(MD_FLOAT* x,MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {
|
||||
|
||||
MD_FLOAT sum=0;
|
||||
for(int n = 0; n<nprocs; n++){
|
||||
fx[n] = MAX(minx,x[n]);
|
||||
sum+=fx[n];
|
||||
}
|
||||
|
||||
for(int n = 0; n<nprocs; n++)
|
||||
fx[n] /= sum;
|
||||
}
|
||||
|
||||
void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
|
||||
{
|
||||
MD_FLOAT tolerance = 1e-3;
|
||||
MD_FLOAT alpha = 0.5;
|
||||
MD_FLOAT *fx = (MD_FLOAT*) malloc(nprocs*sizeof(MD_FLOAT));
|
||||
int maxIterations = 100;
|
||||
|
||||
for (int i = 0; i < maxIterations; i++) {
|
||||
|
||||
int converged = 1;
|
||||
f_normalization(x0,fx,minx,nprocs);
|
||||
|
||||
for(int n=0; n<nprocs; n++)
|
||||
fx[n]= (1-alpha) * x0[n] + alpha * fx[n];
|
||||
|
||||
for (int n=0; n<nprocs; n++) {
|
||||
if (fabs(fx[n] - x0[n]) >= tolerance) {
|
||||
converged = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int n=0; n<nprocs; n++)
|
||||
x0[n] = fx[n];
|
||||
|
||||
if(converged){
|
||||
for(int n = 0; n<nprocs; n++)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
|
||||
{
|
||||
int me;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int *coord = grid->coord;
|
||||
int *nprocs = grid ->nprocs;
|
||||
//Elapsed time since the last rebalance
|
||||
double time = newTime - grid->Timer;
|
||||
grid->Timer = newTime;
|
||||
//store the older dimm to compare later for exchange
|
||||
MD_FLOAT lo[3], hi[3];
|
||||
for(int dim = 0; dim< 3; dim++){
|
||||
lo[dim] = atom->mybox.lo[dim];
|
||||
hi[dim] = atom->mybox.hi[dim];
|
||||
}
|
||||
|
||||
//Define parameters
|
||||
MPI_Comm subComm[3];
|
||||
int color[3] = {0,0,0};
|
||||
int id[3] = {0,0,0};
|
||||
MD_FLOAT ** load = (MD_FLOAT**) malloc(3*sizeof(MD_FLOAT*));
|
||||
for(int dim = 0; dim<3; dim++)
|
||||
load[dim] = (MD_FLOAT*) malloc(nprocs[dim]*sizeof(MD_FLOAT));
|
||||
|
||||
int maxprocs = MAX(MAX(nprocs[_x],nprocs[_y]),nprocs[_z]);
|
||||
MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs*sizeof(MD_FLOAT));
|
||||
MD_FLOAT* limits = (MD_FLOAT*) malloc(2*maxprocs*sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... Repeat values in between to perfom MPI_Scatter later
|
||||
MD_FLOAT t_sum[3] = {0,0,0};
|
||||
MD_FLOAT recv_buf[2] = {0,0}; //Each proc only receives 2 elments per dimension xlo and xhi
|
||||
MD_FLOAT balancedLoad[3] = {0,0,0}; //1/nprocs
|
||||
MD_FLOAT minLoad[3] = {0,0,0}; //beta*(1/nprocs)
|
||||
MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
|
||||
MD_FLOAT boundaries[6] ={0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi
|
||||
|
||||
//Create sub-communications along each dimension
|
||||
for(int dim = 0; dim<3; dim++){
|
||||
if(dim == _x){
|
||||
color[_x] = (coord[_y] == 0 && coord[_z] ==0) ? 1:MPI_UNDEFINED;
|
||||
id[_x] = me;
|
||||
} else if(dim == _y) {
|
||||
color[_y] = coord[_z] == 0 ? coord[_x]:MPI_UNDEFINED;
|
||||
id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0:me;
|
||||
} else {
|
||||
color[_z]= coord[_y]*nprocs[_x]+coord[_x];
|
||||
id[_z] = coord[_z] == 0 ? 0 : me;
|
||||
}
|
||||
MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
|
||||
}
|
||||
|
||||
//Set the minimum load and the balance load
|
||||
for(int dim = 0; dim<3; dim++){
|
||||
balancedLoad[dim] = 1./nprocs[dim];
|
||||
minLoad[dim] = 0.8*balancedLoad[dim];
|
||||
}
|
||||
//set and communicate the workload in reverse order
|
||||
for(int dim = _z; dim>= _x; dim--)
|
||||
{
|
||||
if(subComm[dim] != MPI_COMM_NULL){
|
||||
MPI_Gather(&time,1,type,load[dim],1,type,0,subComm[dim]);
|
||||
|
||||
if(id[dim] == 0)
|
||||
{
|
||||
for(int n=0; n<nprocs[dim]; n++)
|
||||
t_sum[dim] += load[dim][n];
|
||||
|
||||
for(int n=0; n<nprocs[dim]; n++)
|
||||
load[dim][n] /= t_sum[dim];
|
||||
}
|
||||
time =t_sum[dim];
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
|
||||
//Brodacast the new boundaries along dimensions
|
||||
for(int dim=0; dim<3; dim++){
|
||||
|
||||
if(subComm[dim] != MPI_COMM_NULL){
|
||||
|
||||
MPI_Bcast(boundaries,6,type,0,subComm[dim]);
|
||||
if(id[dim] == 0) {
|
||||
fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
|
||||
MD_FLOAT inv_sum=0;
|
||||
for(int n=0; n<nprocs[dim];n++)
|
||||
inv_sum +=(1/load[dim][n]);
|
||||
|
||||
for(int n=0; n<nprocs[dim];n++)
|
||||
cellSize[n] = (prd[dim]/load[dim][n])*(1./inv_sum);
|
||||
|
||||
MD_FLOAT sum=0;
|
||||
for(int n=0; n<nprocs[dim]; n++){
|
||||
limits[2*n] = sum;
|
||||
limits[2*n+1] = sum+cellSize[n];
|
||||
sum+= cellSize[n];
|
||||
}
|
||||
limits[2*nprocs[dim]-1] = prd[dim];
|
||||
}
|
||||
MPI_Scatter(limits,2,type,recv_buf,2,type,0,subComm[dim]);
|
||||
boundaries[2*dim] = recv_buf[0];
|
||||
boundaries[2*dim+1] = recv_buf[1];
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
|
||||
atom->mybox.lo[_x]=boundaries[0]; atom->mybox.hi[_x]=boundaries[1];
|
||||
atom->mybox.lo[_y]=boundaries[2]; atom->mybox.hi[_y]=boundaries[3];
|
||||
atom->mybox.lo[_z]=boundaries[4]; atom->mybox.hi[_z]=boundaries[5];
|
||||
|
||||
MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
|
||||
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
|
||||
|
||||
//because cells change dynamically, It is required to increase the neighbouring exchange region
|
||||
for(int dim =_x; dim<=_z; dim++){
|
||||
MD_FLOAT dr,dr_max;
|
||||
int n = grid->nprocs[dim];
|
||||
MD_FLOAT maxdelta = 0.2*prd[dim];
|
||||
dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]),fabs(hi[dim] - atom->mybox.hi[dim]));
|
||||
MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
|
||||
grid->cutneigh[dim] = param->cutneigh+dr_max;
|
||||
}
|
||||
|
||||
for(int dim=0; dim<3; dim++) {
|
||||
if(subComm[dim] != MPI_COMM_NULL){
|
||||
MPI_Comm_free(&subComm[dim]);
|
||||
}
|
||||
free(load[dim]);
|
||||
}
|
||||
free(load);
|
||||
free(limits);
|
||||
}
|
||||
|
||||
//RCB Balancing
|
||||
MD_FLOAT meanTimeBisect(Atom *atom, MPI_Comm subComm, int dim, double time)
|
||||
{
|
||||
MD_FLOAT mean=0, sum=0, total_sum=0, weightAtoms= 0, total_weight=0;
|
||||
|
||||
for(int i=0; i<atom->Nlocal; i++){
|
||||
sum += atom_pos(i);
|
||||
}
|
||||
sum*=time;
|
||||
weightAtoms = atom->Nlocal*time;
|
||||
MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
|
||||
MPI_Allreduce(&weightAtoms, &total_weight, 1, type, MPI_SUM, subComm);
|
||||
|
||||
mean = total_sum/total_weight;
|
||||
return mean;
|
||||
}
|
||||
|
||||
MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
|
||||
{
|
||||
int Natoms = 0;
|
||||
MD_FLOAT sum=0, mean=0, total_sum=0;
|
||||
|
||||
for(int i=0; i<atom->Nlocal; i++){
|
||||
sum += atom_pos(i);
|
||||
}
|
||||
MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
|
||||
MPI_Allreduce(&atom->Nlocal, &Natoms, 1, MPI_INT, MPI_SUM, subComm);
|
||||
mean = total_sum/Natoms;
|
||||
return mean;
|
||||
}
|
||||
|
||||
void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim ,int* color, int ilevel, double time)
|
||||
{
|
||||
int rank, size;
|
||||
int branch = 0, i = 0, m = 0;
|
||||
int nsend = 0, nrecv = 0, nrecv2 = 0;
|
||||
int values_per_atom = 7;
|
||||
MD_FLOAT bisection, pos;
|
||||
MPI_Request request[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
|
||||
MPI_Comm_rank(subComm,&rank);
|
||||
MPI_Comm_size(subComm,&size);
|
||||
|
||||
int odd = size%2;
|
||||
int extraProc = odd ? size-1:size;
|
||||
int half = (int) (0.5*size);
|
||||
int partner = (rank<half) ? rank+half:rank-half;
|
||||
if(odd && rank == extraProc) partner = 0;
|
||||
//Apply the bisection
|
||||
bisection = method(atom,subComm,dim,time);
|
||||
//Define the new boundaries
|
||||
if(rank<half){
|
||||
atom->mybox.hi[dim] = bisection;
|
||||
branch = 0;
|
||||
} else {
|
||||
atom->mybox.lo[dim] = bisection;
|
||||
branch = 1;
|
||||
}
|
||||
//Define new color for the further communicaton
|
||||
*color = (branch << ilevel) | *color;
|
||||
//Grow the send buffer
|
||||
if(atom->Nlocal>=grid->maxsend){
|
||||
if(grid->buf_send) free(grid->buf_send);
|
||||
grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal*values_per_atom* sizeof(MD_FLOAT));
|
||||
grid->maxsend = atom->Nlocal;
|
||||
}
|
||||
//buffer particles to send
|
||||
while(i < atom->Nlocal) {
|
||||
pos = atom_pos(i);
|
||||
if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
|
||||
nsend += packExchange(atom, i, &grid->buf_send[nsend]);
|
||||
copy(atom, i, atom->Nlocal-1);
|
||||
atom->Nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
|
||||
//Communicate the number of elements to be sent
|
||||
if(rank < extraProc){
|
||||
MPI_Irecv(&nrecv,1,MPI_INT,partner,0,subComm,&request[0]);
|
||||
}
|
||||
if(odd && rank == 0){
|
||||
MPI_Irecv(&nrecv2,1,MPI_INT,extraProc,0,subComm,&request[1]);
|
||||
}
|
||||
MPI_Send(&nsend,1,MPI_INT,partner,0,subComm);
|
||||
MPI_Waitall(2,request,MPI_STATUS_IGNORE);
|
||||
|
||||
//Grow the recv buffer
|
||||
if(nrecv+nrecv2>=grid->maxrecv){
|
||||
if(grid->buf_recv) free(grid->buf_recv);
|
||||
grid->buf_recv = (MD_FLOAT*) malloc((nrecv+nrecv2)*values_per_atom*sizeof(MD_FLOAT));
|
||||
grid->maxrecv = nrecv+nrecv2;
|
||||
}
|
||||
|
||||
//communicate elements in the buffer
|
||||
request[0] = MPI_REQUEST_NULL;
|
||||
request[1] = MPI_REQUEST_NULL;
|
||||
|
||||
if(rank < extraProc){
|
||||
MPI_Irecv(grid->buf_recv,nrecv,type,partner,0,subComm,&request[0]);
|
||||
}
|
||||
if(odd && rank == 0){
|
||||
MPI_Irecv(&grid->buf_recv[nrecv],nrecv2,type,extraProc,0,subComm,&request[1]);
|
||||
}
|
||||
MPI_Send (grid->buf_send,nsend,type,partner,0,subComm);
|
||||
MPI_Waitall(2,request,MPI_STATUS_IGNORE);
|
||||
|
||||
//store atoms in atom list
|
||||
while(m < nrecv+nrecv2){
|
||||
m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
|
||||
}
|
||||
}
|
||||
|
||||
void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
|
||||
{
|
||||
int me, nprocs=0, ilevel=0, nboxes=1;
|
||||
int color = 0, size =0;
|
||||
int index, prd[3];
|
||||
MPI_Comm subComm;
|
||||
MPI_Comm_size(world, &nprocs);
|
||||
MPI_Comm_rank(world, &me);
|
||||
|
||||
//set the elapsed time since the last dynamic balance
|
||||
double time = newTime - grid->Timer;
|
||||
|
||||
prd[_x] = atom->mybox.xprd = param->xprd;
|
||||
prd[_y] = atom->mybox.yprd = param->yprd;
|
||||
prd[_z] = atom->mybox.zprd = param->zprd;
|
||||
|
||||
//Sort by larger dimension
|
||||
int largerDim[3] ={_x, _y, _z};
|
||||
|
||||
for(int i = 0; i< 2; i++){
|
||||
for(int j = i+1; j<3; j++)
|
||||
{
|
||||
if(prd[largerDim[j]]>prd[largerDim[i]]){
|
||||
MD_FLOAT tmp = largerDim[j];
|
||||
largerDim[j] = largerDim[i];
|
||||
largerDim[i] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
//Initial Partition
|
||||
atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
|
||||
atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
|
||||
atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;
|
||||
|
||||
//Recursion tree
|
||||
while(nboxes<nprocs)
|
||||
{
|
||||
index = ilevel%ndim;
|
||||
MPI_Comm_split(world, color, me, &subComm);
|
||||
MPI_Comm_size(subComm,&size);
|
||||
if(size > 1){
|
||||
nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
|
||||
}
|
||||
MPI_Comm_free(&subComm);
|
||||
nboxes = pow(2,++ilevel);
|
||||
}
|
||||
//Set the new timer grid
|
||||
grid->Timer = newTime;
|
||||
|
||||
//Creating the global map
|
||||
MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
|
||||
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
|
||||
|
||||
//Define the same cutneighbour in all dimensions for the exchange communication
|
||||
for(int dim =_x; dim<=_z; dim++)
|
||||
grid->cutneigh[dim] = param->cutneigh;
|
||||
}
|
||||
|
||||
//Regular grid
|
||||
void cartisian3d(Grid* grid, Parameter* param, Box* box)
|
||||
{
|
||||
int me, nproc;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
int numdim=3;
|
||||
int reorder=0;
|
||||
int periods[3]={1,1,1};
|
||||
int mycoord[3]={0,0,0};
|
||||
int griddim[3]={0,0,0};
|
||||
MD_FLOAT len[3];
|
||||
MPI_Comm cartesian;
|
||||
|
||||
box->xprd = param->xprd;
|
||||
box->yprd = param->yprd;
|
||||
box->zprd = param->zprd;
|
||||
|
||||
//Creates a cartesian 3d grid
|
||||
MPI_Dims_create(nproc, numdim, griddim);
|
||||
MPI_Cart_create(world,numdim,griddim,periods,reorder,&cartesian);
|
||||
grid->nprocs[_x] = griddim[_x];
|
||||
grid->nprocs[_y] = griddim[_y];
|
||||
grid->nprocs[_z] = griddim[_z];
|
||||
|
||||
//Coordinates position in the grid
|
||||
MPI_Cart_coords(cartesian,me,3,mycoord);
|
||||
grid->coord[_x] = mycoord[_x];
|
||||
grid->coord[_y] = mycoord[_y];
|
||||
grid->coord[_z] = mycoord[_z];
|
||||
|
||||
//boundaries of my local box, with origin in (0,0,0).
|
||||
len[_x] = param->xprd / griddim[_x];
|
||||
len[_y] = param->yprd / griddim[_y];
|
||||
len[_z] = param->zprd / griddim[_z];
|
||||
|
||||
box->lo[_x] = mycoord[_x] * len[_x];
|
||||
box->hi[_x] = (mycoord[_x] + 1) * len[_x];
|
||||
box->lo[_y] = mycoord[_y] * len[_y];
|
||||
box->hi[_y] = (mycoord[_y] + 1) * len[_y];
|
||||
box->lo[_z] = mycoord[_z] * len[_z];
|
||||
box->hi[_z] = (mycoord[_z] + 1) * len[_z];
|
||||
|
||||
MD_FLOAT domain[6] = {box->lo[_x], box->lo[_y], box->lo[_z], box->hi[_x], box->hi[_y], box->hi[_z]};
|
||||
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
|
||||
MPI_Comm_free(&cartesian);
|
||||
|
||||
//Define the same cutneighbour in all dimensions for the exchange communication
|
||||
for(int dim =_x; dim<=_z; dim++)
|
||||
grid->cutneigh[dim] = param->cutneigh;
|
||||
}
|
||||
|
||||
//Other Functions from the grid
|
||||
void initGrid(Grid* grid)
|
||||
{ //start with regular grid
|
||||
int nprocs;
|
||||
MPI_Comm_size(world, &nprocs);
|
||||
grid->map_size = 6 * nprocs;
|
||||
grid->map = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));
|
||||
//========rcb=======
|
||||
grid->maxsend = 0;
|
||||
grid->maxrecv = 0;
|
||||
grid->buf_send = NULL;
|
||||
grid->buf_recv = NULL;
|
||||
//====staggered=====
|
||||
grid->Timer = 0.;
|
||||
}
|
||||
|
||||
void setupGrid(Grid* grid, Atom* atom, Parameter* param)
|
||||
{
|
||||
int me;
|
||||
MD_FLOAT xlo, ylo, zlo, xhi, yhi, zhi;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
initGrid(grid);
|
||||
|
||||
//Set the origin at (0,0,0)
|
||||
if(param->input_file){
|
||||
for(int i=0; i<atom->Nlocal; i++){
|
||||
atom_x(i) = atom_x(i) - param->xlo;
|
||||
atom_y(i) = atom_y(i) - param->ylo;
|
||||
atom_z(i) = atom_z(i) - param->zlo;
|
||||
}
|
||||
}
|
||||
|
||||
cartisian3d(grid, param, &atom->mybox);
|
||||
|
||||
xlo = atom->mybox.lo[_x]; xhi = atom->mybox.hi[_x];
|
||||
ylo = atom->mybox.lo[_y]; yhi = atom->mybox.hi[_y];
|
||||
zlo = atom->mybox.lo[_z]; zhi = atom->mybox.hi[_z];
|
||||
|
||||
int i = 0;
|
||||
while(i < atom->Nlocal)
|
||||
{
|
||||
if(atom_x(i) >= xlo && atom_x(i)< xhi &&
|
||||
atom_y(i) >= ylo && atom_y(i)< yhi &&
|
||||
atom_z(i) >= zlo && atom_z(i)< zhi)
|
||||
{
|
||||
i++;
|
||||
} else {
|
||||
copy(atom, i, atom->Nlocal-1);
|
||||
atom->Nlocal--;
|
||||
}
|
||||
}
|
||||
|
||||
//printGrid(grid);
|
||||
if(!param->balance){
|
||||
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
|
||||
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
}
|
||||
|
||||
void printGrid(Grid* grid)
|
||||
{
|
||||
int me, nprocs;
|
||||
MPI_Comm_size(world, &nprocs);
|
||||
MPI_Comm_rank(world, &me);
|
||||
MD_FLOAT* map = grid->map;
|
||||
if(me==0)
|
||||
{
|
||||
|
||||
printf("GRID:\n");
|
||||
printf("===================================================================================================\n");
|
||||
for(int i=0; i<nprocs; i++)
|
||||
printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
|
||||
printf("\n\n");
|
||||
//printf("Box processor:%i\n xlo:%.4f\txhi:%.4f\n ylo:%.4f\tyhi:%.4f\n zlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
|
||||
|
||||
|
22
common/includes/box.h
Normal file
22
common/includes/box.h
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __BOX_H_
|
||||
#define __BOX_H_
|
||||
|
||||
typedef struct {
|
||||
int id;
|
||||
MD_FLOAT xprd, yprd, zprd; //Domain Dimension
|
||||
MD_FLOAT lo[3]; //smallest coordinate of my subdomain
|
||||
MD_FLOAT hi[3]; //Highest coordinate of my subdomain
|
||||
} Box;
|
||||
|
||||
int overlapBox(int, int , const Box*, const Box* , Box* , MD_FLOAT , MD_FLOAT);
|
||||
int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
|
||||
void expandBox(int , const Box*, const Box* , Box* , MD_FLOAT);
|
||||
#endif
|
104
common/includes/comm.h
Normal file
104
common/includes/comm.h
Normal file
@ -0,0 +1,104 @@
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <box.h>
|
||||
#include <grid.h>
|
||||
|
||||
#ifndef COMM_H
|
||||
#define COMM_H
|
||||
|
||||
#ifdef GROMACS
|
||||
#define FORWARD_SIZE (3*CLUSTER_N)
|
||||
#define REVERSE_SIZE (3*CLUSTER_N)
|
||||
#define GHOST_SIZE (4*CLUSTER_N+10)
|
||||
#define EXCHANGE_SIZE 7
|
||||
|
||||
#define JFAC MAX(1, CLUSTER_N / CLUSTER_M)
|
||||
#define LOCAL atom->Nclusters_local / JFAC
|
||||
#define GHOST atom->Nclusters_ghost
|
||||
|
||||
#define IsinRegionToSend(cj) \
|
||||
((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo) && \
|
||||
(atom->jclusters[(cj)].bbminx < xhi || atom->jclusters[(cj)].bbmaxx < xhi) && \
|
||||
(atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo) && \
|
||||
(atom->jclusters[(cj)].bbminy < yhi || atom->jclusters[(cj)].bbmaxy < yhi) && \
|
||||
(atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo) && \
|
||||
(atom->jclusters[(cj)].bbminz < zhi || atom->jclusters[(cj)].bbmaxz < zhi))
|
||||
|
||||
#else
|
||||
|
||||
#define FORWARD_SIZE 3
|
||||
#define REVERSE_SIZE 3
|
||||
#define GHOST_SIZE 4
|
||||
#define EXCHANGE_SIZE 7
|
||||
#define LOCAL atom->Nlocal
|
||||
#define GHOST atom->Nghost
|
||||
|
||||
#define IsinRegionToSend(i) \
|
||||
((atom_x((i)) >= xlo && atom_x((i)) < xhi) && \
|
||||
(atom_y((i)) >= ylo && atom_y((i)) < yhi) && \
|
||||
(atom_z((i)) >= zlo && atom_z((i)) < zhi))
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int myproc; // my proc ID
|
||||
int numproc; // # of processors
|
||||
|
||||
int numneigh; // # of all my neighs along all swaps
|
||||
int maxneigh; // Buffer size for my neighs
|
||||
int sendfrom[6]; //return the lowest neigh index to send in each swap
|
||||
int sendtill[6]; //return the highest neigh index to send in each swao
|
||||
int recvfrom[6]; //return the lowest neigh index to recv in each swap
|
||||
int recvtill[6]; //return the highest neigh index to recv in each swap
|
||||
int* nsend; // neigh whose I want to send
|
||||
int* nrecv; // neigh whose I want to recv
|
||||
|
||||
int* pbc_x; // if pbc in x
|
||||
int* pbc_y; // if pbc in y
|
||||
int* pbc_z; // if pbc in z
|
||||
|
||||
int* atom_send, *atom_recv; // # of atoms to send/recv for each of my neighs
|
||||
int* off_atom_send; // atom offset to send, inside of a swap
|
||||
int* off_atom_recv; // atom offset to recv, inside of a swap
|
||||
|
||||
int* nexch; //procs to exchange
|
||||
int numneighexch; //# of neighbours to exchange
|
||||
int maxneighexch; //max buff size to store neighbours
|
||||
|
||||
int numswap; // # of swaps to perform, it is 6
|
||||
int swapdim[6]; // dimension of the swap (_x, _y or _z)
|
||||
int swapdir[6]; // direction of the swap 0 or 1
|
||||
int swap[3][2]; // given a dim and dir, knows the swap
|
||||
int othersend[6]; // Determine if a proc interact with more procs in a given swap
|
||||
|
||||
int firstrecv[6]; // where to put 1st recv atom in each swap
|
||||
int** sendlist; // list of atoms to send in each swap
|
||||
int* maxsendlist; // max # of atoms send in each list-swap
|
||||
|
||||
int maxsend; // max elements in buff sender
|
||||
int maxrecv; // max elements in buff receiver
|
||||
MD_FLOAT* buf_send; // sender buffer for all comm
|
||||
MD_FLOAT* buf_recv; // receicer buffer for all comm
|
||||
|
||||
int forwardSize; // # of paramaters per atom in forward comm.
|
||||
int reverseSize; // # of parameters per atom in reverse
|
||||
int exchangeSize; // # of parameters per atom in exchange
|
||||
int ghostSize; // # of parameters per atom in ghost list
|
||||
|
||||
int iterAtom; //last atom to iterate in each swap.
|
||||
Box* boxes; // Boundaries to be sent to other procs as ghost.
|
||||
} Comm;
|
||||
|
||||
|
||||
void initComm(int*, char***, Comm*); //Init MPI
|
||||
void endComm(Comm*); //End MPI
|
||||
void setupComm(Comm*,Parameter*,Grid*); //Creates a 3d grid or rcb grid
|
||||
void neighComm(Comm*,Parameter*,Grid*); //Find neighbours within cut-off and defines ghost regions
|
||||
void forwardComm(Comm*,Atom*,int); //Send info in one direction
|
||||
void reverseComm(Comm*,Atom*,int); //Return info after forward communication
|
||||
void exchangeComm(Comm*,Atom*); //Exchange info between procs
|
||||
void ghostComm(Comm*, Atom*,int); //Build the ghost neighbours to send during next forwards
|
||||
void growSend(Comm*,int); //Grows the size of the buffer sender
|
||||
void growRecv(Comm*,int); //Grows the size of the buffer receiver
|
||||
void growList(Comm*, int, int); //Grows the size of the list to send
|
||||
#endif
|
51
common/includes/grid.h
Normal file
51
common/includes/grid.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
|
||||
|
||||
#include <parameter.h>
|
||||
#include <box.h>
|
||||
#include <atom.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#ifndef __MAP_H_
|
||||
#define __MAP_H_
|
||||
|
||||
#define world MPI_COMM_WORLD
|
||||
#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))
|
||||
|
||||
enum {RCB=1, meanTimeRCB, Staggered};
|
||||
|
||||
typedef struct {
|
||||
int balance_every;
|
||||
int map_size;
|
||||
MD_FLOAT* map;
|
||||
//===Param for Staggerd balance
|
||||
int nprocs[3];
|
||||
int coord[3];
|
||||
MD_FLOAT cutneigh[3];
|
||||
double Timer;
|
||||
//===Param for RCB balance
|
||||
MD_FLOAT* buf_send;
|
||||
MD_FLOAT* buf_recv;
|
||||
int maxsend;
|
||||
int maxrecv;
|
||||
} Grid;
|
||||
|
||||
|
||||
typedef MD_FLOAT(*RCB_Method)(Atom*,MPI_Comm,int,double);
|
||||
|
||||
void setupGrid(Grid*, Atom*, Parameter*);
|
||||
void cartisian3d(Grid*, Parameter*, Box*);
|
||||
void rcbBalance(Grid*, Atom*, Parameter* ,RCB_Method, int, double);
|
||||
void staggeredBalance(Grid*, Atom*, Parameter*, double);
|
||||
void printGrid(Grid*);
|
||||
//rcb methods
|
||||
MD_FLOAT meanBisect(Atom* , MPI_Comm, int, double);
|
||||
MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
|
||||
#endif
|
||||
|
||||
|
@ -53,6 +53,10 @@ typedef struct {
|
||||
MD_FLOAT k_dn;
|
||||
MD_FLOAT gx, gy, gz;
|
||||
MD_FLOAT reflect_x, reflect_y, reflect_z;
|
||||
//MPI implementation
|
||||
int balance;
|
||||
int method;
|
||||
int balance_every;
|
||||
} Parameter;
|
||||
|
||||
void initParameter(Parameter*);
|
||||
|
71
common/includes/shell_methods.h
Normal file
71
common/includes/shell_methods.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <comm.h>
|
||||
#include <atom.h>
|
||||
#include <timing.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
|
||||
//static void addDummyCluster(Atom*);
|
||||
|
||||
double forward(Comm* comm, Atom *atom, Parameter* param){
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
if(param->method == halfShell){
|
||||
for(int iswap = 0; iswap < 5; iswap++)
|
||||
forwardComm(comm, atom, iswap);
|
||||
} else if(param->method == eightShell){
|
||||
for(int iswap = 0; iswap < 6; iswap+=2)
|
||||
forwardComm(comm, atom, iswap);
|
||||
} else {
|
||||
for(int iswap = 0; iswap < 6; iswap++)
|
||||
forwardComm(comm, atom, iswap);
|
||||
}
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reverse(Comm* comm, Atom *atom, Parameter* param){
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
if(param->method == halfShell){
|
||||
for(int iswap = 4; iswap >= 0; iswap--)
|
||||
reverseComm(comm, atom, iswap);
|
||||
} else if(param->method == eightShell){
|
||||
for(int iswap = 4; iswap >= 0; iswap-=2)
|
||||
reverseComm(comm, atom, iswap);
|
||||
} else if(param->method == halfStencil){
|
||||
for(int iswap = 5; iswap >= 0; iswap--)
|
||||
reverseComm(comm, atom, iswap);
|
||||
} else { } //Full Shell Reverse does nothing
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
|
||||
{
|
||||
#ifdef GROMACS
|
||||
atom->Nclusters_ghost = 0;
|
||||
#endif
|
||||
atom->Nghost = 0;
|
||||
if(param->method == halfShell){
|
||||
for(int iswap=0; iswap<5; iswap++)
|
||||
ghostComm(comm,atom,iswap);
|
||||
} else if(param->method == eightShell){
|
||||
for(int iswap = 0; iswap<6; iswap+=2)
|
||||
ghostComm(comm, atom,iswap);
|
||||
} else {
|
||||
for(int iswap=0; iswap<6; iswap++)
|
||||
ghostComm(comm,atom,iswap);
|
||||
}
|
||||
}
|
@ -9,9 +9,15 @@
|
||||
|
||||
typedef enum {
|
||||
TOTAL = 0,
|
||||
NEIGH,
|
||||
FORCE,
|
||||
NEIGH,
|
||||
FORWARD,
|
||||
REVERSE,
|
||||
UPDATE,
|
||||
BALANCE,
|
||||
SETUP,
|
||||
REST,
|
||||
NUMTIMER
|
||||
} timertype;
|
||||
} timerComm;
|
||||
|
||||
#endif
|
||||
|
@ -4,6 +4,8 @@
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <math.h>
|
||||
|
||||
#ifndef __UTIL_H_
|
||||
#define __UTIL_H_
|
||||
|
||||
@ -35,6 +37,13 @@
|
||||
# define PRECISION_STRING "double"
|
||||
#endif
|
||||
|
||||
#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
|
||||
#define Equal(a,b) (fabs((a)-(b))<1e-9)
|
||||
|
||||
enum {_x=0, _y, _z};
|
||||
enum {fullShell=0, halfShell, eightShell, halfStencil};
|
||||
|
||||
|
||||
extern double myrandom(int*);
|
||||
extern void random_reset(int *seed, int ibase, double *coord);
|
||||
extern int str2ff(const char *string);
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
void initParameter(Parameter *param) {
|
||||
param->input_file = NULL;
|
||||
@ -54,6 +55,10 @@ void initParameter(Parameter *param) {
|
||||
param->reflect_x = 0.0;
|
||||
param->reflect_y = 0.0;
|
||||
param->reflect_z = 0.0;
|
||||
//MPI
|
||||
param->balance = 0;
|
||||
param->method = 0;
|
||||
param->balance_every =param->reneigh_every;
|
||||
}
|
||||
|
||||
void readParameter(Parameter *param, const char *filename) {
|
||||
@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
|
||||
for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
|
||||
line[i] = '\0';
|
||||
|
||||
char *tok = strtok(line, " ");
|
||||
char *val = strtok(NULL, " ");
|
||||
char *tok = strtok(line, "\t ");
|
||||
char *val = strtok(NULL, "\t ");
|
||||
|
||||
#define PARSE_PARAM(p,f) if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
|
||||
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
|
||||
@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
|
||||
PARSE_INT(x_out_every);
|
||||
PARSE_INT(v_out_every);
|
||||
PARSE_INT(half_neigh);
|
||||
PARSE_INT(method);
|
||||
PARSE_INT(balance);
|
||||
PARSE_INT(balance_every);
|
||||
}
|
||||
}
|
||||
|
||||
// Update dtforce
|
||||
param->dtforce = 0.5 * param->dt;
|
||||
|
||||
// Update sigma6 parameter
|
||||
MD_FLOAT s2 = param->sigma * param->sigma;
|
||||
param->sigma6 = s2 * s2 * s2;
|
||||
|
||||
//Update balance parameter, 10 could be change
|
||||
param->balance_every *=param->reneigh_every;
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
|
||||
printf("\tSkin: %e\n", param->skin);
|
||||
printf("\tHalf neighbor lists: %d\n", param->half_neigh);
|
||||
printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
|
||||
|
||||
// ================ New MPI features =============
|
||||
char str[20];
|
||||
strcpy(str, (param->method == 1) ? "Half Shell" :
|
||||
(param->method == 2) ? "Eight Shell" :
|
||||
(param->method == 3) ? "Half Stencil":
|
||||
"Full Shell");
|
||||
printf("\tMethod: %s\n", str);
|
||||
strcpy(str, (param->balance == 1) ? "mean RCB" :
|
||||
(param->balance == 2) ? "mean Time RCB" :
|
||||
(param->balance == 3) ? "Staggered" :
|
||||
"cartisian");
|
||||
printf("\tPartition: %s\n", str);
|
||||
if(param->balance)
|
||||
printf("\tRebalancing every (timesteps): %d\n",param->balance_every);
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include <thermo.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
static int *steparr;
|
||||
static MD_FLOAT *tmparr;
|
||||
@ -24,6 +25,7 @@ static MD_FLOAT t_act;
|
||||
static MD_FLOAT p_act;
|
||||
static MD_FLOAT e_act;
|
||||
static int mstat;
|
||||
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
|
||||
|
||||
/* exported subroutines */
|
||||
void setupThermo(Parameter *param, int natoms)
|
||||
@ -53,12 +55,19 @@ void setupThermo(Parameter *param, int natoms)
|
||||
|
||||
void computeThermo(int iflag, Parameter *param, Atom *atom)
|
||||
{
|
||||
MD_FLOAT t = 0.0, p;
|
||||
MD_FLOAT t_sum = 0.0, t = 0.0, p;
|
||||
int me;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
|
||||
}
|
||||
|
||||
t = t * t_scale;
|
||||
MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0 ,MPI_COMM_WORLD);
|
||||
if(me == 0)
|
||||
{
|
||||
t = t_sum * t_scale;
|
||||
p = (t * dof_boltz) * p_scale;
|
||||
int istep = iflag;
|
||||
|
||||
@ -75,11 +84,13 @@ void computeThermo(int iflag, Parameter *param, Atom *atom)
|
||||
mstat++;
|
||||
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
|
||||
}
|
||||
}
|
||||
|
||||
void adjustThermo(Parameter *param, Atom *atom)
|
||||
{
|
||||
/* zero center-of-mass motion */
|
||||
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
|
||||
MD_FLOAT v_sum[3], vtot[3];
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
vxtot += atom_vx(i);
|
||||
@ -87,9 +98,13 @@ void adjustThermo(Parameter *param, Atom *atom)
|
||||
vztot += atom_vz(i);
|
||||
}
|
||||
|
||||
vxtot = vxtot / atom->Natoms;
|
||||
vytot = vytot / atom->Natoms;
|
||||
vztot = vztot / atom->Natoms;
|
||||
vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;
|
||||
|
||||
MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
vxtot = v_sum[0] / atom->Natoms;
|
||||
vytot = v_sum[1] / atom->Natoms;
|
||||
vztot = v_sum[2] / atom->Natoms;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
atom_vx(i) -= vxtot;
|
||||
@ -97,13 +112,16 @@ void adjustThermo(Parameter *param, Atom *atom)
|
||||
atom_vz(i) -= vztot;
|
||||
}
|
||||
|
||||
t_act = 0;
|
||||
MD_FLOAT t = 0.0;
|
||||
MD_FLOAT t_sum = 0.0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
|
||||
}
|
||||
|
||||
MPI_Allreduce(&t, &t_sum, 1,type, MPI_SUM,MPI_COMM_WORLD);
|
||||
|
||||
t = t_sum;
|
||||
t *= t_scale;
|
||||
MD_FLOAT factor = sqrt(param->temp / t);
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <util.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
|
||||
#define IA 16807
|
||||
@ -86,6 +87,7 @@ int get_cuda_num_threads() {
|
||||
|
||||
void readline(char *line, FILE *fp) {
|
||||
if(fgets(line, MAXLINE, fp) == NULL) {
|
||||
printf("error %i\n",errno);
|
||||
if(errno != 0) {
|
||||
perror("readline()");
|
||||
exit(-1);
|
||||
|
16
config.mk
16
config.mk
@ -1,15 +1,15 @@
|
||||
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
||||
TAG ?= ICC
|
||||
TAG ?= MPIICC
|
||||
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
|
||||
ISA ?= AVX512
|
||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
||||
OPT_SCHEME ?= lammps
|
||||
OPT_SCHEME ?= gromacs
|
||||
# Enable likwid (true or false)
|
||||
ENABLE_LIKWID ?= true
|
||||
ENABLE_LIKWID ?= false
|
||||
# SP or DP
|
||||
DATA_TYPE ?= DP
|
||||
# AOS or SOA
|
||||
DATA_LAYOUT ?= AOS
|
||||
DATA_LAYOUT ?= SOA
|
||||
# Assembly syntax to generate (ATT/INTEL)
|
||||
ASM_SYNTAX ?= ATT
|
||||
# Debug
|
||||
@ -24,13 +24,13 @@ MEM_TRACER ?= false
|
||||
# Trace indexes and distances for gather-md (true or false)
|
||||
INDEX_TRACER ?= false
|
||||
# Compute statistics
|
||||
COMPUTE_STATS ?= true
|
||||
COMPUTE_STATS ?= false
|
||||
|
||||
# Configurations for lammps optimization scheme
|
||||
# Use omp simd pragma when running with half neighbor-lists
|
||||
ENABLE_OMP_SIMD ?= true
|
||||
ENABLE_OMP_SIMD ?= false
|
||||
# Use kernel with explicit SIMD intrinsics
|
||||
USE_SIMD_KERNEL ?= false
|
||||
USE_SIMD_KERNEL ?= true
|
||||
|
||||
# Configurations for gromacs optimization scheme
|
||||
# Use reference version
|
||||
@ -38,7 +38,7 @@ USE_REFERENCE_VERSION ?= false
|
||||
# Enable XTC output
|
||||
XTC_OUTPUT ?= false
|
||||
# Check if cj is local when decreasing reaction force
|
||||
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= true
|
||||
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= false
|
||||
|
||||
# Configurations for CUDA
|
||||
# Use CUDA host memory to optimize transfers
|
||||
|
370
gromacs/atom.c
370
gromacs/atom.c
@ -12,6 +12,7 @@
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
void initAtom(Atom *atom) {
|
||||
atom->x = NULL; atom->y = NULL; atom->z = NULL;
|
||||
@ -27,6 +28,7 @@ void initAtom(Atom *atom) {
|
||||
atom->Nclusters = 0;
|
||||
atom->Nclusters_local = 0;
|
||||
atom->Nclusters_ghost = 0;
|
||||
atom->NmaxGhost = 0; //Temporal
|
||||
atom->Nclusters_max = 0;
|
||||
atom->type = NULL;
|
||||
atom->ntypes = 0;
|
||||
@ -37,10 +39,19 @@ void initAtom(Atom *atom) {
|
||||
atom->iclusters = NULL;
|
||||
atom->jclusters = NULL;
|
||||
atom->icluster_bin = NULL;
|
||||
atom->PBCx = NULL;
|
||||
atom->PBCy = NULL;
|
||||
atom->PBCz = NULL;
|
||||
initMasks(atom);
|
||||
//MPI
|
||||
Box *mybox = &(atom->mybox);
|
||||
mybox->xprd = mybox->yprd = mybox->zprd = 0;
|
||||
mybox->lo[0] = mybox->lo[1] = mybox->lo[2] = 0;
|
||||
mybox->hi[0] = mybox->hi[1] = mybox->hi[2] = 0;
|
||||
}
|
||||
|
||||
void createAtom(Atom *atom, Parameter *param) {
|
||||
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||
@ -128,22 +139,26 @@ int type_str2int(const char *type) {
|
||||
}
|
||||
|
||||
int readAtom(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int len = strlen(param->input_file);
|
||||
if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
|
||||
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int read_atoms = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -153,11 +168,11 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
char *item = strtok(line, " ");
|
||||
if(strncmp(item, "CRYST1", 6) == 0) {
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
@ -166,23 +181,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
char *label;
|
||||
int atom_id, comp_id;
|
||||
MD_FLOAT occupancy, charge;
|
||||
atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
|
||||
label = strtok(NULL, " ");
|
||||
comp_id = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
|
||||
label = strtok(NULL, "\t ");
|
||||
comp_id = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->vx[atom_id] = 0.0;
|
||||
atom->vy[atom_id] = 0.0;
|
||||
atom->vz[atom_id] = 0.0;
|
||||
occupancy = atof(strtok(NULL, " "));
|
||||
charge = atof(strtok(NULL, " "));
|
||||
occupancy = atof(strtok(NULL, "\t "));
|
||||
charge = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@ -194,14 +209,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
strncmp(item, "ENDMDL", 6) == 0) {
|
||||
// Do nothing
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(!read_atoms) {
|
||||
fprintf(stderr, "Input error: No atoms read!\n");
|
||||
if(me==0) fprintf(stderr, "Input error: No atoms read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -217,12 +232,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
char desc[MAXLINE];
|
||||
@ -231,7 +249,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int i = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -241,25 +259,25 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
desc[i] = '\0';
|
||||
readline(line, fp);
|
||||
atoms_to_read = atoi(strtok(line, " "));
|
||||
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
if(me==0) fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
|
||||
while(!feof(fp) && read_atoms < atoms_to_read) {
|
||||
readline(line, fp);
|
||||
char *label = strtok(line, " ");
|
||||
int type = type_str2int(strtok(NULL, " "));
|
||||
int atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
char *label = strtok(line, "\t ");
|
||||
int type = type_str2int(strtok(NULL, "\t "));
|
||||
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
atom_id = read_atoms;
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type;
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@ -269,18 +287,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
if(!feof(fp)) {
|
||||
readline(line, fp);
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(line, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
}
|
||||
|
||||
if(read_atoms != atoms_to_read) {
|
||||
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
if(me==0) fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -296,12 +314,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int natoms = 0;
|
||||
@ -310,7 +330,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int ts = -1;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -333,47 +353,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
}
|
||||
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
|
||||
readline(line, fp);
|
||||
param->xlo = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xlo = atof(strtok(line, "\t "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
|
||||
readline(line, fp);
|
||||
param->ylo = atof(strtok(line, " "));
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->ylo = atof(strtok(line, "\t "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
|
||||
readline(line, fp);
|
||||
param->zlo = atof(strtok(line, " "));
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zlo = atof(strtok(line, "\t "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
|
||||
for(int i = 0; i < natoms; i++) {
|
||||
readline(line, fp);
|
||||
atom_id = atoi(strtok(line, " ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, " "));
|
||||
atom_id = atoi(strtok(line, "\t ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
|
||||
read_atoms++;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
if(me==0) fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(ts < 0 || !natoms || !read_atoms) {
|
||||
fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
if(me==0) fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -389,7 +409,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
fclose(fp);
|
||||
return natoms;
|
||||
}
|
||||
@ -530,3 +550,249 @@ void growClusters(Atom *atom) {
|
||||
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||
atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
|
||||
}
|
||||
|
||||
/* MPI added*/
|
||||
void growPbc(Atom* atom) {
|
||||
int nold = atom->NmaxGhost;
|
||||
atom->NmaxGhost += DELTA;
|
||||
|
||||
if (atom->PBCx || atom->PBCy || atom->PBCz){
|
||||
atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
} else {
|
||||
atom->PBCx = (int*) malloc(atom->NmaxGhost * sizeof(int));
|
||||
atom->PBCy = (int*) malloc(atom->NmaxGhost * sizeof(int));
|
||||
atom->PBCz = (int*) malloc(atom->NmaxGhost * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
void packForward(Atom* atom, int nc, int* list, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = list[i];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
|
||||
buf[3*(displ+cjj)+1] = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
|
||||
buf[3*(displ+cjj)+2] = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = -1; //x
|
||||
buf[3*(displ+cjj)+1] = -1; //y
|
||||
buf[3*(displ+cjj)+2] = -1; //z
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unpackForward(Atom* atom, int nc, int c0, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = c0+i;
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
if(cj_x[CL_X_OFFSET + cjj]<INFINITY) cj_x[CL_X_OFFSET + cjj] = buf[3*(displ+cjj)+0];
|
||||
if(cj_x[CL_Y_OFFSET + cjj]<INFINITY) cj_x[CL_Y_OFFSET + cjj] = buf[3*(displ+cjj)+1];
|
||||
if(cj_x[CL_Z_OFFSET + cjj]<INFINITY) cj_x[CL_Z_OFFSET + cjj] = buf[3*(displ+cjj)+2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int packGhost(Atom* atom, int cj, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
//#of elements per cluster natoms,x0,y0,z0,type_0, . . ,xn,yn,zn,type_n,bbminx,bbmaxxy,bbminy,bbmaxy,bbminz,bbmaxz
|
||||
//count = 4*N_CLUSTER+7, if N_CLUSTER =4 => count = 23 value/cluster + trackpbc[x] + trackpbc[y] + trackpbc[z]
|
||||
int m = 0;
|
||||
if(atom->jclusters[cj].natoms > 0) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
||||
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||
|
||||
buf[m++] = atom->jclusters[cj].natoms;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
|
||||
MD_FLOAT xtmp = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
|
||||
MD_FLOAT ytmp = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
|
||||
MD_FLOAT ztmp = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
|
||||
|
||||
buf[m++] = xtmp;
|
||||
buf[m++] = ytmp;
|
||||
buf[m++] = ztmp;
|
||||
buf[m++]= atom->cl_type[cj_sca_base + cjj];
|
||||
|
||||
if(bbminx > xtmp) { bbminx = xtmp; }
|
||||
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
|
||||
if(bbminy > ytmp) { bbminy = ytmp; }
|
||||
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
|
||||
if(bbminz > ztmp) { bbminz = ztmp; }
|
||||
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
buf[m++] = -1; //x
|
||||
buf[m++] = -1; //y
|
||||
buf[m++] = -1; //z
|
||||
buf[m++] = -1; //type
|
||||
}
|
||||
|
||||
buf[m++] = bbminx;
|
||||
buf[m++] = bbmaxx;
|
||||
buf[m++] = bbminy;
|
||||
buf[m++] = bbmaxy;
|
||||
buf[m++] = bbminz;
|
||||
buf[m++] = bbmaxz;
|
||||
//TODO: check atom->ncj
|
||||
int ghostId = cj-atom->ncj;
|
||||
//check for ghost particles
|
||||
buf[m++] = (cj-atom->ncj>=0) ? pbc[_x]+atom->PBCx[ghostId]:pbc[_x];
|
||||
buf[m++] = (cj-atom->ncj>=0) ? pbc[_y]+atom->PBCy[ghostId]:pbc[_y];
|
||||
buf[m++] = (cj-atom->ncj>=0) ? pbc[_z]+atom->PBCz[ghostId]:pbc[_z];
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackGhost(Atom* atom, int cj, MD_FLOAT* buf)
|
||||
{
|
||||
int m = 0;
|
||||
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
|
||||
if(cj*jfac>=atom->Nclusters_max) growClusters(atom);
|
||||
if(atom->Nclusters_ghost>=atom->NmaxGhost) growPbc(atom);
|
||||
|
||||
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
|
||||
atom->jclusters[cj].natoms = buf[m++];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
|
||||
cj_x[CL_X_OFFSET + cjj] = buf[m++];
|
||||
cj_x[CL_Y_OFFSET + cjj] = buf[m++];
|
||||
cj_x[CL_Z_OFFSET + cjj] = buf[m++];
|
||||
atom->cl_type[cj_sca_base + cjj] = buf[m++];
|
||||
atom->Nghost++;
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
cj_x[CL_X_OFFSET + cjj] = INFINITY;
|
||||
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
|
||||
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
|
||||
atom->cl_type[cj_sca_base + cjj] = -1;
|
||||
m+=4;
|
||||
}
|
||||
|
||||
atom->jclusters[cj].bbminx = buf[m++];
|
||||
atom->jclusters[cj].bbmaxx = buf[m++];
|
||||
atom->jclusters[cj].bbminy = buf[m++];
|
||||
atom->jclusters[cj].bbmaxy = buf[m++];
|
||||
atom->jclusters[cj].bbminz = buf[m++];
|
||||
atom->jclusters[cj].bbmaxz = buf[m++];
|
||||
atom->PBCx[atom->Nclusters_ghost] = buf[m++];
|
||||
atom->PBCy[atom->Nclusters_ghost] = buf[m++];
|
||||
atom->PBCz[atom->Nclusters_ghost] = buf[m++];
|
||||
atom->Nclusters_ghost++;
|
||||
|
||||
}
|
||||
|
||||
void packReverse(Atom* atom, int nc, int c0, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = c0+i;
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = cj_f[CL_X_OFFSET + cjj];
|
||||
buf[3*(displ+cjj)+1] = cj_f[CL_Y_OFFSET + cjj];
|
||||
buf[3*(displ+cjj)+2] = cj_f[CL_Z_OFFSET + cjj];
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = -1; //x
|
||||
buf[3*(displ+cjj)+1] = -1; //y
|
||||
buf[3*(displ+cjj)+2] = -1; //z
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unpackReverse(Atom* atom, int nc, int* list, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = list[i];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] += buf[3*(displ+cjj)+0];
|
||||
cj_f[CL_Y_OFFSET + cjj] += buf[3*(displ+cjj)+1];
|
||||
cj_f[CL_Z_OFFSET + cjj] += buf[3*(displ+cjj)+2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
int m = 0;
|
||||
buf[m++] = atom_x(i);
|
||||
buf[m++] = atom_y(i);
|
||||
buf[m++] = atom_z(i);
|
||||
buf[m++] = atom_vx(i);
|
||||
buf[m++] = atom_vy(i);
|
||||
buf[m++] = atom_vz(i);
|
||||
buf[m++] = atom->type[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
while(i >= atom->Nmax) growAtom(atom);
|
||||
int m = 0;
|
||||
atom_x(i) = buf[m++];
|
||||
atom_y(i) = buf[m++];
|
||||
atom_z(i) = buf[m++];
|
||||
atom_vx(i) = buf[m++];
|
||||
atom_vy(i) = buf[m++];
|
||||
atom_vz(i) = buf[m++];
|
||||
atom->type[i] = buf[m++];
|
||||
return m;
|
||||
}
|
||||
|
||||
void pbc(Atom* atom)
|
||||
{
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
|
||||
MD_FLOAT xprd = atom->mybox.xprd;
|
||||
MD_FLOAT yprd = atom->mybox.yprd;
|
||||
MD_FLOAT zprd = atom->mybox.zprd;
|
||||
|
||||
if(atom_x(i) < 0.0) atom_x(i) += xprd;
|
||||
if(atom_y(i) < 0.0) atom_y(i) += yprd;
|
||||
if(atom_z(i) < 0.0) atom_z(i) +=zprd;
|
||||
if(atom_x(i) >= xprd) atom_x(i) -=xprd;
|
||||
if(atom_y(i) >= yprd) atom_y(i) -=yprd;
|
||||
if(atom_z(i) >= zprd) atom_z(i) -=zprd;
|
||||
}
|
||||
}
|
||||
|
||||
void copy(Atom* atom, int i, int j)
|
||||
{
|
||||
atom_x(i) = atom_x(j);
|
||||
atom_y(i) = atom_y(j);
|
||||
atom_z(i) = atom_z(j);
|
||||
atom_vx(i) = atom_vx(j);
|
||||
atom_vy(i) = atom_vy(j);
|
||||
atom_vz(i) = atom_vz(j);
|
||||
atom->type[i] = atom->type[j];
|
||||
}
|
||||
|
@ -14,8 +14,9 @@
|
||||
#include <stats.h>
|
||||
#include <util.h>
|
||||
#include <simd.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
|
||||
/*
|
||||
static inline void gmx_load_simd_2xnn_interactions(
|
||||
int excl,
|
||||
@ -49,7 +50,6 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
@ -60,13 +60,23 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Y_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Z_OFFSET + cjj] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||
@ -103,6 +113,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
|
||||
(ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
|
||||
#endif
|
||||
|
||||
if(cond) {
|
||||
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
|
||||
@ -113,12 +124,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
|
||||
if(neighbor->half_neigh) {
|
||||
if(neighbor->half_neigh || param->method) {
|
||||
cj_f[CL_X_OFFSET + cjj] -= delx * force;
|
||||
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
|
||||
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
|
||||
}
|
||||
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
@ -129,13 +139,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(any != 0) {
|
||||
addStat(stats->clusters_within_cutoff, 1);
|
||||
} else {
|
||||
addStat(stats->clusters_outside_cutoff, 1);
|
||||
}
|
||||
|
||||
ci_f[CL_X_OFFSET + cii] += fix;
|
||||
ci_f[CL_Y_OFFSET + cii] += fiy;
|
||||
ci_f[CL_Z_OFFSET + cii] += fiz;
|
||||
@ -146,7 +154,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
addStat(stats->num_neighs, numneighs);
|
||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
@ -168,7 +176,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
for(int ci = 0; ci < atom->Nclusters_local+atom->Nclusters_ghost; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
@ -178,6 +186,16 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Y_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Z_OFFSET + cjj] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
@ -213,7 +231,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
#endif
|
||||
*/
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@ -322,7 +340,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
fiz2 += tz2;
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)|| param->method) {
|
||||
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
|
||||
}
|
||||
#else
|
||||
@ -373,7 +391,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
fiz2 += tz2;
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
|
||||
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
|
||||
}
|
||||
#else
|
||||
@ -389,7 +407,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
addStat(stats->num_neighs, numneighs);
|
||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
@ -427,7 +445,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@ -562,7 +580,6 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
|
||||
if(neighbor->half_neigh) {
|
||||
return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
@ -589,13 +606,23 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Y_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Z_OFFSET + cjj] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@ -726,7 +753,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
fiz3 = simd_add(fiz3, tz3);
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
|
||||
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
|
||||
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
|
||||
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
|
||||
@ -811,7 +838,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
fiz3 = simd_add(fiz3, tz3);
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
|
||||
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
|
||||
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
|
||||
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
|
||||
@ -831,7 +858,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
addStat(stats->num_neighs, numneighs);
|
||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
@ -869,7 +896,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@ -1070,3 +1097,120 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
|
||||
return computeForceLJ_4xn_full(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
//Routine for eight shell method
|
||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
DEBUG_MESSAGE("computeForceLJ begin\n");
|
||||
|
||||
int Nshell = neighbor->Nshell;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int ci = 0; ci < Nshell; ci++) {
|
||||
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numNeighShell[ci];
|
||||
int cs = neighbor->listshell[ci];
|
||||
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
|
||||
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
|
||||
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
|
||||
for(int css = 0; css < CLUSTER_N; css++) {
|
||||
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
|
||||
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
|
||||
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
|
||||
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
|
||||
|
||||
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
|
||||
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
|
||||
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
|
||||
cj_f[CL_X_OFFSET + cjj] -= delx * force;
|
||||
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
|
||||
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
|
||||
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
}
|
||||
|
||||
cs_f[CL_X_OFFSET + css] += fix;
|
||||
cs_f[CL_Y_OFFSET + css] += fiy;
|
||||
cs_f[CL_Z_OFFSET + css] += fiz;
|
||||
}
|
||||
}
|
||||
// addStat(stats->calculated_forces, 1);
|
||||
// addStat(stats->num_neighs, numneighs);
|
||||
// addStat(stats->force_iters, (long long int)((double)numneighs));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
|
||||
int Nshell = neighbor->Nshell;
|
||||
Pair* neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int ci = 0; ci < Nshell; ci++) {
|
||||
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numNeighShell[ci];
|
||||
int cs = neighbor->listshell[ci].cluster;
|
||||
int css = neighbor->listshell[ci].atom;
|
||||
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
|
||||
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
|
||||
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
|
||||
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
|
||||
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
|
||||
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int cj = neighs[k].cluster;
|
||||
int cjj = neighs[k].atom;
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
|
||||
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
|
||||
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
|
||||
cj_f[CL_X_OFFSET + cjj] -= delx * force;
|
||||
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
|
||||
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
|
||||
cs_f[CL_X_OFFSET + css] += delx * force;
|
||||
cs_f[CL_Y_OFFSET + css] += delx * force;
|
||||
cs_f[CL_Z_OFFSET + css] += delx * force;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
@ -5,6 +5,7 @@
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
#include <box.h>
|
||||
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
@ -102,7 +103,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
|
||||
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max, NmaxGhost,ncj;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
int *border_map;
|
||||
@ -112,6 +113,7 @@ typedef struct {
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
//track the movement of a particle along boundaries
|
||||
int *PBCx, *PBCy, *PBCz;
|
||||
// Data in cluster format
|
||||
MD_FLOAT *cl_x;
|
||||
@ -128,6 +130,9 @@ typedef struct {
|
||||
unsigned int masks_2xnn_fn[8];
|
||||
unsigned int masks_4xn_hn[16];
|
||||
unsigned int masks_4xn_fn[16];
|
||||
|
||||
//Info Subdomain
|
||||
Box mybox;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
@ -140,6 +145,18 @@ extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
extern void growClusters(Atom*);
|
||||
|
||||
int packGhost(Atom*, int, MD_FLOAT* , int*);
|
||||
int unpackGhost(Atom*, int, MD_FLOAT*);
|
||||
int packExchange(Atom*, int, MD_FLOAT*);
|
||||
int unpackExchange(Atom*, int, MD_FLOAT*);
|
||||
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
|
||||
void unpackForward(Atom*, int, int, MD_FLOAT*);
|
||||
void packReverse(Atom* , int , int , MD_FLOAT*);
|
||||
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
|
||||
void pbc(Atom*);
|
||||
void copy(Atom*, int, int);
|
||||
|
||||
|
||||
#ifdef AOS
|
||||
# define POS_DATA_LAYOUT "AoS"
|
||||
# define atom_x(i) atom->x[(i) * 3 + 0]
|
||||
|
@ -9,10 +9,13 @@
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
|
||||
#include <timers.h>
|
||||
#include <timing.h>
|
||||
#include <simd.h>
|
||||
/*
|
||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
@ -33,8 +36,8 @@ void cpuInitialIntegrate(Parameter *param, Atom *atom) {
|
||||
}
|
||||
|
||||
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
@ -46,6 +49,56 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||
}
|
||||
}
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
|
||||
}
|
||||
*/
|
||||
|
||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
|
||||
MD_SIMD_FLOAT dt = simd_broadcast(param->dt);
|
||||
|
||||
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
|
||||
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
|
||||
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
|
||||
MD_SIMD_FLOAT x_vector = simd_fma(vx_vector, dt, simd_load(&ci_x[CL_X_OFFSET]));
|
||||
MD_SIMD_FLOAT y_vector = simd_fma(vy_vector, dt, simd_load(&ci_x[CL_Y_OFFSET]));
|
||||
MD_SIMD_FLOAT z_vector = simd_fma(vz_vector, dt, simd_load(&ci_x[CL_Z_OFFSET]));
|
||||
|
||||
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
|
||||
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
|
||||
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
|
||||
simd_store(&ci_x[CL_X_OFFSET], x_vector);
|
||||
simd_store(&ci_x[CL_Y_OFFSET], y_vector);
|
||||
simd_store(&ci_x[CL_Z_OFFSET], z_vector);
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
|
||||
}
|
||||
|
||||
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
|
||||
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
|
||||
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
|
||||
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
|
||||
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
|
||||
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
|
||||
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
|
||||
}
|
||||
@ -54,3 +107,6 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
void cudaInitialIntegrate(Parameter*, Atom*);
|
||||
void cudaFinalIntegrate(Parameter*, Atom*);
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -25,6 +25,11 @@
|
||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
|
||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
|
||||
|
||||
typedef struct {
|
||||
int cluster;
|
||||
int atom;
|
||||
} Pair;
|
||||
|
||||
typedef struct {
|
||||
int every;
|
||||
int ncalls;
|
||||
@ -34,8 +39,20 @@ typedef struct {
|
||||
int half_neigh;
|
||||
int* neighbors;
|
||||
unsigned int* neighbors_imask;
|
||||
//MPI
|
||||
/*
|
||||
int Nshell; //# of atoms in listShell(Cluster here cover all possible ghost interactions)
|
||||
int *numNeighShell; //# of neighs for each atom in listShell
|
||||
Pair *neighshell; //list of neighs for each atom in listShell
|
||||
Pair *listshell; //Atoms to compute the force
|
||||
*/
|
||||
int Nshell; //# of cluster in listShell(Cluster here cover all possible ghost interactions)
|
||||
int *numNeighShell; //# of neighs for each atom in listShell
|
||||
int *neighshell; //list of neighs for each atom in listShell
|
||||
int *listshell; //Atoms to compute the force
|
||||
} Neighbor;
|
||||
|
||||
|
||||
extern void initNeighbor(Neighbor*, Parameter*);
|
||||
extern void setupNeighbor(Parameter*, Atom*);
|
||||
extern void binatoms(Atom*);
|
||||
|
@ -5,6 +5,8 @@
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <comm.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __VTK_H_
|
||||
#define __VTK_H_
|
||||
@ -13,4 +15,5 @@ extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int t
|
||||
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
|
||||
#endif
|
||||
|
214
gromacs/main.c
214
gromacs/main.c
@ -5,9 +5,7 @@
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <omp.h>
|
||||
//--
|
||||
#include <likwid-marker.h>
|
||||
//--
|
||||
@ -26,6 +24,10 @@
|
||||
#include <util.h>
|
||||
#include <vtk.h>
|
||||
#include <xtc.h>
|
||||
#include <comm.h>
|
||||
#include <grid.h>
|
||||
#include <shell_methods.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
|
||||
@ -42,17 +44,55 @@ extern void copyDataFromCUDADevice(Atom *atom);
|
||||
extern void cudaDeviceFree();
|
||||
#endif
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time)
|
||||
{
|
||||
double S, E;
|
||||
int dims = 3; //TODO: Adjust to do in 3d and 2d
|
||||
S = getTimeStamp();
|
||||
if(param->balance == RCB) {
|
||||
rcbBalance(grid, atom, param, meanBisect,dims,0);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == meanTimeRCB){
|
||||
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == Staggered) {
|
||||
staggeredBalance(grid, atom, param, time);
|
||||
neighComm(comm, param, grid);
|
||||
exchangeComm(comm,atom);
|
||||
}else { } //Do nothing
|
||||
//printGrid(grid);
|
||||
E = getTimeStamp();
|
||||
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
|
||||
{
|
||||
double E,S,time;
|
||||
int me;
|
||||
MPI_Comm_rank(world,&me);
|
||||
S = getTimeStamp();
|
||||
if(param->balance == meanTimeRCB || param->balance == RCB){
|
||||
rcbBalance(grid, atom, param, meanBisect,3,0);
|
||||
neighComm(comm, param, grid);
|
||||
}
|
||||
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
|
||||
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
|
||||
MPI_Barrier(world);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
|
||||
S = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initPbc(atom);
|
||||
//initPbc(atom);
|
||||
initStats(stats);
|
||||
initNeighbor(neighbor, param);
|
||||
if(param->input_file == NULL) {
|
||||
@ -60,13 +100,18 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
} else {
|
||||
readAtom(atom, param);
|
||||
}
|
||||
|
||||
setupGrid(grid,atom,param);
|
||||
setupNeighbor(param, atom);
|
||||
setupComm(comm, param, grid);
|
||||
if(param->balance){
|
||||
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
|
||||
}
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
buildClusters(atom);
|
||||
defineJClusters(atom);
|
||||
setupPbc(atom, param);
|
||||
//setupPbc(atom, param);
|
||||
ghostNeighbor(comm, atom, param); //change
|
||||
binClusters(atom);
|
||||
buildNeighbor(atom, neighbor);
|
||||
initDevice(atom, neighbor);
|
||||
@ -74,15 +119,15 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
updateSingleAtoms(atom);
|
||||
updateAtomsPbc(atom, param);
|
||||
//updateAtomsPbc(atom, param);
|
||||
buildClusters(atom);
|
||||
defineJClusters(atom);
|
||||
setupPbc(atom, param);
|
||||
//setupPbc(atom, param);
|
||||
ghostNeighbor(comm, atom, param);
|
||||
binClusters(atom);
|
||||
buildNeighbor(atom, neighbor);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
@ -90,15 +135,13 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void printAtomState(Atom *atom) {
|
||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
|
||||
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
||||
|
||||
/* int nall = atom->Nlocal + atom->Nghost; */
|
||||
|
||||
/* for (int i=0; i<nall; i++) { */
|
||||
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
|
||||
/* } */
|
||||
double updateAtoms(Comm* comm, Atom* atom){
|
||||
double S,E;
|
||||
S = getTimeStamp();
|
||||
updateSingleAtoms(atom);
|
||||
exchangeComm(comm, atom);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
@ -108,7 +151,8 @@ int main(int argc, char** argv) {
|
||||
Neighbor neighbor;
|
||||
Stats stats;
|
||||
Parameter param;
|
||||
|
||||
Comm comm;
|
||||
Grid grid;
|
||||
LIKWID_MARKER_INIT;
|
||||
#pragma omp parallel
|
||||
{
|
||||
@ -116,7 +160,7 @@ int main(int argc, char** argv) {
|
||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||
//LIKWID_MARKER_REGISTER("pbc");
|
||||
}
|
||||
|
||||
initComm(&argc, &argv, &comm); //change
|
||||
initParameter(¶m);
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
|
||||
@ -158,6 +202,24 @@ int main(int argc, char** argv) {
|
||||
param.half_neigh = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-method") == 0)) {
|
||||
param.method = atoi(argv[++i]);
|
||||
if (param.method>2 || param.method< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-bal") == 0)) {
|
||||
param.balance = atoi(argv[++i]);
|
||||
if (param.balance>3 || param.balance< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Load balance does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
|
||||
param.mass = atof(argv[++i]);
|
||||
continue;
|
||||
@ -188,6 +250,7 @@ int main(int argc, char** argv) {
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||
//TODO: add the shell and ac print options
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
printf(HLINE);
|
||||
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
|
||||
@ -206,83 +269,86 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
setup(¶m, &eam, &atom, &neighbor, &stats);
|
||||
printParameter(¶m);
|
||||
printf(HLINE);
|
||||
if(param.balance>0 && param.method == 1){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("step\ttemp\t\tpressure\n");
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
|
||||
if(comm.myproc == 0) printParameter(¶m);
|
||||
if(comm.myproc == 0) printf(HLINE);
|
||||
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
|
||||
computeThermo(0, ¶m, &atom);
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataToCUDADevice(&atom);
|
||||
#endif
|
||||
|
||||
if(param.force_field == FF_EAM) {
|
||||
timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
} else {
|
||||
timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats);
|
||||
}
|
||||
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[FORWARD] = 0.0;
|
||||
timer[UPDATE] = 0.0;
|
||||
timer[BALANCE] = 0.0;
|
||||
timer[REVERSE] = reverse(&comm, &atom, ¶m);
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
write_data_to_vtk_file(param.vtk_file, &atom, 0);
|
||||
//write_data_to_vtk_file(param.vtk_file, &comm ,&atom, 0);
|
||||
printvtk(param.vtk_file, &comm, &atom, ¶m, 0);
|
||||
}
|
||||
|
||||
//TODO: modify xct
|
||||
if(param.xtc_file != NULL) {
|
||||
xtc_init(param.xtc_file, &atom, 0);
|
||||
}
|
||||
|
||||
double forceTime=0.0;
|
||||
double commTime=0.0;
|
||||
for(int n = 0; n < param.ntimes; n++) {
|
||||
initialIntegrate(¶m, &atom);
|
||||
|
||||
if((n + 1) % param.reneigh_every) {
|
||||
timer[FORWARD]+=forward(&comm, &atom, ¶m);
|
||||
if(!((n + 1) % param.prune_every)){
|
||||
pruneNeighbor(¶m, &atom, &neighbor);
|
||||
}
|
||||
|
||||
updatePbc(&atom, ¶m, 0);
|
||||
} else {
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataFromCUDADevice(&atom);
|
||||
#endif
|
||||
|
||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
||||
|
||||
timer[UPDATE] +=updateAtoms(&comm,&atom);
|
||||
if(param.balance && !((n+1)%param.balance_every))
|
||||
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);
|
||||
timer[NEIGH] += reneighbour(&comm, ¶m, &atom, &neighbor);
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataToCUDADevice(&atom);
|
||||
isReneighboured = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
|
||||
if(param.force_field == FF_EAM) {
|
||||
timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
} else {
|
||||
timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
|
||||
}
|
||||
|
||||
timer[REVERSE] += reverse(&comm, &atom, ¶m);
|
||||
finalIntegrate(¶m, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
computeThermo(n + 1, ¶m, &atom);
|
||||
}
|
||||
|
||||
int write_pos = !((n + 1) % param.x_out_every);
|
||||
int write_vel = !((n + 1) % param.v_out_every);
|
||||
if(write_pos || write_vel) {
|
||||
if(param.vtk_file != NULL) {
|
||||
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
|
||||
printvtk(param.vtk_file, &comm, &atom, ¶m, n+1);
|
||||
}
|
||||
|
||||
//TODO: xtc file
|
||||
if(param.xtc_file != NULL) {
|
||||
xtc_write(&atom, n + 1, write_pos, write_vel);
|
||||
}
|
||||
@ -292,11 +358,11 @@ int main(int argc, char** argv) {
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataFromCUDADevice(&atom);
|
||||
#endif
|
||||
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
||||
updateSingleAtoms(&atom);
|
||||
updateAtoms(&comm,&atom);
|
||||
computeThermo(-1, ¶m, &atom);
|
||||
|
||||
//TODO:
|
||||
if(param.xtc_file != NULL) {
|
||||
xtc_end();
|
||||
}
|
||||
@ -304,41 +370,35 @@ int main(int argc, char** argv) {
|
||||
#ifdef CUDA_TARGET
|
||||
cudaDeviceFree();
|
||||
#endif
|
||||
double mint[NUMTIMER];
|
||||
double maxt[NUMTIMER];
|
||||
double sumt[NUMTIMER];
|
||||
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
|
||||
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
|
||||
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
|
||||
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
|
||||
int Nghost;
|
||||
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
|
||||
|
||||
if(comm.myproc == 0){
|
||||
int n = comm.numproc;
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
|
||||
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
|
||||
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
|
||||
printf(HLINE);
|
||||
|
||||
int nthreads = 0;
|
||||
int chunkSize = 0;
|
||||
omp_sched_t schedKind;
|
||||
char schedType[10];
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
{
|
||||
omp_get_schedule(&schedKind, &chunkSize);
|
||||
|
||||
switch (schedKind)
|
||||
{
|
||||
case omp_sched_static: strcpy(schedType, "static"); break;
|
||||
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
|
||||
case omp_sched_guided: strcpy(schedType, "guided"); break;
|
||||
case omp_sched_auto: strcpy(schedType, "auto"); break;
|
||||
}
|
||||
|
||||
nthreads = omp_get_max_threads();
|
||||
}
|
||||
|
||||
printf("Num threads: %d\n", nthreads);
|
||||
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
|
||||
#ifdef COMPUTE_STATS
|
||||
displayStatistics(&atom, ¶m, &stats, timer);
|
||||
#endif
|
||||
}
|
||||
endComm(&comm);
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -7,15 +7,15 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
|
||||
#define eps 1.0e-9
|
||||
static MD_FLOAT xprd, yprd, zprd;
|
||||
static MD_FLOAT bininvx, bininvy;
|
||||
static int mbinxlo, mbinylo;
|
||||
@ -34,9 +34,16 @@ static int nmax;
|
||||
static int nstencil; // # of bins in stencil
|
||||
static int* stencil; // stencil list of bin offsets
|
||||
static MD_FLOAT binsizex, binsizey;
|
||||
int me; //rank
|
||||
int method; // method
|
||||
int shellMethod; //If shell method exist
|
||||
|
||||
static int coord2bin(MD_FLOAT, MD_FLOAT);
|
||||
static MD_FLOAT bindist(int, int);
|
||||
//static int ghostZone(Atom*, int);
|
||||
static int halfZoneCluster(Atom*,int);
|
||||
static int ghostClusterinRange(Atom*, int, int, MD_FLOAT);
|
||||
static void neighborGhost(Atom*, Neighbor*);
|
||||
|
||||
/* exported subroutines */
|
||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
@ -53,12 +60,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
bincount = NULL;
|
||||
bin_clusters = NULL;
|
||||
bin_nclusters = NULL;
|
||||
neighbor->half_neigh = param->half_neigh;
|
||||
neighbor->maxneighs = 100;
|
||||
neighbor->maxneighs = 200;
|
||||
neighbor->numneigh = NULL;
|
||||
neighbor->numneigh_masked = NULL;
|
||||
neighbor->neighbors = NULL;
|
||||
neighbor->neighbors_imask = NULL;
|
||||
//MPI
|
||||
shellMethod = 0;
|
||||
method = param->method;
|
||||
if(method == halfShell || method == eightShell){
|
||||
param->half_neigh = 1;
|
||||
shellMethod = 1;
|
||||
}
|
||||
me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
neighbor->half_neigh = param->half_neigh;
|
||||
neighbor->Nshell = 0;
|
||||
neighbor->numNeighShell = NULL;
|
||||
neighbor->neighshell = NULL;
|
||||
neighbor->listshell = NULL;
|
||||
}
|
||||
|
||||
void setupNeighbor(Parameter *param, Atom *atom) {
|
||||
@ -77,7 +97,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
|
||||
|
||||
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
||||
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Natoms)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
||||
MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
|
||||
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
|
||||
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
|
||||
@ -146,6 +166,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
||||
}
|
||||
|
||||
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
||||
|
||||
MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
|
||||
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
|
||||
MD_FLOAT dm = MAX(dl, dh);
|
||||
@ -163,6 +184,7 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
||||
dm = MAX(dl, dh);
|
||||
dm0 = MAX(dm, 0.0);
|
||||
d2 += dm0 * dm0;
|
||||
|
||||
return d2;
|
||||
}
|
||||
|
||||
@ -225,7 +247,6 @@ static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
|
||||
|
||||
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
DEBUG_MESSAGE("buildNeighbor start\n");
|
||||
|
||||
/* extend atom arrays if necessary */
|
||||
if(atom->Nclusters_local > nmax) {
|
||||
nmax = atom->Nclusters_local;
|
||||
@ -249,7 +270,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
while(resize) {
|
||||
int new_maxneighs = neighbor->maxneighs;
|
||||
resize = 0;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||
@ -262,14 +282,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
|
||||
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
|
||||
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
|
||||
int cj, m = -1;
|
||||
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
|
||||
const int c = bin_nclusters[jbin];
|
||||
|
||||
if(c > 0) {
|
||||
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
|
||||
|
||||
@ -279,6 +297,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
if(neighbor->half_neigh && ci_cj1 > cj) {
|
||||
continue;
|
||||
}
|
||||
|
||||
jbb_zmin = atom->jclusters[cj].bbminz;
|
||||
jbb_zmax = atom->jclusters[cj].bbmaxz;
|
||||
dl = ibb_zmin - jbb_zmax;
|
||||
@ -287,7 +306,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
dm0 = MAX(dm, 0.0);
|
||||
d_bb_sq = dm0 * dm0;
|
||||
} while(m + 1 < c && d_bb_sq > cutneighsq);
|
||||
|
||||
jbb_xmin = atom->jclusters[cj].bbminx;
|
||||
jbb_xmax = atom->jclusters[cj].bbmaxx;
|
||||
jbb_ymin = atom->jclusters[cj].bbminy;
|
||||
@ -383,13 +401,14 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
|
||||
if(resize) {
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
||||
fprintf(stdout, "RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
|
||||
free(neighbor->neighbors);
|
||||
free(neighbor->neighbors_imask);
|
||||
neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
|
||||
}
|
||||
}
|
||||
if(method == eightShell) neighborGhost(atom, neighbor);
|
||||
|
||||
/*
|
||||
DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
|
||||
@ -511,19 +530,19 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
|
||||
int ix, iy;
|
||||
|
||||
if(xin >= xprd) {
|
||||
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * bininvx) - mbinxlo;
|
||||
ix = (int)((xin+eps) * bininvx) - mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
iy = (int)(((yin+eps) - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * bininvy) - mbinylo;
|
||||
iy = (int)((yin+eps) * bininvy) - mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
|
||||
}
|
||||
|
||||
return (iy * mbinx + ix + 1);
|
||||
@ -531,26 +550,24 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
|
||||
|
||||
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
|
||||
if(xin >= xprd) {
|
||||
*ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
*ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
*ix = (int)(xin * bininvx) - mbinxlo;
|
||||
*ix = (int)((xin+eps) * bininvx) - mbinxlo;
|
||||
} else {
|
||||
*ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
*ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
*iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
*iy = (int)((yin + eps - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
*iy = (int)(yin * bininvy) - mbinylo;
|
||||
*iy = (int)((yin+eps) * bininvy) - mbinylo;
|
||||
} else {
|
||||
*iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
*iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void binAtoms(Atom *atom) {
|
||||
DEBUG_MESSAGE("binAtoms start\n");
|
||||
int resize = 1;
|
||||
|
||||
while(resize > 0) {
|
||||
resize = 0;
|
||||
|
||||
@ -616,7 +633,6 @@ void buildClusters(Atom *atom) {
|
||||
/* bin local atoms */
|
||||
binAtoms(atom);
|
||||
sortAtomsByZCoord(atom);
|
||||
|
||||
for(int bin = 0; bin < mbins; bin++) {
|
||||
int c = bincount[bin];
|
||||
int ac = 0;
|
||||
@ -688,6 +704,9 @@ void buildClusters(Atom *atom) {
|
||||
void defineJClusters(Atom *atom) {
|
||||
DEBUG_MESSAGE("defineJClusters start\n");
|
||||
|
||||
const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
|
||||
atom->ncj = atom->Nclusters_local / jfac;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int cj0 = CJ0_FROM_CI(ci);
|
||||
|
||||
@ -830,12 +849,11 @@ void binClusters(Atom *atom) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
|
||||
const int cj = ncj + cg;
|
||||
int ix = -1, iy = -1;
|
||||
MD_FLOAT xtmp, ytmp;
|
||||
|
||||
if(shellMethod == halfShell && !halfZoneCluster(atom, cj)) continue;
|
||||
if(atom->jclusters[cj].natoms > 0) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
@ -846,6 +864,7 @@ void binClusters(Atom *atom) {
|
||||
coord2bin2D(xtmp, ytmp, &ix, &iy);
|
||||
ix = MAX(MIN(ix, mbinx - 1), 0);
|
||||
iy = MAX(MIN(iy, mbiny - 1), 0);
|
||||
|
||||
for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
int nix, niy;
|
||||
xtmp = cj_x[CL_X_OFFSET + cjj];
|
||||
@ -861,7 +880,6 @@ void binClusters(Atom *atom) {
|
||||
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
|
||||
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
|
||||
}
|
||||
|
||||
int bin = iy * mbinx + ix + 1;
|
||||
int c = bin_nclusters[bin];
|
||||
if(c < clusters_per_bin) {
|
||||
@ -883,25 +901,21 @@ void binClusters(Atom *atom) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!inserted) {
|
||||
bin_clusters[bin * clusters_per_bin + c] = cj;
|
||||
}
|
||||
|
||||
bin_nclusters[bin]++;
|
||||
} else {
|
||||
resize = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(bin_clusters);
|
||||
clusters_per_bin *= 2;
|
||||
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
DEBUG_MESSAGE("bin_nclusters\n");
|
||||
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
|
||||
@ -919,7 +933,6 @@ void updateSingleAtoms(Atom *atom) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
|
||||
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
|
||||
@ -930,10 +943,172 @@ void updateSingleAtoms(Atom *atom) {
|
||||
Natom++;
|
||||
}
|
||||
}
|
||||
|
||||
if(Natom != atom->Nlocal) {
|
||||
fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("updateSingleAtoms stop\n");
|
||||
}
|
||||
|
||||
//MPI Shell Methods
|
||||
|
||||
static int eightZoneCluster(Atom* atom, int cj)
|
||||
{
|
||||
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
|
||||
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
|
||||
int zone = 0;
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
|
||||
if (atom->jclusters[cj].bbminx +eps >=hi[_x]){
|
||||
zone += 1;
|
||||
}
|
||||
if (atom->jclusters[cj].bbminy +eps >=hi[_y]){
|
||||
zone += 2;
|
||||
}
|
||||
if (atom->jclusters[cj].bbminz +eps >=hi[_z]){
|
||||
zone += 4;
|
||||
}
|
||||
return zoneMapping[zone];
|
||||
}
|
||||
|
||||
static int halfZoneCluster(Atom* atom, int cj)
|
||||
{
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
MD_FLOAT *lo = atom->mybox.lo;
|
||||
|
||||
if(atom->jclusters[cj].bbmaxx < lo[_x] && atom->jclusters[cj].bbmaxy < hi[_y] &&
|
||||
atom->jclusters[cj].bbmaxz < hi[_z]){
|
||||
return 0;
|
||||
} else if(atom->jclusters[cj].bbmaxy < lo[_y] && atom->jclusters[cj].bbmaxz < hi[_z]){
|
||||
return 0;
|
||||
} else if(atom->jclusters[cj].bbmaxz < lo[_z]){
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int BoxGhostDistance(Atom *atom, int ci, int cj) {
|
||||
|
||||
MD_FLOAT dl = atom->jclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
|
||||
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->jclusters[ci].bbmaxx;
|
||||
MD_FLOAT dm = MAX(dl, dh);
|
||||
MD_FLOAT dm0 = MAX(dm, 0.0);
|
||||
MD_FLOAT dx2 = dm0 * dm0;
|
||||
|
||||
dl = atom->jclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
|
||||
dh = atom->jclusters[cj].bbminy - atom->jclusters[ci].bbmaxy;
|
||||
dm = MAX(dl, dh);
|
||||
dm0 = MAX(dm, 0.0);
|
||||
MD_FLOAT dy2 = dm0 * dm0;
|
||||
|
||||
dl = atom->jclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
|
||||
dh = atom->jclusters[cj].bbminz - atom->jclusters[ci].bbmaxz;
|
||||
dm = MAX(dl, dh);
|
||||
dm0 = MAX(dm, 0.0);
|
||||
MD_FLOAT dz2 = dm0 * dm0;
|
||||
|
||||
return dx2 > cutneighsq ? 0 : dy2 > cutneighsq ? 0 : dz2 > cutneighsq ? 0 : 1;
|
||||
}
|
||||
|
||||
static int ghostClusterinRange(Atom *atom, int cs, int cg, MD_FLOAT rsq) {
|
||||
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
|
||||
MD_FLOAT *cg_x = &atom->cl_x[cj_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->jclusters[cs].natoms; cii++) {
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
MD_FLOAT delx = cs_x[CL_X_OFFSET + cii] - cg_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = cs_x[CL_Y_OFFSET + cii] - cg_x[CL_Y_OFFSET + cjj];
|
||||
MD_FLOAT delz = cs_x[CL_Z_OFFSET + cii] - cg_x[CL_Z_OFFSET + cjj];
|
||||
if(delx * delx + dely * dely + delz * delz < rsq) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
|
||||
int Nshell=0;
|
||||
int Ncluster_local = atom->Nclusters_local;
|
||||
int Nclusterghost = atom->Nclusters_ghost;
|
||||
if(neighbor->listshell) free(neighbor->listshell);
|
||||
neighbor->listshell = (int*) malloc(Nclusterghost * sizeof(int));
|
||||
int* listzone = (int*) malloc(8 * Nclusterghost * sizeof(int));
|
||||
int countCluster[8] = {0,0,0,0,0,0,0,0};
|
||||
|
||||
//Selecting ghost atoms for interaction and putting them into regions
|
||||
for(int cg = atom->ncj; cg < atom->ncj+Nclusterghost; cg++) {
|
||||
int czone = eightZoneCluster(atom,cg);
|
||||
int *list = &listzone[Nclusterghost*czone];
|
||||
int n = countCluster[czone];
|
||||
list[n] = cg;
|
||||
countCluster[czone]++;
|
||||
//It is only necessary to find neighbour particles for 3 regions
|
||||
//if(czone == 1 || czone == 2 || czone == 3)
|
||||
//neighbor->listshell[Nshell++] = cg;
|
||||
}
|
||||
|
||||
for(int zone = 1; zone<=3; zone++){
|
||||
int *list = &listzone[Nclusterghost*zone];
|
||||
for(int n=0; n<countCluster[zone]; n++)
|
||||
neighbor->listshell[Nshell++] = list[n];
|
||||
}
|
||||
|
||||
neighbor->Nshell = Nshell;
|
||||
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
|
||||
if(neighbor->neighshell) free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
|
||||
|
||||
int resize = 1;
|
||||
|
||||
while(resize)
|
||||
{
|
||||
resize = 0;
|
||||
for(int ic = 0; ic < Nshell; ic++) {
|
||||
int *neighshell = &(neighbor->neighshell[ic*neighbor->maxneighs]);
|
||||
int n = 0;
|
||||
int icluster = neighbor->listshell[ic];
|
||||
int iczone = eightZoneCluster(atom, icluster);
|
||||
|
||||
for(int jczone=0; jczone<8; jczone++){
|
||||
|
||||
if(jczone <=iczone) continue;
|
||||
if(iczone == 1 && (jczone==5||jczone==6||jczone==7)) continue;
|
||||
if(iczone == 2 && (jczone==4||jczone==6||jczone==7)) continue;
|
||||
if(iczone == 3 && (jczone==4||jczone==5||jczone==7)) continue;
|
||||
|
||||
int Ncluster = countCluster[jczone];
|
||||
int* loc_zone = &listzone[jczone * Nclusterghost];
|
||||
|
||||
for(int k = 0; k < Ncluster ; k++) {
|
||||
int jcluster = loc_zone[k];
|
||||
|
||||
if(BoxGhostDistance(atom, icluster, jcluster))
|
||||
{
|
||||
if(ghostClusterinRange(atom, icluster, jcluster, cutneighsq))
|
||||
neighshell[n++] = jcluster;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
neighbor->numNeighShell[ic] = n;
|
||||
|
||||
if(n >= neighbor->maxneighs){
|
||||
resize = 1;
|
||||
neighbor->maxneighs = n * 1.2;
|
||||
fprintf(stdout, "RESIZE EIGHT SHELL %d, PROC %d\n", neighbor->maxneighs,me);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
free(listzone);
|
||||
}
|
||||
|
130
gromacs/vtk.c
130
gromacs/vtk.c
@ -9,6 +9,11 @@
|
||||
|
||||
#include <atom.h>
|
||||
#include <vtk.h>
|
||||
#include <mpi.h>
|
||||
#include <string.h>
|
||||
|
||||
static MPI_File _fh;
|
||||
static inline void flushBuffer(char*);
|
||||
|
||||
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
|
||||
write_local_atoms_to_vtk_file(filename, atom, timestep);
|
||||
@ -188,3 +193,128 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
|
||||
{
|
||||
char msg[256];
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
|
||||
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
|
||||
if(_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (comm->myproc==0){
|
||||
sprintf(msg, "# vtk DataFile Version 2.0\n");
|
||||
sprintf(msg, "%sParticle data\n",msg);
|
||||
sprintf(msg, "%sASCII\n",msg);
|
||||
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
|
||||
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
|
||||
{
|
||||
if (_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
|
||||
int extrabuff = 100;
|
||||
int sizebuff = sizeline*atom->Nlocal+extrabuff;
|
||||
int mysize = 0;
|
||||
char* msg = (char*) malloc(sizebuff);
|
||||
sprintf(msg, "");
|
||||
for(int i = 0; i < atom->Nlocal; i++){
|
||||
if(mysize+extrabuff >= sizebuff){
|
||||
sizebuff*= 1.5;
|
||||
msg = (char*) realloc(msg, sizebuff);
|
||||
}
|
||||
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
|
||||
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
|
||||
mysize = strlen(msg);
|
||||
}
|
||||
int gatherSize[comm->numproc];
|
||||
|
||||
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
int offset=0;
|
||||
int globalSize = 0;
|
||||
|
||||
for(int i = 0; i < comm->myproc; i++)
|
||||
offset+= gatherSize[i];
|
||||
|
||||
for(int i = 0; i < comm->numproc; i++)
|
||||
globalSize+= gatherSize[i];
|
||||
|
||||
MPI_Offset displ;
|
||||
MPI_Datatype FileType;
|
||||
int GlobalSize[] = {globalSize};
|
||||
int LocalSize[] = {mysize};
|
||||
int Start[] = {offset};
|
||||
|
||||
if(LocalSize[0]>0){
|
||||
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
|
||||
} else {
|
||||
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
|
||||
}
|
||||
MPI_Type_commit(&FileType);
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
|
||||
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
|
||||
|
||||
if (comm->myproc==0){
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
|
||||
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1 %d\n", msg, i);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1\n",msg);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
|
||||
sprintf(msg, "%sSCALARS mass double\n",msg);
|
||||
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1.0\n",msg);
|
||||
sprintf(msg, "%s\n\n",msg);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
void vtkClose()
|
||||
{
|
||||
MPI_File_close(&_fh);
|
||||
_fh=MPI_FILE_NULL;
|
||||
}
|
||||
|
||||
//TODO: print ghost and cluster using MPI
|
||||
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
|
||||
{
|
||||
if(comm->numproc == 1)
|
||||
{
|
||||
write_data_to_vtk_file(filename, atom, timestep);
|
||||
return;
|
||||
}
|
||||
|
||||
vtkOpen(filename, comm, atom, timestep);
|
||||
vtkVector(comm, atom, param);
|
||||
vtkClose();
|
||||
}
|
||||
|
||||
static inline void flushBuffer(char* msg){
|
||||
MPI_Offset displ;
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
|
||||
}
|
32
include_MPIICC.mk
Normal file
32
include_MPIICC.mk
Normal file
@ -0,0 +1,32 @@
|
||||
CC = mpiicc
|
||||
LINKER = $(CC)
|
||||
|
||||
OPENMP = #-qopenmp
|
||||
PROFILE = #-profile-functions -g -pg
|
||||
|
||||
ifeq ($(ISA),AVX512)
|
||||
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) #-g -debug
|
||||
endif
|
||||
|
||||
ifeq ($(ISA),AVX2)
|
||||
OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
|
||||
#OPTS = -Ofast -xAVX2 $(PROFILE)
|
||||
#OPTS = -Ofast -march=core-avx2 $(PROFILE)
|
||||
endif
|
||||
|
||||
ifeq ($(ISA),AVX)
|
||||
OPTS = -Ofast -xAVX $(PROFILE)
|
||||
endif
|
||||
|
||||
ifeq ($(ISA),SSE)
|
||||
OPTS = -Ofast -xSSE4.2 $(PROFILE)
|
||||
endif
|
||||
|
||||
#OPTS = -Ofast -no-vec $(PROFILE)
|
||||
#OPTS = -Ofast -xHost $(PROFILE)
|
||||
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
|
||||
ASFLAGS = #-masm=intel
|
||||
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
|
||||
DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE -DNO_ZMM_INTRIN
|
||||
INCLUDES =
|
||||
LIBS = -lm
|
311
lammps/atom.c
311
lammps/atom.c
@ -9,10 +9,12 @@
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
#include <device.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
@ -41,6 +43,7 @@ void initAtom(Atom *atom) {
|
||||
atom->radius = NULL;
|
||||
atom->av = NULL;
|
||||
atom->r = NULL;
|
||||
atom->border_map = NULL;
|
||||
|
||||
DeviceAtom *d_atom = &(atom->d_atom);
|
||||
d_atom->x = NULL; d_atom->y = NULL; d_atom->z = NULL;
|
||||
@ -52,12 +55,19 @@ void initAtom(Atom *atom) {
|
||||
d_atom->sigma6 = NULL;
|
||||
d_atom->cutforcesq = NULL;
|
||||
d_atom->cutneighsq = NULL;
|
||||
//MPI
|
||||
Box *mybox = &(atom->mybox);
|
||||
mybox->xprd = mybox->yprd = mybox->zprd = 0;
|
||||
mybox->lo[_x] = mybox->lo[_y] = mybox->lo[_z] = 0;
|
||||
mybox->hi[_x] = mybox->hi[_y] = mybox->hi[_z] = 0;
|
||||
}
|
||||
|
||||
void createAtom(Atom *atom, Parameter *param) {
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||
|
||||
MD_FLOAT xlo = 0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0; MD_FLOAT zhi = param->zprd;
|
||||
|
||||
atom->Natoms = 4 * param->nx * param->ny * param->nz;
|
||||
atom->Nlocal = 0;
|
||||
atom->ntypes = param->ntypes;
|
||||
@ -131,7 +141,7 @@ void createAtom(Atom *atom, Parameter *param) {
|
||||
}
|
||||
vztmp = myrandom(&n);
|
||||
|
||||
if(atom->Nlocal == atom->Nmax) {
|
||||
while(atom->Nlocal >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
@ -164,37 +174,41 @@ int type_str2int(const char *type) {
|
||||
}
|
||||
|
||||
int readAtom(Atom *atom, Parameter *param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int len = strlen(param->input_file);
|
||||
if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 3], ".in", 3) == 0) { return readAtom_in(atom, param); }
|
||||
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int read_atoms = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while(!feof(fp)) {
|
||||
readline(line, fp);
|
||||
char *item = strtok(line, " ");
|
||||
char *item = strtok(line, "\t ");
|
||||
if(strncmp(item, "CRYST1", 6) == 0) {
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
@ -203,23 +217,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
char *label;
|
||||
int atom_id, comp_id;
|
||||
MD_FLOAT occupancy, charge;
|
||||
atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
|
||||
label = strtok(NULL, " ");
|
||||
comp_id = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
|
||||
label = strtok(NULL, "\t ");
|
||||
comp_id = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = 0.0;
|
||||
atom_vy(atom_id) = 0.0;
|
||||
atom_vz(atom_id) = 0.0;
|
||||
occupancy = atof(strtok(NULL, " "));
|
||||
charge = atof(strtok(NULL, " "));
|
||||
occupancy = atof(strtok(NULL, "\t "));
|
||||
charge = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@ -231,14 +245,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
strncmp(item, "ENDMDL", 6) == 0) {
|
||||
// Do nothing
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(!read_atoms) {
|
||||
fprintf(stderr, "Input error: No atoms read!\n");
|
||||
if(me==0)fprintf(stderr, "Input error: No atoms read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -254,12 +268,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
char desc[MAXLINE];
|
||||
@ -268,7 +285,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int i = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -277,26 +294,26 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
for(i = 0; desc[i] != '\n'; i++);
|
||||
desc[i] = '\0';
|
||||
readline(line, fp);
|
||||
atoms_to_read = atoi(strtok(line, " "));
|
||||
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
atoms_to_read = atoi(strtok(line, "\t "));
|
||||
if(me==0)fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
|
||||
while(!feof(fp) && read_atoms < atoms_to_read) {
|
||||
readline(line, fp);
|
||||
char *label = strtok(line, " ");
|
||||
int type = type_str2int(strtok(NULL, " "));
|
||||
int atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
char *label = strtok(line, "\t ");
|
||||
int type = type_str2int(strtok(NULL, "\t "));
|
||||
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
atom_id = read_atoms;
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type;
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@ -306,18 +323,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
if(!feof(fp)) {
|
||||
readline(line, fp);
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(line, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
}
|
||||
|
||||
if(read_atoms != atoms_to_read) {
|
||||
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
if(me==0)fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -333,12 +350,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int natoms = 0;
|
||||
@ -347,7 +366,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int ts = -1;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -370,47 +389,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
}
|
||||
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
|
||||
readline(line, fp);
|
||||
param->xlo = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xlo = atof(strtok(line, "\t "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
|
||||
readline(line, fp);
|
||||
param->ylo = atof(strtok(line, " "));
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->ylo = atof(strtok(line, "\t "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
|
||||
readline(line, fp);
|
||||
param->zlo = atof(strtok(line, " "));
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zlo = atof(strtok(line, "\t "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
|
||||
for(int i = 0; i < natoms; i++) {
|
||||
readline(line, fp);
|
||||
atom_id = atoi(strtok(line, " ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_id = atoi(strtok(line, "\t ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
|
||||
read_atoms++;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
if(me==0)fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(ts < 0 || !natoms || !read_atoms) {
|
||||
fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -426,30 +445,34 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
return natoms;
|
||||
}
|
||||
|
||||
int readAtom_in(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int natoms = 0;
|
||||
int atom_id = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
readline(line, fp);
|
||||
natoms = atoi(strtok(line, " "));
|
||||
param->xlo = atof(strtok(NULL, " "));
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->ylo = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->zlo = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
natoms = atoi(strtok(line, "\t "));
|
||||
param->xlo = atof(strtok(NULL, "\t "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->ylo = atof(strtok(NULL, "\t "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = atof(strtok(NULL, "\t "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
atom->Natoms = natoms;
|
||||
atom->Nlocal = natoms;
|
||||
atom->ntypes = 1;
|
||||
@ -462,27 +485,26 @@ int readAtom_in(Atom* atom, Parameter* param) {
|
||||
readline(line, fp);
|
||||
|
||||
// TODO: store mass per atom
|
||||
char *s_mass = strtok(line, " ");
|
||||
char *s_mass = strtok(line, "\t ");
|
||||
if(strncmp(s_mass, "inf", 3) == 0) {
|
||||
// Set atom's mass to INFINITY
|
||||
} else {
|
||||
param->mass = atof(s_mass);
|
||||
}
|
||||
|
||||
atom->radius[atom_id] = atof(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->radius[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->type[atom_id] = 0;
|
||||
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
|
||||
atom_id++;
|
||||
}
|
||||
|
||||
if(!natoms) {
|
||||
fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@ -498,25 +520,10 @@ int readAtom_in(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
return natoms;
|
||||
}
|
||||
|
||||
void writeAtom(Atom *atom, Parameter *param) {
|
||||
FILE *fp = fopen(param->write_atom_file, "w");
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
|
||||
atom->type[i], 1.0,
|
||||
atom_x(i), atom_y(i), atom_z(i),
|
||||
atom_vx(i), atom_vy(i), atom_vz(i));
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
|
||||
param->write_atom_file, param->xprd, param->yprd, param->zprd);
|
||||
}
|
||||
|
||||
void growAtom(Atom *atom) {
|
||||
DeviceAtom *d_atom = &(atom->d_atom);
|
||||
int nold = atom->Nmax;
|
||||
@ -549,3 +556,121 @@ void growAtom(Atom *atom) {
|
||||
atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||
atom->r = (MD_FLOAT*) reallocate(atom->r, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
|
||||
}
|
||||
|
||||
/* MPI added*/
|
||||
void packForward(Atom* atom, int n ,int* list, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
buf_x(i) = atom_x(j) + pbc[0] * atom->mybox.xprd;
|
||||
buf_y(i) = atom_y(j) + pbc[1] * atom->mybox.yprd;
|
||||
buf_z(i) = atom_z(j) + pbc[2] * atom->mybox.zprd;
|
||||
}
|
||||
}
|
||||
|
||||
void unpackForward(Atom* atom, int n, int first, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < n; i++) {
|
||||
atom_x((first + i)) = buf_x(i);
|
||||
atom_y((first + i)) = buf_y(i);
|
||||
atom_z((first + i)) = buf_z(i);
|
||||
}
|
||||
}
|
||||
|
||||
int packGhost(Atom* atom, int i, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
int m = 0;
|
||||
buf[m++] = atom_x(i) + pbc[_x] * atom->mybox.xprd;
|
||||
buf[m++] = atom_y(i) + pbc[_y] * atom->mybox.yprd;
|
||||
buf[m++] = atom_z(i) + pbc[_z] * atom->mybox.zprd;
|
||||
buf[m++] = atom->type[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackGhost(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
while (i>=atom->Nmax) growAtom(atom);
|
||||
int m = 0;
|
||||
atom_x(i) = buf[m++];
|
||||
atom_y(i) = buf[m++];
|
||||
atom_z(i) = buf[m++];
|
||||
atom->type[i] = buf[m++];
|
||||
atom->Nghost++;
|
||||
return m;
|
||||
}
|
||||
|
||||
void packReverse(Atom* atom, int n, int first, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < n; i++) {
|
||||
buf_x(i) = atom_fx(first + i);
|
||||
buf_y(i) = atom_fy(first + i);
|
||||
buf_z(i) = atom_fz(first + i);
|
||||
}
|
||||
}
|
||||
|
||||
void unpackReverse(Atom* atom, int n, int* list, MD_FLOAT* buf)
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
atom_fx(j) += buf_x(i);
|
||||
atom_fy(j) += buf_y(i);
|
||||
atom_fz(j) += buf_z(i);
|
||||
}
|
||||
}
|
||||
|
||||
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
int m = 0;
|
||||
buf[m++] = atom_x(i);
|
||||
buf[m++] = atom_y(i);
|
||||
buf[m++] = atom_z(i);
|
||||
buf[m++] = atom_vx(i);
|
||||
buf[m++] = atom_vy(i);
|
||||
buf[m++] = atom_vz(i);
|
||||
buf[m++] = atom->type[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
while(i >= atom->Nmax) growAtom(atom);
|
||||
int m = 0;
|
||||
atom_x(i) = buf[m++];
|
||||
atom_y(i) = buf[m++];
|
||||
atom_z(i) = buf[m++];
|
||||
atom_vx(i) = buf[m++];
|
||||
atom_vy(i) = buf[m++];
|
||||
atom_vz(i) = buf[m++];
|
||||
atom->type[i] = buf[m++];
|
||||
return m;
|
||||
}
|
||||
|
||||
void pbc(Atom* atom)
|
||||
{
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
|
||||
MD_FLOAT xprd = atom->mybox.xprd;
|
||||
MD_FLOAT yprd = atom->mybox.yprd;
|
||||
MD_FLOAT zprd = atom->mybox.zprd;
|
||||
|
||||
if(atom_x(i) < 0.0) atom_x(i) += xprd;
|
||||
if(atom_y(i) < 0.0) atom_y(i) += yprd;
|
||||
if(atom_z(i) < 0.0) atom_z(i)+= zprd;
|
||||
if(atom_x(i) >= xprd) atom_x(i) -= xprd;
|
||||
if(atom_y(i) >= yprd) atom_y(i) -= yprd;
|
||||
if(atom_z(i) >= zprd) atom_z(i) -= zprd;
|
||||
}
|
||||
}
|
||||
|
||||
void copy(Atom* atom, int i, int j)
|
||||
{
|
||||
atom_x(i) = atom_x(j);
|
||||
atom_y(i) = atom_y(j);
|
||||
atom_z(i) = atom_z(j);
|
||||
atom_vx(i) = atom_vx(j);
|
||||
atom_vy(i) = atom_vy(j);
|
||||
atom_vz(i) = atom_vz(j);
|
||||
atom->type[i] = atom->type[j];
|
||||
}
|
||||
|
@ -13,13 +13,17 @@
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timing.h>
|
||||
|
||||
#include <mpi.h>
|
||||
#include <util.h>
|
||||
#ifdef __SIMD_KERNEL__
|
||||
#include <simd.h>
|
||||
#endif
|
||||
|
||||
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
|
||||
|
||||
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
int Nlocal = atom->Nlocal;
|
||||
int Nghost = atom->Nghost;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
@ -41,16 +45,16 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
MD_FLOAT fix = 0.0;
|
||||
MD_FLOAT fiy = 0.0;
|
||||
MD_FLOAT fiz = 0.0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
@ -70,7 +74,6 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
#endif
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
@ -78,6 +81,7 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
#ifdef USE_REFERENCE_VERSION
|
||||
addStat(stats->atoms_within_cutoff, 1);
|
||||
} else {
|
||||
@ -85,7 +89,6 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
atom_fx(i) += fix;
|
||||
atom_fy(i) += fiy;
|
||||
atom_fz(i) += fiz;
|
||||
@ -102,13 +105,13 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
double E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
int Nlocal = atom->Nlocal;
|
||||
int Nghost = atom->Nghost;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
@ -119,19 +122,18 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
for(int i = 0; i < Nlocal+Nghost; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("forceLJ-halfneigh");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
@ -172,16 +174,14 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
// We do not need to update forces for ghost atoms
|
||||
if(j < Nlocal) {
|
||||
// We need to update forces for ghost atoms if shell_method or half stencil is requiered
|
||||
if((param->half_neigh && j<Nlocal) || param->method){
|
||||
atom_fx(j) -= delx * force;
|
||||
atom_fy(j) -= dely * force;
|
||||
atom_fz(j) -= delz * force;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
atom_fx(i) += fix;
|
||||
atom_fy(i) += fiy;
|
||||
atom_fz(i) += fiz;
|
||||
@ -190,6 +190,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("forceLJ-halfneigh");
|
||||
}
|
||||
|
||||
@ -227,7 +228,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
@ -276,3 +277,58 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
|
||||
double E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
int Nshell = neighbor->Nshell;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
const MD_FLOAT num1 = 1.0;
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for(int i = 0; i < Nshell; i++) {
|
||||
neighs = &(neighbor->neighshell[i * neighbor->maxneighs]);
|
||||
int numneigh = neighbor->numNeighShell[i];
|
||||
int iatom = neighbor->listshell[i];
|
||||
MD_FLOAT xtmp = atom_x(iatom);
|
||||
MD_FLOAT ytmp = atom_y(iatom);
|
||||
MD_FLOAT ztmp = atom_z(iatom);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < numneigh; k++) {
|
||||
int jatom = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(jatom);
|
||||
MD_FLOAT dely = ytmp - atom_y(jatom);
|
||||
MD_FLOAT delz = ztmp - atom_z(jatom);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
atom_fx(jatom) -= delx * force;
|
||||
atom_fy(jatom) -= dely * force;
|
||||
atom_fz(jatom) -= delz * force;
|
||||
}
|
||||
}
|
||||
atom_fx(iatom) += fix;
|
||||
atom_fy(iatom) += fiy;
|
||||
atom_fz(iatom) += fiz;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,8 +4,9 @@
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#include <box.h>
|
||||
#include <parameter.h>
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
|
||||
@ -56,6 +57,8 @@ typedef struct {
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
//TODO: insert the id number
|
||||
//MD_FLOAT *Atom_id;
|
||||
|
||||
// DEM
|
||||
MD_FLOAT *radius;
|
||||
@ -64,6 +67,9 @@ typedef struct {
|
||||
|
||||
// Device data
|
||||
DeviceAtom d_atom;
|
||||
|
||||
//Info Subdomain
|
||||
Box mybox;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
@ -73,9 +79,19 @@ extern int readAtom_pdb(Atom*, Parameter*);
|
||||
extern int readAtom_gro(Atom*, Parameter*);
|
||||
extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern int readAtom_in(Atom*, Parameter*);
|
||||
extern void writeAtom(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
|
||||
int packGhost(Atom*, int, MD_FLOAT*, int*);
|
||||
int unpackGhost(Atom*, int, MD_FLOAT*);
|
||||
int packExchange(Atom*, int, MD_FLOAT*);
|
||||
int unpackExchange(Atom*, int, MD_FLOAT*);
|
||||
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
|
||||
void unpackForward(Atom*, int, int, MD_FLOAT*);
|
||||
void packReverse(Atom* , int , int , MD_FLOAT*);
|
||||
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
|
||||
void pbc(Atom*);
|
||||
void copy(Atom*, int, int);
|
||||
|
||||
#ifdef AOS
|
||||
# define POS_DATA_LAYOUT "AoS"
|
||||
# define atom_x(i) atom->x[(i) * 3 + 0]
|
||||
@ -100,4 +116,8 @@ extern void growAtom(Atom*);
|
||||
# define atom_fz(i) atom->fz[i]
|
||||
#endif
|
||||
|
||||
# define buf_x(i) buf[3*(i)]
|
||||
# define buf_y(i) buf[3*(i)+1]
|
||||
# define buf_z(i) buf[3*(i)+2]
|
||||
|
||||
#endif
|
||||
|
@ -20,9 +20,14 @@ typedef struct {
|
||||
int ncalls;
|
||||
int maxneighs;
|
||||
int half_neigh;
|
||||
int half_stencil;
|
||||
int *neighbors;
|
||||
int *numneigh;
|
||||
|
||||
//MPI
|
||||
int Nshell; //# of atoms in listShell
|
||||
int *numNeighShell; //# of neighs for each atom in listShell
|
||||
int *neighshell; //list of neighs for each atom in listShell
|
||||
int *listshell; //Atoms to compute the force
|
||||
// Device data
|
||||
DeviceNeighbor d_neighbor;
|
||||
} Neighbor;
|
||||
|
@ -5,8 +5,11 @@
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <comm.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __VTK_H_
|
||||
#define __VTK_H_
|
||||
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
|
||||
#endif
|
||||
|
304
lammps/main.c
304
lammps/main.c
@ -11,10 +11,7 @@
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include <likwid-marker.h>
|
||||
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <device.h>
|
||||
@ -24,13 +21,19 @@
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <pbc.h>
|
||||
#include <stats.h>
|
||||
#include <timers.h>
|
||||
#include <util.h>
|
||||
#include <vtk.h>
|
||||
#include <comm.h>
|
||||
#include <grid.h>
|
||||
#include <shell_methods.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
#ifdef CUDA_TARGET
|
||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
|
||||
#endif
|
||||
|
||||
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
@ -38,69 +41,6 @@ extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
|
||||
#endif
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
|
||||
S = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initPbc(atom);
|
||||
initStats(stats);
|
||||
initNeighbor(neighbor, param);
|
||||
if(param->input_file == NULL) {
|
||||
createAtom(atom, param);
|
||||
} else {
|
||||
readAtom(atom, param);
|
||||
}
|
||||
|
||||
setupNeighbor(param);
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
setupPbc(atom, param);
|
||||
initDevice(atom, neighbor);
|
||||
updatePbc(atom, param, true);
|
||||
buildNeighbor(atom, neighbor);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
updateAtomsPbc(atom, param);
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
setupPbc(atom, param);
|
||||
updatePbc(atom, param, true);
|
||||
buildNeighbor(atom, neighbor);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void printAtomState(Atom *atom) {
|
||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
||||
// int nall = atom->Nlocal + atom->Nghost;
|
||||
// for (int i=0; i<nall; i++) {
|
||||
// printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]);
|
||||
// }
|
||||
}
|
||||
|
||||
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
if(param->force_field == FF_EAM) {
|
||||
return computeForceEam(eam, param, atom, neighbor, stats);
|
||||
@ -113,7 +53,7 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
}
|
||||
}
|
||||
|
||||
if(param->half_neigh) {
|
||||
if(param->half_neigh || param->method) {
|
||||
return computeForceLJHalfNeigh(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
@ -124,6 +64,102 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
#endif
|
||||
}
|
||||
|
||||
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time){
|
||||
double S, E;
|
||||
int dims = 3; //TODO: Adjust to do in 3d and 2d
|
||||
S = getTimeStamp();
|
||||
if(param->balance == RCB) {
|
||||
rcbBalance(grid, atom, param, meanBisect,dims,0);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == meanTimeRCB){
|
||||
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == Staggered) {
|
||||
staggeredBalance(grid, atom, param, time);
|
||||
neighComm(comm, param, grid);
|
||||
exchangeComm(comm,atom);
|
||||
}else { } //Do nothing
|
||||
//printGrid(grid);
|
||||
E = getTimeStamp();
|
||||
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
|
||||
{
|
||||
double E,S,time;
|
||||
int me;
|
||||
MPI_Comm_rank(world,&me);
|
||||
S = getTimeStamp();
|
||||
if(param->balance == meanTimeRCB || param->balance == RCB){
|
||||
rcbBalance(grid, atom, param, meanBisect,3,0);
|
||||
neighComm(comm, param, grid);
|
||||
}
|
||||
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
|
||||
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
|
||||
MPI_Barrier(world);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
S = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initStats(stats);
|
||||
initNeighbor(neighbor, param);
|
||||
if(param->input_file == NULL) {
|
||||
createAtom(atom, param);
|
||||
} else {
|
||||
readAtom(atom, param);
|
||||
}
|
||||
setupGrid(grid,atom,param);
|
||||
setupNeighbor(param);
|
||||
setupComm(comm, param, grid);
|
||||
if(param->balance){
|
||||
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
|
||||
}
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
initDevice(atom, neighbor);
|
||||
ghostNeighbor(comm, atom, param);
|
||||
buildNeighbor(atom, neighbor);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
ghostNeighbor(comm, atom, param);
|
||||
buildNeighbor(atom, neighbor);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double updateAtoms(Comm* comm, Atom* atom){
|
||||
double S,E;
|
||||
S = getTimeStamp();
|
||||
exchangeComm(comm, atom);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void writeInput(Parameter *param, Atom *atom) {
|
||||
FILE *fpin = fopen("input.in", "w");
|
||||
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
|
||||
@ -142,7 +178,8 @@ int main(int argc, char** argv) {
|
||||
Neighbor neighbor;
|
||||
Stats stats;
|
||||
Parameter param;
|
||||
|
||||
Comm comm;
|
||||
Grid grid;
|
||||
LIKWID_MARKER_INIT;
|
||||
#pragma omp parallel
|
||||
{
|
||||
@ -150,10 +187,10 @@ int main(int argc, char** argv) {
|
||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||
//LIKWID_MARKER_REGISTER("pbc");
|
||||
}
|
||||
|
||||
initComm(&argc, &argv, &comm);
|
||||
initParameter(¶m);
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
|
||||
if((strcmp(argv[i], "-p") == 0)) {
|
||||
readParameter(¶m, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
@ -192,6 +229,24 @@ int main(int argc, char** argv) {
|
||||
param.half_neigh = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-method") == 0)) {
|
||||
param.method = atoi(argv[++i]);
|
||||
if (param.method>3 || param.method< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-bal") == 0)) {
|
||||
param.balance = atoi(argv[++i]);
|
||||
if (param.balance>3 || param.balance< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Load Balance does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
|
||||
param.cutforce = atof(argv[++i]);
|
||||
continue;
|
||||
@ -208,69 +263,69 @@ int main(int argc, char** argv) {
|
||||
param.vtk_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-w") == 0)) {
|
||||
param.write_atom_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||
if(comm.myproc ==0 ){
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
printf(HLINE);
|
||||
printf("-p / --params <string>: file to read parameters from (can be specified more than once)\n");
|
||||
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
|
||||
printf("-f <string>: force field (lj, eam or dem), default lj\n");
|
||||
printf("-i <string>: input file with atom positions (dump)\n");
|
||||
printf("-e <string>: input file for EAM\n");
|
||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||
printf("-half <int>: use half (1) or full (0) neighbor lists\n");
|
||||
printf("-r / --radius <real>: set cutoff radius\n");
|
||||
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
||||
printf("-w <file>: write input atoms to file\n");
|
||||
printf("--freq <real>: processor frequency (GHz)\n");
|
||||
printf("--vtk <string>: VTK file for visualization\n");
|
||||
printf(HLINE);
|
||||
}
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
setup(¶m, &eam, &atom, &neighbor, &stats);
|
||||
printParameter(¶m);
|
||||
printf(HLINE);
|
||||
if(param.balance>0 && param.method == 1){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("step\ttemp\t\tpressure\n");
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
|
||||
if(comm.myproc == 0)printParameter(¶m);
|
||||
if(comm.myproc == 0)printf(HLINE);
|
||||
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
|
||||
computeThermo(0, ¶m, &atom);
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);// TODO: trace adress
|
||||
#endif
|
||||
|
||||
if(param.write_atom_file != NULL) {
|
||||
writeAtom(&atom, ¶m);
|
||||
}
|
||||
|
||||
//writeInput(¶m, &atom);
|
||||
|
||||
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[FORWARD] = 0.0;
|
||||
timer[UPDATE] = 0.0;
|
||||
timer[BALANCE] = 0.0;
|
||||
timer[REVERSE] = reverse(&comm, &atom, ¶m);
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
|
||||
printvtk(param.vtk_file, &comm, &atom, ¶m, 0);
|
||||
}
|
||||
|
||||
for(int n = 0; n < param.ntimes; n++) {
|
||||
bool reneigh = (n + 1) % param.reneigh_every == 0;
|
||||
initialIntegrate(reneigh, ¶m, &atom);
|
||||
if((n + 1) % param.reneigh_every) {
|
||||
updatePbc(&atom, ¶m, false);
|
||||
if(reneigh) {
|
||||
timer[UPDATE] +=updateAtoms(&comm,&atom);
|
||||
if(param.balance && !((n+1)%param.balance_every))
|
||||
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);
|
||||
timer[NEIGH] += reneighbour(&comm, ¶m, &atom, &neighbor);
|
||||
} else {
|
||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
||||
timer[FORWARD] += forward(&comm, &atom, ¶m);
|
||||
}
|
||||
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
|
||||
timer[FORCE] += computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
timer[REVERSE] += reverse(&comm, &atom, ¶m);
|
||||
finalIntegrate(reneigh, ¶m, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
@ -281,47 +336,42 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
|
||||
printvtk(param.vtk_file, &comm, &atom ,¶m, n+1);
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
||||
computeThermo(-1, ¶m, &atom);
|
||||
|
||||
double mint[NUMTIMER];
|
||||
double maxt[NUMTIMER];
|
||||
double sumt[NUMTIMER];
|
||||
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
|
||||
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
|
||||
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
|
||||
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
|
||||
int Nghost;
|
||||
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
|
||||
|
||||
if(comm.myproc == 0){
|
||||
int n = comm.numproc;
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
|
||||
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
|
||||
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
|
||||
printf(HLINE);
|
||||
|
||||
int nthreads = 0;
|
||||
int chunkSize = 0;
|
||||
omp_sched_t schedKind;
|
||||
char schedType[10];
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
{
|
||||
omp_get_schedule(&schedKind, &chunkSize);
|
||||
|
||||
switch (schedKind)
|
||||
{
|
||||
case omp_sched_static: strcpy(schedType, "static"); break;
|
||||
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
|
||||
case omp_sched_guided: strcpy(schedType, "guided"); break;
|
||||
case omp_sched_auto: strcpy(schedType, "auto"); break;
|
||||
}
|
||||
|
||||
nthreads = omp_get_max_threads();
|
||||
}
|
||||
|
||||
printf("Num threads: %d\n", nthreads);
|
||||
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
|
||||
#ifdef COMPUTE_STATS
|
||||
displayStatistics(&atom, ¶m, &stats, timer);
|
||||
#endif
|
||||
}
|
||||
endComm(&comm);
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -11,15 +11,18 @@
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
#include <sort.h>
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
|
||||
MD_FLOAT xprd, yprd, zprd;
|
||||
MD_FLOAT bininvx, bininvy, bininvz;
|
||||
int mbinxlo, mbinylo, mbinzlo;
|
||||
int pad_x, pad_y, pad_z;
|
||||
int nbinx, nbiny, nbinz;
|
||||
int mbinx, mbiny, mbinz; // n bins in x, y, z
|
||||
int mbinx, mbiny, mbinz; // m bins in x, y, z
|
||||
int *bincount;
|
||||
int *bins;
|
||||
int mbins; //total number of bins
|
||||
@ -30,8 +33,18 @@ int nmax;
|
||||
int nstencil; // # of bins in stencil
|
||||
int* stencil; // stencil list of bin offsets
|
||||
MD_FLOAT binsizex, binsizey, binsizez;
|
||||
int me; //rank
|
||||
int method; // method
|
||||
int half_stencil; //If half stencil exist
|
||||
int shellMethod; //If shell method exist
|
||||
|
||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
|
||||
static MD_FLOAT bindist(int, int, int);
|
||||
static int ghostZone(Atom*, int);
|
||||
static int eightZone(Atom*, int);
|
||||
static int halfZone(Atom*, int);
|
||||
static void neighborGhost(Atom*, Neighbor*);
|
||||
static inline int interaction(Atom* atom, int i, int j);
|
||||
|
||||
/* exported subroutines */
|
||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
@ -51,7 +64,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
neighbor->maxneighs = 100;
|
||||
neighbor->numneigh = NULL;
|
||||
neighbor->neighbors = NULL;
|
||||
//========== MPI =============
|
||||
shellMethod = 0;
|
||||
half_stencil = 0;
|
||||
method = param->method;
|
||||
if(method == halfShell || method == eightShell){
|
||||
param->half_neigh = 1;
|
||||
shellMethod = 1;
|
||||
}
|
||||
if(method == halfStencil){
|
||||
param->half_neigh = 0;
|
||||
half_stencil = 1;
|
||||
}
|
||||
me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
||||
neighbor->half_neigh = param->half_neigh;
|
||||
neighbor->Nshell = 0;
|
||||
neighbor->numNeighShell = NULL;
|
||||
neighbor->neighshell = NULL;
|
||||
neighbor->listshell = NULL;
|
||||
}
|
||||
|
||||
void setupNeighbor(Parameter* param) {
|
||||
@ -64,7 +95,6 @@ void setupNeighbor(Parameter* param) {
|
||||
yprd = param->yprd;
|
||||
zprd = param->zprd;
|
||||
}
|
||||
|
||||
// TODO: update lo and hi for standard case and use them here instead
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
|
||||
@ -93,54 +123,48 @@ void setupNeighbor(Parameter* param) {
|
||||
bininvy = 1.0 / binsizey;
|
||||
bininvz = 1.0 / binsizez;
|
||||
}
|
||||
|
||||
coord = xlo - cutneigh - SMALL * xprd;
|
||||
mbinxlo = (int) (coord * bininvx);
|
||||
if (coord < 0.0) { mbinxlo = mbinxlo - 1; }
|
||||
coord = xhi + cutneigh + SMALL * xprd;
|
||||
mbinxhi = (int) (coord * bininvx);
|
||||
|
||||
coord = ylo - cutneigh - SMALL * yprd;
|
||||
mbinylo = (int) (coord * bininvy);
|
||||
if (coord < 0.0) { mbinylo = mbinylo - 1; }
|
||||
coord = yhi + cutneigh + SMALL * yprd;
|
||||
mbinyhi = (int) (coord * bininvy);
|
||||
|
||||
coord = zlo - cutneigh - SMALL * zprd;
|
||||
mbinzlo = (int) (coord * bininvz);
|
||||
if (coord < 0.0) { mbinzlo = mbinzlo - 1; }
|
||||
coord = zhi + cutneigh + SMALL * zprd;
|
||||
mbinzhi = (int) (coord * bininvz);
|
||||
|
||||
mbinxlo = mbinxlo - 1;
|
||||
mbinxhi = mbinxhi + 1;
|
||||
mbinx = mbinxhi - mbinxlo + 1;
|
||||
|
||||
mbinylo = mbinylo - 1;
|
||||
mbinyhi = mbinyhi + 1;
|
||||
mbiny = mbinyhi - mbinylo + 1;
|
||||
|
||||
mbinzlo = mbinzlo - 1;
|
||||
mbinzhi = mbinzhi + 1;
|
||||
mbinz = mbinzhi - mbinzlo + 1;
|
||||
pad_x = (int)(cutneigh*bininvx);
|
||||
while(pad_x * binsizex < FACTOR * cutneigh) pad_x++;
|
||||
pad_y = (int)(cutneigh*bininvy);
|
||||
while(pad_y * binsizey < FACTOR * cutneigh) pad_y++;
|
||||
pad_z = (int)(cutneigh*bininvz);
|
||||
while(pad_z * binsizez < FACTOR * cutneigh) pad_z++;
|
||||
|
||||
nextx = (int) (cutneigh * bininvx);
|
||||
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
|
||||
if(nextx * binsizex < FACTOR * cutneigh){
|
||||
nextx++;
|
||||
pad_x++;
|
||||
}
|
||||
nexty = (int) (cutneigh * bininvy);
|
||||
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
|
||||
if(nexty * binsizey < FACTOR * cutneigh){
|
||||
nexty++;
|
||||
pad_y++;
|
||||
}
|
||||
nextz = (int) (cutneigh * bininvz);
|
||||
if(nextz * binsizez < FACTOR * cutneigh) nextz++;
|
||||
if(nextz * binsizez < FACTOR * cutneigh){
|
||||
nextz++;
|
||||
pad_z++;
|
||||
}
|
||||
|
||||
mbinx = nbinx+4*pad_x;
|
||||
mbiny = nbiny+4*pad_y;
|
||||
mbinz = nbinz+4*pad_z;
|
||||
|
||||
if (stencil) { free(stencil); }
|
||||
stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
|
||||
nstencil = 0;
|
||||
int kstart = -nextz;
|
||||
|
||||
int kstart = -nextz;
|
||||
int jstart = -nexty;
|
||||
int istart = -nextx;
|
||||
int ibin = 0;
|
||||
for(int k = kstart; k <= nextz; k++) {
|
||||
for(int j = -nexty; j <= nexty; j++) {
|
||||
for(int i = -nextx; i <= nextx; i++) {
|
||||
for(int j = jstart; j <= nexty; j++) {
|
||||
for(int i = istart; i <= nextx; i++) {
|
||||
if(bindist(i, j, k) < cutneighsq) {
|
||||
stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
|
||||
int jbin = k * mbiny * mbinx + j * mbinx + i;
|
||||
if(ibin>jbin && half_stencil) continue;
|
||||
stencil[nstencil++] = jbin;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -155,7 +179,6 @@ void setupNeighbor(Parameter* param) {
|
||||
|
||||
void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
|
||||
/* extend atom arrays if necessary */
|
||||
if(nall > nmax) {
|
||||
nmax = nall;
|
||||
@ -164,16 +187,13 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
|
||||
}
|
||||
|
||||
/* bin local & ghost atoms */
|
||||
binatoms(atom);
|
||||
int resize = 1;
|
||||
|
||||
/* loop over each atom, storing neighbors */
|
||||
while(resize) {
|
||||
int new_maxneighs = neighbor->maxneighs;
|
||||
resize = 0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
|
||||
int n = 0;
|
||||
@ -184,21 +204,22 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int j = loc_bin[m];
|
||||
if((j == i) || (neighbor->half_neigh && (j < i))) {
|
||||
|
||||
if((j==i) || (neighbor->half_neigh && (j<i)))
|
||||
continue;
|
||||
if(half_stencil && ibin==jbin && !interaction(atom,i,j))
|
||||
continue;
|
||||
}
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[j];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
@ -210,8 +231,8 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numneigh[i] = n;
|
||||
|
||||
if(n >= neighbor->maxneighs) {
|
||||
resize = 1;
|
||||
|
||||
@ -220,14 +241,15 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
printf("RESIZE %d\n", neighbor->maxneighs);
|
||||
printf("RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
free(neighbor->neighbors);
|
||||
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
if(method == eightShell) neighborGhost(atom, neighbor);
|
||||
}
|
||||
|
||||
/* internal subroutines */
|
||||
@ -257,38 +279,22 @@ MD_FLOAT bindist(int i, int j, int k) {
|
||||
} else {
|
||||
delz = (k + 1) * binsizez;
|
||||
}
|
||||
|
||||
return (delx * delx + dely * dely + delz * delz);
|
||||
}
|
||||
|
||||
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
|
||||
int ix, iy, iz;
|
||||
MD_FLOAT eps = 1e-9;
|
||||
MD_FLOAT xlo=0.0; MD_FLOAT ylo=0.0; MD_FLOAT zlo=0.0;
|
||||
xlo = fabs(xlo - pad_x*binsizex)+eps;
|
||||
ylo = fabs(ylo - pad_y*binsizey)+eps;
|
||||
zlo = fabs(zlo - pad_z*binsizez)+eps;
|
||||
ix = (int) ((xin + xlo)*bininvx);
|
||||
iy = (int) ((yin + ylo)*bininvy);
|
||||
iz = (int) ((zin + zlo)*bininvz);
|
||||
|
||||
if(xin >= xprd) {
|
||||
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * bininvx) - mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * bininvy) - mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
}
|
||||
|
||||
if(zin >= zprd) {
|
||||
iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
|
||||
} else if(zin >= 0.0) {
|
||||
iz = (int)(zin * bininvz) - mbinzlo;
|
||||
} else {
|
||||
iz = (int)(zin * bininvz) - mbinzlo - 1;
|
||||
}
|
||||
|
||||
return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
|
||||
return (iz * mbiny * mbinx + iy * mbinx + ix);
|
||||
//return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
|
||||
}
|
||||
|
||||
void binatoms(Atom *atom) {
|
||||
@ -304,7 +310,7 @@ void binatoms(Atom *atom) {
|
||||
|
||||
for(int i = 0; i < nall; i++) {
|
||||
int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
|
||||
|
||||
if(shellMethod && !ghostZone(atom, i)) continue;
|
||||
if(bincount[ibin] < atoms_per_bin) {
|
||||
int ac = bincount[ibin]++;
|
||||
bins[ibin * atoms_per_bin + ac] = i;
|
||||
@ -325,11 +331,9 @@ void sortAtom(Atom* atom) {
|
||||
binatoms(atom);
|
||||
int Nmax = atom->Nmax;
|
||||
int* binpos = bincount;
|
||||
|
||||
for(int i = 1; i < mbins; i++) {
|
||||
binpos[i] += binpos[i - 1];
|
||||
}
|
||||
|
||||
#ifdef AOS
|
||||
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
|
||||
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
|
||||
@ -367,7 +371,6 @@ void sortAtom(Atom* atom) {
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
free(atom->x);
|
||||
free(atom->vx);
|
||||
atom->x = new_x;
|
||||
@ -383,3 +386,158 @@ void sortAtom(Atom* atom) {
|
||||
atom->vz = new_vz;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* internal subroutines
|
||||
Added with MPI*/
|
||||
|
||||
static int ghostZone(Atom* atom, int i){
|
||||
if(i<atom->Nlocal) return 1;
|
||||
else if(method == halfShell) return halfZone(atom,i);
|
||||
else if(method == eightShell) return eightZone(atom,i);
|
||||
else return 0;
|
||||
}
|
||||
|
||||
static int eightZone(Atom* atom, int i)
|
||||
{
|
||||
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
|
||||
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
int zone = 0;
|
||||
|
||||
if(BigOrEqual(atom_x(i),hi[_x])) {
|
||||
zone += 1;
|
||||
}
|
||||
if(BigOrEqual(atom_y(i),hi[_y])) {
|
||||
zone += 2;
|
||||
}
|
||||
if(BigOrEqual(atom_z(i),hi[_z])) {
|
||||
zone += 4;
|
||||
}
|
||||
return zoneMapping[zone];
|
||||
}
|
||||
|
||||
static int halfZone(Atom* atom, int i)
|
||||
{
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
MD_FLOAT *lo = atom->mybox.lo;
|
||||
|
||||
if(atom_x(i)<lo[_x] && atom_y(i)<hi[_y] && atom_z(i)<hi[_z]){
|
||||
return 0;
|
||||
} else if(atom_y(i)<lo[_y] && atom_z(i)<hi[_z]){
|
||||
return 0;
|
||||
} else if(atom_z(i)<lo[_z]){
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
|
||||
int Nshell=0;
|
||||
int Nlocal = atom->Nlocal;
|
||||
int Nghost = atom->Nghost;
|
||||
if(neighbor->listshell) free(neighbor->listshell);
|
||||
neighbor->listshell = (int*) malloc(Nghost * sizeof(int));
|
||||
int* listzone = (int*) malloc(8 * Nghost * sizeof(int));
|
||||
int countAtoms[8] = {0,0,0,0,0,0,0,0};
|
||||
|
||||
//Selecting ghost atoms for interaction
|
||||
for(int i = Nlocal; i < Nlocal+Nghost; i++) {
|
||||
int izone = ghostZone(atom,i);
|
||||
int *list = &listzone[Nghost*izone];
|
||||
int n = countAtoms[izone];
|
||||
list[n] = i;
|
||||
countAtoms[izone]++;
|
||||
}
|
||||
|
||||
for(int zone = 1; zone<=3; zone++){
|
||||
int *list = &listzone[Nghost*zone];
|
||||
for(int n=0; n<countAtoms[zone]; n++)
|
||||
neighbor->listshell[Nshell++] = list[n];
|
||||
}
|
||||
|
||||
neighbor->Nshell = Nshell;
|
||||
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
|
||||
if(neighbor->neighshell) free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
|
||||
int resize = 1;
|
||||
|
||||
while(resize)
|
||||
{
|
||||
resize = 0;
|
||||
for(int i = 0; i < Nshell; i++) {
|
||||
int *neighshell = &(neighbor->neighshell[i*neighbor->maxneighs]);
|
||||
int n = 0;
|
||||
int iatom = neighbor->listshell[i];
|
||||
int izone = ghostZone(atom, iatom);
|
||||
MD_FLOAT xtmp = atom_x(iatom);
|
||||
MD_FLOAT ytmp = atom_y(iatom);
|
||||
MD_FLOAT ztmp = atom_z(iatom);
|
||||
int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[iatom];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int jatom = loc_bin[m];
|
||||
|
||||
int jzone = ghostZone(atom,jatom);
|
||||
|
||||
if(jzone <=izone) continue;
|
||||
if(izone == 1 && (jzone==5||jzone==6||jzone==7)) continue;
|
||||
if(izone == 2 && (jzone==4||jzone==6||jzone==7)) continue;
|
||||
if(izone == 3 && (jzone==4||jzone==5||jzone==7)) continue;
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(jatom);
|
||||
MD_FLOAT dely = ytmp - atom_y(jatom);
|
||||
MD_FLOAT delz = ztmp - atom_z(jatom);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[jatom];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
#else
|
||||
const MD_FLOAT cutoff = cutneighsq;
|
||||
#endif
|
||||
if(rsq <= cutoff) {
|
||||
neighshell[n++] = jatom;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numNeighShell[i] = n;
|
||||
if(n >= neighbor->maxneighs){
|
||||
resize = 1;
|
||||
neighbor->maxneighs = n * 1.2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
free(listzone);
|
||||
}
|
||||
|
||||
static inline int interaction(Atom* atom, int i, int j) {
|
||||
|
||||
if(i<j && j<atom->Nlocal) {
|
||||
return 1;
|
||||
} else if( atom_z(j)>atom_z(i) && j>=atom->Nlocal) {
|
||||
return 1;
|
||||
} else if(Equal(atom_z(j),atom_z(i)) && atom_y(j)<atom_y(i) && j>=atom->Nlocal){
|
||||
return 1;
|
||||
} else if(Equal(atom_z(j),atom_z(i)) && Equal(atom_y(j),atom_y(i)) && atom_x(j)<atom_x(i) && j>=atom->Nlocal){
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
173
lammps/vtk.c
173
lammps/vtk.c
@ -6,8 +6,12 @@
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <vtk.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#include <atom.h>
|
||||
static MPI_File _fh;
|
||||
static inline void flushBuffer(char*);
|
||||
|
||||
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
|
||||
char timestep_filename[128];
|
||||
@ -18,12 +22,12 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
|
||||
fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(fp, "# vtk DataFile Version 2.0\n");
|
||||
fprintf(fp, "Particle data\n");
|
||||
fprintf(fp, "ASCII\n");
|
||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
|
||||
}
|
||||
@ -48,3 +52,168 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
|
||||
{
|
||||
char msg[256];
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
|
||||
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
|
||||
if(_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (comm->myproc==0){
|
||||
sprintf(msg, "# vtk DataFile Version 2.0\n");
|
||||
sprintf(msg, "%sParticle data\n",msg);
|
||||
sprintf(msg, "%sASCII\n",msg);
|
||||
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
|
||||
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
|
||||
{
|
||||
if (_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
|
||||
int extrabuff = 100;
|
||||
int sizebuff = sizeline*atom->Nlocal+extrabuff;
|
||||
int mysize = 0;
|
||||
char* msg = (char*) malloc(sizebuff);
|
||||
sprintf(msg, "");
|
||||
for(int i = 0; i < atom->Nlocal; i++){
|
||||
if(mysize+extrabuff >= sizebuff){
|
||||
sizebuff*= 1.5;
|
||||
msg = (char*) realloc(msg, sizebuff);
|
||||
}
|
||||
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
|
||||
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
|
||||
mysize = strlen(msg);
|
||||
}
|
||||
int gatherSize[comm->numproc];
|
||||
|
||||
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
int offset=0;
|
||||
int globalSize = 0;
|
||||
|
||||
for(int i = 0; i < comm->myproc; i++)
|
||||
offset+= gatherSize[i];
|
||||
|
||||
for(int i = 0; i < comm->numproc; i++)
|
||||
globalSize+= gatherSize[i];
|
||||
|
||||
MPI_Offset displ;
|
||||
MPI_Datatype FileType;
|
||||
int GlobalSize[] = {globalSize};
|
||||
int LocalSize[] = {mysize};
|
||||
int Start[] = {offset};
|
||||
|
||||
if(LocalSize[0]>0){
|
||||
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
|
||||
} else {
|
||||
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
|
||||
}
|
||||
MPI_Type_commit(&FileType);
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
|
||||
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
|
||||
|
||||
if (comm->myproc==0){
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
|
||||
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1 %d\n", msg, i);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1\n",msg);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
|
||||
sprintf(msg, "%sSCALARS mass double\n",msg);
|
||||
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1.0\n",msg);
|
||||
sprintf(msg, "%s\n\n",msg);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
void vtkClose()
|
||||
{
|
||||
MPI_File_close(&_fh);
|
||||
_fh=MPI_FILE_NULL;
|
||||
}
|
||||
|
||||
int printGhost(const char* filename, Atom* atom, int timestep, int me) {
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d_ghost%i.vtk", filename, timestep,me);
|
||||
FILE* fp = fopen(timestep_filename, "wb");
|
||||
|
||||
if(fp == NULL) {
|
||||
fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
fprintf(fp, "# vtk DataFile Version 2.0\n");
|
||||
fprintf(fp, "Particle data\n");
|
||||
fprintf(fp, "ASCII\n");
|
||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||
fprintf(fp, "POINTS %d double\n", atom->Nghost);
|
||||
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
|
||||
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
|
||||
fprintf(fp, "1 %d\n", i);
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
|
||||
fprintf(fp, "1\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "POINT_DATA %d\n", atom->Nghost);
|
||||
fprintf(fp, "SCALARS mass double\n");
|
||||
fprintf(fp, "LOOKUP_TABLE default\n");
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; i++) {
|
||||
fprintf(fp, "1.0\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
|
||||
{
|
||||
if(comm->numproc == 1)
|
||||
{
|
||||
write_atoms_to_vtk_file(filename, atom, timestep);
|
||||
return;
|
||||
}
|
||||
|
||||
vtkOpen(filename, comm, atom, timestep);
|
||||
vtkVector(comm, atom, param);
|
||||
vtkClose();
|
||||
//printGhost(filename, atom, timestep, comm->myproc);
|
||||
}
|
||||
|
||||
static inline void flushBuffer(char* msg){
|
||||
MPI_Offset displ;
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user