Final MPI version
This commit is contained in:
		
							
								
								
									
										3
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								Makefile
									
									
									
									
									
								
							@@ -17,6 +17,9 @@ include $(MAKE_DIR)/include_ISA.mk
 | 
				
			|||||||
include $(MAKE_DIR)/include_GROMACS.mk
 | 
					include $(MAKE_DIR)/include_GROMACS.mk
 | 
				
			||||||
INCLUDES  += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
 | 
					INCLUDES  += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(strip $(OPT_SCHEME)),gromacs)
 | 
				
			||||||
 | 
					    DEFINES +=  -DGROMACS
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
 | 
					ifeq ($(strip $(DATA_LAYOUT)),AOS)
 | 
				
			||||||
    DEFINES +=  -DAOS
 | 
					    DEFINES +=  -DAOS
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										97
									
								
								common/box.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								common/box.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,97 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 | 
					 * All rights reserved. This file is part of MD-Bench.
 | 
				
			||||||
 | 
					 * Use of this source code is governed by a LGPL-3.0
 | 
				
			||||||
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <box.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int pbc = -100;
 | 
				
			||||||
 | 
					  MD_FLOAT min[3], max[3];
 | 
				
			||||||
 | 
					  int same = (mybox->id == other->id) ? 1 : 0;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //projections
 | 
				
			||||||
 | 
					  min[_x] = MAX(mybox->lo[_x], other->lo[_x]); max[_x] = MIN(mybox->hi[_x], other->hi[_x]); 
 | 
				
			||||||
 | 
					  min[_y] = MAX(mybox->lo[_y], other->lo[_y]); max[_y] = MIN(mybox->hi[_y], other->hi[_y]);
 | 
				
			||||||
 | 
					  min[_z] = MAX(mybox->lo[_z], other->lo[_z]); max[_z] = MIN(mybox->hi[_z], other->hi[_z]);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Intersection no periodic case
 | 
				
			||||||
 | 
					  if(!same){
 | 
				
			||||||
 | 
					    if (dir ==  0)  max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ cutneigh);
 | 
				
			||||||
 | 
					    if (dir ==  1)  min[dim] = MAX(mybox->lo[dim], other->lo[dim]- cutneigh);
 | 
				
			||||||
 | 
					    if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z])) pbc = 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Intersection periodic case
 | 
				
			||||||
 | 
					  if(pbc < 0)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    if(dir == 0){
 | 
				
			||||||
 | 
					      min[dim] = MAX(mybox->lo[dim] , other->lo[dim]- xprd);
 | 
				
			||||||
 | 
					      max[dim] = MIN(mybox->hi[dim] , other->hi[dim]- xprd + cutneigh);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      min[dim] = MAX(mybox->lo[dim], other->lo[dim]+ xprd - cutneigh);
 | 
				
			||||||
 | 
					      max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ xprd); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					    if((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z])) 
 | 
				
			||||||
 | 
					      pbc = (dir == 0) ? 1:-1;
 | 
				
			||||||
 | 
					  }   
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //storing the cuts
 | 
				
			||||||
 | 
					  cut->lo[_x] = min[_x]; cut->hi[_x] = max[_x]; 
 | 
				
			||||||
 | 
					  cut->lo[_y] = min[_y]; cut->hi[_y] = max[_y];
 | 
				
			||||||
 | 
					  cut->lo[_z] = min[_z]; cut->hi[_z] = max[_z];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return pbc;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int overlapFullBox(Parameter* param, MD_FLOAT *cutneigh ,const Box* mybox, const Box* other)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  MD_FLOAT min[3], max[3];
 | 
				
			||||||
 | 
					  MD_FLOAT xprd = param->xprd; 
 | 
				
			||||||
 | 
					  MD_FLOAT yprd = param->yprd; 
 | 
				
			||||||
 | 
					  MD_FLOAT zprd = param->zprd;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  for(int k = -1; k < 2; k++)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    for(int j = -1; j < 2; j++)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      for(int i= -1; i < 2; i++)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        min[_x] = MAX(mybox->lo[_x], other->lo[_x]-cutneigh[_x] + i*xprd);
 | 
				
			||||||
 | 
					        min[_y] = MAX(mybox->lo[_y], other->lo[_y]-cutneigh[_y] + j*yprd); 
 | 
				
			||||||
 | 
					        min[_z] = MAX(mybox->lo[_z], other->lo[_z]-cutneigh[_z] + k*zprd);
 | 
				
			||||||
 | 
					        max[_x] = MIN(mybox->hi[_x], other->hi[_x]+cutneigh[_x] + i*xprd);
 | 
				
			||||||
 | 
					        max[_y] = MIN(mybox->hi[_y], other->hi[_y]+cutneigh[_y] + j*yprd);
 | 
				
			||||||
 | 
					        max[_z] = MIN(mybox->hi[_z], other->hi[_z]+cutneigh[_z] + k*zprd);
 | 
				
			||||||
 | 
					        if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z])) 
 | 
				
			||||||
 | 
					          return 1;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
 | 
				
			||||||
 | 
					 {
 | 
				
			||||||
 | 
					    if(iswap==2 || iswap==3){
 | 
				
			||||||
 | 
					      if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
 | 
				
			||||||
 | 
					      if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(iswap==4 || iswap==5){
 | 
				
			||||||
 | 
					      if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
 | 
				
			||||||
 | 
					      if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
 | 
				
			||||||
 | 
					      if(me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
 | 
				
			||||||
 | 
					      if(me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										556
									
								
								common/comm.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										556
									
								
								common/comm.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,556 @@
 | 
				
			|||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <comm.h>   
 | 
				
			||||||
 | 
					#include <allocate.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define NEIGHMIN  6       
 | 
				
			||||||
 | 
					#define BUFFACTOR 2
 | 
				
			||||||
 | 
					#define BUFMIN    1000
 | 
				
			||||||
 | 
					#define BUFEXTRA  100
 | 
				
			||||||
 | 
					#define world MPI_COMM_WORLD
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE; 
 | 
				
			||||||
 | 
					static inline void allocDynamicBuffers(Comm*);
 | 
				
			||||||
 | 
					static inline void freeDynamicBuffers(Comm*);
 | 
				
			||||||
 | 
					static inline void freeBuffers(Comm*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void defineReverseList(Comm* comm){
 | 
				
			||||||
 | 
					  int dim = 0;
 | 
				
			||||||
 | 
					  int index = 0;
 | 
				
			||||||
 | 
					  int me = comm->myproc;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Set the inverse list
 | 
				
			||||||
 | 
					  for(int iswap = 0; iswap<6; iswap++){
 | 
				
			||||||
 | 
					    int dim = comm->swapdim[iswap]; 
 | 
				
			||||||
 | 
					    int dir = comm->swapdir[iswap];
 | 
				
			||||||
 | 
					    int invswap = comm->swap[dim][(dir+1)%2]; 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int ineigh = comm->sendfrom[invswap]; ineigh< comm->sendtill[invswap]; ineigh++)
 | 
				
			||||||
 | 
					      comm->nrecv[index++] = comm->nsend[ineigh]; 
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					    comm->recvfrom[iswap] = (iswap == 0) ? 0 : comm->recvtill[iswap-1];
 | 
				
			||||||
 | 
					    comm->recvtill[iswap] = index;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //set if myproc is unique in the swap 
 | 
				
			||||||
 | 
					  for(int iswap = 0; iswap<6; iswap++){
 | 
				
			||||||
 | 
					    int sizeswap = comm->sendtill[iswap]-comm->sendfrom[iswap]; 
 | 
				
			||||||
 | 
					    int index = comm->sendfrom[iswap];
 | 
				
			||||||
 | 
					    int myneigh = comm->nsend[index];
 | 
				
			||||||
 | 
					    comm->othersend[iswap] = (sizeswap != 1 || comm->myproc != myneigh) ?  1 : 0;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void addNeighToExchangeList(Comm* comm, int newneigh){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int numneigh = comm->numneighexch;
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					    if(comm->numneighexch>=comm->maxneighexch){
 | 
				
			||||||
 | 
					      size_t oldByteSize = comm->maxneighexch*sizeof(int);
 | 
				
			||||||
 | 
					      comm->maxneighexch *=2; 
 | 
				
			||||||
 | 
					      comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT,  comm->maxneighexch * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // Add the new element to the list
 | 
				
			||||||
 | 
					    comm->nexch[numneigh] = newneigh;
 | 
				
			||||||
 | 
					    comm->numneighexch++;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//Exported functions
 | 
				
			||||||
 | 
					void neighComm(Comm *comm, Parameter* param, Grid *grid)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int me = comm->myproc; 
 | 
				
			||||||
 | 
					  int numproc = comm ->numproc;
 | 
				
			||||||
 | 
					  int PAD = 6;   //number of elements for processor in the map
 | 
				
			||||||
 | 
					  int ineigh = 0;
 | 
				
			||||||
 | 
					  int sneigh = 0;
 | 
				
			||||||
 | 
					  MD_FLOAT *map = grid->map;
 | 
				
			||||||
 | 
					  MD_FLOAT cutneigh = param->cutneigh;
 | 
				
			||||||
 | 
					  MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
 | 
				
			||||||
 | 
					  Box mybox, other, cut;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  //needed for rebalancing
 | 
				
			||||||
 | 
					  freeDynamicBuffers(comm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Local box
 | 
				
			||||||
 | 
					  mybox.id = me;
 | 
				
			||||||
 | 
					  mybox.lo[_x] = map[me*PAD+0];  mybox.hi[_x] = map[me*PAD+3];
 | 
				
			||||||
 | 
					  mybox.lo[_y] = map[me*PAD+1];  mybox.hi[_y] = map[me*PAD+4];
 | 
				
			||||||
 | 
					  mybox.lo[_z] = map[me*PAD+2];  mybox.hi[_z] = map[me*PAD+5];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Check for all possible neighbours only for exchange atoms
 | 
				
			||||||
 | 
					  comm->numneighexch = 0;
 | 
				
			||||||
 | 
					  for(int proc = 0; proc <numproc; proc++){
 | 
				
			||||||
 | 
					      other.id = proc;
 | 
				
			||||||
 | 
					      other.lo[_x] = map[proc*PAD+0];  other.hi[_x] = map[proc*PAD+3];
 | 
				
			||||||
 | 
					      other.lo[_y] = map[proc*PAD+1];  other.hi[_y] = map[proc*PAD+4];
 | 
				
			||||||
 | 
					      other.lo[_z] = map[proc*PAD+2];  other.hi[_z] = map[proc*PAD+5];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    if(proc != me){
 | 
				
			||||||
 | 
					      int intersection = overlapFullBox(param,grid->cutneigh,&mybox,&other);
 | 
				
			||||||
 | 
					      if(intersection) addNeighToExchangeList(comm,proc);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //MAP is stored as follows: xlo,ylo,zlo,xhi,yhi,zhi
 | 
				
			||||||
 | 
					  for(int iswap = 0; iswap <6; iswap++)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int dir = comm->swapdir[iswap]; 
 | 
				
			||||||
 | 
					    int dim = comm->swapdim[iswap]; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int proc = 0; proc < numproc; proc++)
 | 
				
			||||||
 | 
					    {      
 | 
				
			||||||
 | 
					      //Check for neighbours along dimmensions, for forwardComm, backwardComm  and ghostComm
 | 
				
			||||||
 | 
					      other.id = proc;
 | 
				
			||||||
 | 
					      other.lo[_x] = map[proc*PAD+0];  other.hi[_x] = map[proc*PAD+3];
 | 
				
			||||||
 | 
					      other.lo[_y] = map[proc*PAD+1];  other.hi[_y] = map[proc*PAD+4];
 | 
				
			||||||
 | 
					      other.lo[_z] = map[proc*PAD+2];  other.hi[_z] = map[proc*PAD+5]; 
 | 
				
			||||||
 | 
					          
 | 
				
			||||||
 | 
					      //return if two boxes intersect: -100 not intersection, 0, 1 and -1 intersection for each different pbc.  
 | 
				
			||||||
 | 
					      int pbc = overlapBox(dim,dir,&mybox,&other,&cut,prd[dim],cutneigh);
 | 
				
			||||||
 | 
					      if(pbc == -100) continue;   
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      expandBox(iswap, &mybox, &other, &cut, cutneigh);
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					      if(ineigh >= comm->maxneigh) {
 | 
				
			||||||
 | 
					          size_t oldByteSize = comm->maxneigh*sizeof(int);
 | 
				
			||||||
 | 
					          size_t oldBoxSize = comm->maxneigh*sizeof(Box); 
 | 
				
			||||||
 | 
					          comm->maxneigh  = 2*ineigh;  
 | 
				
			||||||
 | 
					          comm->nsend     = (int*) reallocate(comm->nsend, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					          comm->nrecv     = (int*) reallocate(comm->nrecv, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					          comm->pbc_x     = (int*) reallocate(comm->pbc_x, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					          comm->pbc_y     = (int*) reallocate(comm->pbc_y, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					          comm->pbc_z     = (int*) reallocate(comm->pbc_z, ALIGNMENT,  comm->maxneigh * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					          comm->boxes     = (Box*) reallocate(comm->boxes, ALIGNMENT,  comm->maxneigh * sizeof(Box), oldBoxSize);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      comm->boxes[ineigh] = cut;  
 | 
				
			||||||
 | 
					      comm->nsend[ineigh] = proc;
 | 
				
			||||||
 | 
					      comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
 | 
				
			||||||
 | 
					      comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0; 
 | 
				
			||||||
 | 
					      comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0; 
 | 
				
			||||||
 | 
					      ineigh++; 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    comm->sendfrom[iswap] = (iswap == 0) ? 0:comm->sendtill[iswap-1];
 | 
				
			||||||
 | 
					    comm->sendtill[iswap] = ineigh;
 | 
				
			||||||
 | 
					    comm->numneigh = ineigh; 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  allocDynamicBuffers(comm);
 | 
				
			||||||
 | 
					  defineReverseList(comm);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					void initComm(int* argc, char*** argv, Comm* comm)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  //MPI Initialize
 | 
				
			||||||
 | 
					  MPI_Init(argc, argv);
 | 
				
			||||||
 | 
					  MPI_Comm_size(MPI_COMM_WORLD, &(comm->numproc));
 | 
				
			||||||
 | 
					  MPI_Comm_rank(MPI_COMM_WORLD, &(comm->myproc));
 | 
				
			||||||
 | 
					  comm->numneigh = 0;
 | 
				
			||||||
 | 
					  comm->numneighexch = 0;
 | 
				
			||||||
 | 
					  comm->nrecv=NULL;
 | 
				
			||||||
 | 
					  comm->nsend=NULL;
 | 
				
			||||||
 | 
					  comm->nexch=NULL;  
 | 
				
			||||||
 | 
					  comm->pbc_x=NULL; 
 | 
				
			||||||
 | 
					  comm->pbc_y=NULL;  
 | 
				
			||||||
 | 
					  comm->pbc_z=NULL;  
 | 
				
			||||||
 | 
					  comm->boxes=NULL;  
 | 
				
			||||||
 | 
					  comm->atom_send=NULL;     
 | 
				
			||||||
 | 
					  comm->atom_recv=NULL;   
 | 
				
			||||||
 | 
					  comm->off_atom_send=NULL; 
 | 
				
			||||||
 | 
					  comm->off_atom_recv=NULL;
 | 
				
			||||||
 | 
					  comm->maxsendlist=NULL; 
 | 
				
			||||||
 | 
					  comm->sendlist=NULL;
 | 
				
			||||||
 | 
					  comm->buf_send=NULL; 
 | 
				
			||||||
 | 
					  comm->buf_recv=NULL; 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					void endComm(Comm* comm)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  comm->maxneigh = 0;
 | 
				
			||||||
 | 
					  comm->maxneighexch =0;
 | 
				
			||||||
 | 
					  comm->maxsend = 0; 
 | 
				
			||||||
 | 
					  comm->maxrecv = 0;
 | 
				
			||||||
 | 
					  freeBuffers(comm);
 | 
				
			||||||
 | 
					  MPI_Finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void setupComm(Comm* comm, Parameter* param, Grid* grid){
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  comm->swap[_x][0] = 0; comm->swap[_x][1] =1;
 | 
				
			||||||
 | 
					  comm->swap[_y][0] = 2; comm->swap[_y][1] =3;
 | 
				
			||||||
 | 
					  comm->swap[_z][0] = 4; comm->swap[_z][1] =5;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  comm->swapdim[0] = comm->swapdim[1] = _x;
 | 
				
			||||||
 | 
					  comm->swapdim[2] = comm->swapdim[3] = _y;
 | 
				
			||||||
 | 
					  comm->swapdim[4] = comm->swapdim[5] = _z;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  comm->swapdir[0] = comm->swapdir[2] = comm->swapdir[4] = 0;
 | 
				
			||||||
 | 
					  comm->swapdir[1] = comm->swapdir[3] = comm->swapdir[5] = 1;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  for(int i = 0;  i<6; i++){
 | 
				
			||||||
 | 
					    comm->sendfrom[i] = 0;
 | 
				
			||||||
 | 
					    comm->sendtill[i] = 0;
 | 
				
			||||||
 | 
					    comm->recvfrom[i] = 0;
 | 
				
			||||||
 | 
					    comm->recvtill[i] = 0;  
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  comm->forwardSize   = FORWARD_SIZE;      //send coordiantes x,y,z
 | 
				
			||||||
 | 
					  comm->reverseSize   = REVERSE_SIZE;      //return forces fx, fy, fz
 | 
				
			||||||
 | 
					  comm->ghostSize     = GHOST_SIZE;        //send x,y,z,type;
 | 
				
			||||||
 | 
					  comm->exchangeSize  = EXCHANGE_SIZE;     //send x,y,z,vx,vy,vz,type
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  //Allocate memory for recv buffer and recv buffer
 | 
				
			||||||
 | 
					  comm->maxsend = BUFMIN; 
 | 
				
			||||||
 | 
					  comm->maxrecv = BUFMIN;
 | 
				
			||||||
 | 
					  comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT,(comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					  comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT)); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  comm->maxneighexch = NEIGHMIN;
 | 
				
			||||||
 | 
					  comm->nexch  = (int*) allocate(ALIGNMENT,  comm->maxneighexch * sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  comm->maxneigh = NEIGHMIN;
 | 
				
			||||||
 | 
					  comm->nsend  = (int*) allocate(ALIGNMENT,  comm->maxneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->nrecv  = (int*) allocate(ALIGNMENT,  comm->maxneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->pbc_x  = (int*) allocate(ALIGNMENT,  comm->maxneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->pbc_y  = (int*) allocate(ALIGNMENT,  comm->maxneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->pbc_z  = (int*) allocate(ALIGNMENT,  comm->maxneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->boxes  = (Box*) allocate(ALIGNMENT,  comm->maxneigh * sizeof(Box));
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  neighComm(comm, param, grid); 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void forwardComm(Comm* comm, Atom* atom, int iswap)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  int nrqst=0, offset=0, nsend=0, nrecv=0; 
 | 
				
			||||||
 | 
					  int pbc[3];
 | 
				
			||||||
 | 
					  int size = comm->forwardSize; 
 | 
				
			||||||
 | 
					  int maxrqst = comm->numneigh;
 | 
				
			||||||
 | 
					  MD_FLOAT* buf;
 | 
				
			||||||
 | 
					  MPI_Request requests[maxrqst];
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
 | 
				
			||||||
 | 
					    offset = comm->off_atom_send[ineigh];
 | 
				
			||||||
 | 
					    pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh];  pbc[_z]=comm->pbc_z[ineigh];
 | 
				
			||||||
 | 
					    packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset*size],pbc);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					  //Receives elements 
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])  
 | 
				
			||||||
 | 
					    for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){      
 | 
				
			||||||
 | 
					      offset = comm->off_atom_recv[ineigh]*size;
 | 
				
			||||||
 | 
					      nrecv  = comm->atom_recv[ineigh]*size;
 | 
				
			||||||
 | 
					      MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					  //Send elements 
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) 
 | 
				
			||||||
 | 
					    for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){  
 | 
				
			||||||
 | 
					      offset = comm->off_atom_send[ineigh]*size;
 | 
				
			||||||
 | 
					      nsend  = comm->atom_send[ineigh]*size;
 | 
				
			||||||
 | 
					      MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);      
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) buf = comm->buf_recv;
 | 
				
			||||||
 | 
					  else buf = comm->buf_send;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  /* unpack buffer */   
 | 
				
			||||||
 | 
					  for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
 | 
				
			||||||
 | 
					    offset = comm->off_atom_recv[ineigh];
 | 
				
			||||||
 | 
					    unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset*size]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void reverseComm(Comm* comm, Atom* atom, int iswap)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  int nrqst=0, offset=0, nsend=0, nrecv=0 ;
 | 
				
			||||||
 | 
					  int size = comm->reverseSize; 
 | 
				
			||||||
 | 
					  int maxrqst = comm->numneigh;
 | 
				
			||||||
 | 
					  MD_FLOAT* buf;
 | 
				
			||||||
 | 
					  MPI_Request requests[maxrqst];
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
 | 
				
			||||||
 | 
					    offset = comm->off_atom_recv[ineigh]; 
 | 
				
			||||||
 | 
					    packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset*size]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //Receives elements 
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])   
 | 
				
			||||||
 | 
					    for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){      
 | 
				
			||||||
 | 
					      offset = comm->off_atom_send[ineigh]*size;
 | 
				
			||||||
 | 
					      nrecv  = comm->atom_send[ineigh]*size; 
 | 
				
			||||||
 | 
					      MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh],0,world,&requests[nrqst++]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  //Send elements  
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) 
 | 
				
			||||||
 | 
					    for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){  
 | 
				
			||||||
 | 
					      offset = comm->off_atom_recv[ineigh]*size;
 | 
				
			||||||
 | 
					      nsend  = comm->atom_recv[ineigh]*size;  
 | 
				
			||||||
 | 
					      MPI_Send(&comm->buf_send[offset],nsend,type,comm->nrecv[ineigh],0,world);        
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])  buf = comm->buf_recv;
 | 
				
			||||||
 | 
					  else buf = comm->buf_send; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* unpack buffer */   
 | 
				
			||||||
 | 
					  for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
 | 
				
			||||||
 | 
					    offset =  comm->off_atom_send[ineigh]; 
 | 
				
			||||||
 | 
					    unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset*size]);
 | 
				
			||||||
 | 
					  } 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ghostComm(Comm* comm, Atom* atom,int iswap){
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  MD_FLOAT xlo=0, xhi=0, ylo=0, yhi=0, zlo=0, zhi=0; 
 | 
				
			||||||
 | 
					  MD_FLOAT* buf;
 | 
				
			||||||
 | 
					  int nrqst=0, nsend=0, nrecv=0, offset=0, ineigh=0, pbc[3];
 | 
				
			||||||
 | 
					  int all_recv=0, all_send=0, currentSend=0; 
 | 
				
			||||||
 | 
					  int size = comm->ghostSize; 
 | 
				
			||||||
 | 
					  int maxrqrst = comm->numneigh;
 | 
				
			||||||
 | 
					  MPI_Request requests[maxrqrst];
 | 
				
			||||||
 | 
					  for(int i = 0; i<maxrqrst; i++) 
 | 
				
			||||||
 | 
					    requests[maxrqrst]=MPI_REQUEST_NULL;    
 | 
				
			||||||
 | 
					  if(iswap%2==0) comm->iterAtom = LOCAL+GHOST;
 | 
				
			||||||
 | 
					  int iter = 0; 
 | 
				
			||||||
 | 
					  for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
 | 
				
			||||||
 | 
					      {          
 | 
				
			||||||
 | 
					        Box* tile = &comm->boxes[ineigh];
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z]; 
 | 
				
			||||||
 | 
					        xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];   
 | 
				
			||||||
 | 
					        pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh];  pbc[_z]=comm->pbc_z[ineigh];
 | 
				
			||||||
 | 
					        nsend = 0; 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					        for(int i = 0; i < comm->iterAtom ; i++) 
 | 
				
			||||||
 | 
					        { 
 | 
				
			||||||
 | 
					          if(IsinRegionToSend(i)){
 | 
				
			||||||
 | 
					                if(nsend >= comm->maxsendlist[ineigh]) growList(comm,ineigh,nsend);
 | 
				
			||||||
 | 
					                if(currentSend + size >= comm->maxsend) growSend(comm,currentSend); 
 | 
				
			||||||
 | 
					                comm->sendlist[ineigh][nsend++] = i;
 | 
				
			||||||
 | 
					                currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);  
 | 
				
			||||||
 | 
					          }   
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        comm->atom_send[ineigh]     = nsend;          //#atoms send per neigh   
 | 
				
			||||||
 | 
					        comm->off_atom_send[ineigh] = all_send;       //offset atom respect to neighbours in a swap
 | 
				
			||||||
 | 
					        all_send += nsend;                            //all atoms send
 | 
				
			||||||
 | 
					      } 
 | 
				
			||||||
 | 
					  //Receives how many elements to be received.
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])
 | 
				
			||||||
 | 
					    for(nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++)
 | 
				
			||||||
 | 
					      MPI_Irecv(&comm->atom_recv[ineigh],1,MPI_INT,comm->nrecv[ineigh],0,world,&requests[nrqst++]);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if(!comm->othersend[iswap]) comm->atom_recv[comm->recvfrom[iswap]] = nsend; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Communicate how many elements to be sent.
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])
 | 
				
			||||||
 | 
					    for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
 | 
				
			||||||
 | 
					      MPI_Send(&comm->atom_send[ineigh],1,MPI_INT,comm->nsend[ineigh],0,world);    
 | 
				
			||||||
 | 
					   if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Define offset to store in the recv_buff    
 | 
				
			||||||
 | 
					  for(int ineigh = comm->recvfrom[iswap]; ineigh<comm->recvtill[iswap]; ineigh++){ 
 | 
				
			||||||
 | 
					    comm->off_atom_recv[ineigh] = all_recv;
 | 
				
			||||||
 | 
					    all_recv += comm->atom_recv[ineigh];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(all_recv*size>=comm->maxrecv) growRecv(comm,all_recv*size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Receives elements 
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])
 | 
				
			||||||
 | 
					    for (nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
 | 
				
			||||||
 | 
					      offset = comm->off_atom_recv[ineigh]*size;  
 | 
				
			||||||
 | 
					      nrecv = comm->atom_recv[ineigh]*size;
 | 
				
			||||||
 | 
					      MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					  //Send elements
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap])
 | 
				
			||||||
 | 
					    for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
 | 
				
			||||||
 | 
					      offset = comm->off_atom_send[ineigh]*size;
 | 
				
			||||||
 | 
					      nsend  = comm->atom_send[ineigh]*size;  
 | 
				
			||||||
 | 
					      MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world); 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if(comm->othersend[iswap]) buf = comm->buf_recv;
 | 
				
			||||||
 | 
					  else buf = comm->buf_send; 
 | 
				
			||||||
 | 
					  //unpack elements
 | 
				
			||||||
 | 
					  comm->firstrecv[iswap] = LOCAL+GHOST; 
 | 
				
			||||||
 | 
					  for(int i = 0; i < all_recv; i++)
 | 
				
			||||||
 | 
					    unpackGhost(atom, LOCAL+GHOST, &buf[i*size]); 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Increases the buffer if needed
 | 
				
			||||||
 | 
					  int max_size = MAX(comm->forwardSize,comm->reverseSize);
 | 
				
			||||||
 | 
					  int max_buf = max_size * MAX(all_recv, all_send); 
 | 
				
			||||||
 | 
					  if(max_buf>=comm->maxrecv) growRecv(comm,max_buf);
 | 
				
			||||||
 | 
					  if(max_buf>=comm->maxsend) growSend(comm,max_buf);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void exchangeComm(Comm* comm, Atom* atom){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MD_FLOAT x,y,z;
 | 
				
			||||||
 | 
					  MD_FLOAT *lo = atom->mybox.lo; 
 | 
				
			||||||
 | 
					  MD_FLOAT *hi = atom->mybox.hi;
 | 
				
			||||||
 | 
					  int size = comm->exchangeSize;
 | 
				
			||||||
 | 
					  int numneigh = comm->numneighexch;
 | 
				
			||||||
 | 
					  int offset_recv[numneigh];
 | 
				
			||||||
 | 
					  int size_recv[numneigh];
 | 
				
			||||||
 | 
					  MPI_Request requests[numneigh];
 | 
				
			||||||
 | 
					  int i =0,  nsend = 0, nrecv = 0;
 | 
				
			||||||
 | 
					  int nrqst = 0;
 | 
				
			||||||
 | 
					  int nlocal, offset,m;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* enforce PBC */
 | 
				
			||||||
 | 
					  pbc(atom);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if(comm->numneigh == 0) return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					  while(i < nlocal) {
 | 
				
			||||||
 | 
					    if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
 | 
				
			||||||
 | 
					       atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
 | 
				
			||||||
 | 
					       atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
 | 
				
			||||||
 | 
					      if(nsend+size >= comm->maxsend) growSend(comm, nsend);
 | 
				
			||||||
 | 
					      nsend += packExchange(atom, i, &comm->buf_send[nsend]);
 | 
				
			||||||
 | 
					      copy(atom, i, nlocal-1);
 | 
				
			||||||
 | 
					      nlocal--;
 | 
				
			||||||
 | 
					    } else i++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  atom->Nlocal = nlocal;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* send/recv number of to share atoms with neighbouring procs*/
 | 
				
			||||||
 | 
					  for(int ineigh = 0; ineigh < numneigh; ineigh++) 
 | 
				
			||||||
 | 
					    MPI_Irecv(&size_recv[ineigh],1,MPI_INT,comm->nexch[ineigh],0,world,&requests[nrqst++]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int ineigh = 0; ineigh < numneigh; ineigh++) 
 | 
				
			||||||
 | 
					    MPI_Send(&nsend,1,MPI_INT,comm->nexch[ineigh],0,world); 
 | 
				
			||||||
 | 
					  MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Define offset to store in the recv_buff
 | 
				
			||||||
 | 
					  for(int ineigh = 0; ineigh<numneigh; ineigh++){ 
 | 
				
			||||||
 | 
					    offset_recv[ineigh] = nrecv; 
 | 
				
			||||||
 | 
					    nrecv += size_recv[ineigh];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(nrecv >= comm->maxrecv) growRecv(comm,nrecv); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //Receives elements 
 | 
				
			||||||
 | 
					    nrqst=0;
 | 
				
			||||||
 | 
					    for (int ineigh = 0; ineigh< numneigh; ineigh++){
 | 
				
			||||||
 | 
					      offset = offset_recv[ineigh];
 | 
				
			||||||
 | 
					      MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh],0,world,&requests[nrqst++]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    //Send elements 
 | 
				
			||||||
 | 
					    for (int ineigh = 0; ineigh< numneigh; ineigh++)
 | 
				
			||||||
 | 
					      MPI_Send(comm->buf_send,nsend,type,comm->nexch[ineigh],0,world); 
 | 
				
			||||||
 | 
					    MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					    m = 0;
 | 
				
			||||||
 | 
					    while(m < nrecv) {
 | 
				
			||||||
 | 
					      x = comm->buf_recv[m + _x]; 
 | 
				
			||||||
 | 
					      y = comm->buf_recv[m + _y];
 | 
				
			||||||
 | 
					      z = comm->buf_recv[m + _z];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if(x >= lo[_x] && x < hi[_x] &&
 | 
				
			||||||
 | 
					         y >= lo[_y] && y < hi[_y] &&
 | 
				
			||||||
 | 
					         z >= lo[_z] && z < hi[_z]){
 | 
				
			||||||
 | 
					        m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        m += size;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					    atom->Nlocal = nlocal;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    int all_atoms=0;
 | 
				
			||||||
 | 
					    MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
 | 
				
			||||||
 | 
					    if(atom->Natoms!=all_atoms && comm->myproc ==0){
 | 
				
			||||||
 | 
					      printf("Losing atoms! current atoms:%d expected atoms:%d\n",all_atoms,atom->Natoms);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//Internal functions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void growRecv(Comm* comm, int n)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  comm -> maxrecv = BUFFACTOR * n;
 | 
				
			||||||
 | 
					  if(comm->buf_recv) free(comm -> buf_recv);
 | 
				
			||||||
 | 
					  comm -> buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void growSend(Comm* comm, int n)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  size_t oldByteSize = (comm->maxsend+BUFEXTRA)*sizeof(MD_FLOAT);
 | 
				
			||||||
 | 
					  comm -> maxsend = BUFFACTOR * n;
 | 
				
			||||||
 | 
					  comm -> buf_send = (MD_FLOAT*) reallocate(comm->buf_send, ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT), oldByteSize);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline void growList(Comm* comm, int ineigh, int n)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  size_t oldByteSize = comm->maxsendlist[ineigh]*sizeof(int);
 | 
				
			||||||
 | 
					  comm->maxsendlist[ineigh] = BUFFACTOR * n;
 | 
				
			||||||
 | 
					  comm->sendlist[ineigh] = (int*) reallocate(comm->sendlist[ineigh],ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int), oldByteSize);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void  allocDynamicBuffers(Comm* comm)
 | 
				
			||||||
 | 
					{  
 | 
				
			||||||
 | 
					  //Buffers depending on the # of my neighs 
 | 
				
			||||||
 | 
					  int numneigh = comm->numneigh; 
 | 
				
			||||||
 | 
					  comm->atom_send   = (int*) allocate(ALIGNMENT,  numneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->atom_recv   = (int*) allocate(ALIGNMENT,  numneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->off_atom_send = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->off_atom_recv = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
 | 
				
			||||||
 | 
					  comm->maxsendlist   = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  for(int i = 0; i < numneigh; i++) 
 | 
				
			||||||
 | 
					    comm->maxsendlist[i] = BUFMIN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  comm->sendlist = (int**) allocate(ALIGNMENT, numneigh * sizeof(int*));
 | 
				
			||||||
 | 
					  for(int i = 0; i < numneigh; i++) 
 | 
				
			||||||
 | 
					    comm->sendlist[i] = (int*) allocate(ALIGNMENT, comm->maxsendlist[i] * sizeof(int));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void freeDynamicBuffers(Comm* comm)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int numneigh =comm->numneigh;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if(comm->atom_send) free(comm->atom_send);
 | 
				
			||||||
 | 
					  if(comm->atom_recv) free(comm->atom_recv);
 | 
				
			||||||
 | 
					  if(comm->off_atom_send) free(comm->off_atom_send);
 | 
				
			||||||
 | 
					  if(comm->off_atom_recv) free(comm->off_atom_recv);
 | 
				
			||||||
 | 
					  if(comm->maxsendlist) free(comm->maxsendlist);
 | 
				
			||||||
 | 
					  if(comm->sendlist){
 | 
				
			||||||
 | 
					    for(int i = 0; i < numneigh; i++) 
 | 
				
			||||||
 | 
					      if(comm->sendlist[i]) free(comm->sendlist[i]);
 | 
				
			||||||
 | 
					  } 
 | 
				
			||||||
 | 
					  if(comm->sendlist) free(comm->sendlist);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void freeBuffers(Comm* comm)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  if(comm->nrecv) free(comm->nrecv);
 | 
				
			||||||
 | 
					  if(comm->nsend) free(comm->nsend);
 | 
				
			||||||
 | 
					  if(comm->nexch) free(comm->nexch);  
 | 
				
			||||||
 | 
					  if(comm->pbc_x) free(comm->pbc_x); 
 | 
				
			||||||
 | 
					  if(comm->pbc_y) free(comm->pbc_y);  
 | 
				
			||||||
 | 
					  if(comm->pbc_z) free(comm->pbc_z);  
 | 
				
			||||||
 | 
					  if(comm->boxes) free(comm->boxes);  
 | 
				
			||||||
 | 
					  if(comm->atom_send) free(comm->atom_send);     
 | 
				
			||||||
 | 
					  if(comm->atom_recv) free(comm->atom_recv);   
 | 
				
			||||||
 | 
					  if(comm->off_atom_send) free(comm->off_atom_send); 
 | 
				
			||||||
 | 
					  if(comm->off_atom_recv) free(comm->off_atom_recv);
 | 
				
			||||||
 | 
					  if(comm->maxsendlist) free(comm->maxsendlist); 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if(comm->sendlist){
 | 
				
			||||||
 | 
					    for(int i = 0; i < comm->numneigh; i++) 
 | 
				
			||||||
 | 
					      if(comm->sendlist[i]) free(comm->sendlist[i]); 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(comm->sendlist) free(comm->sendlist);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(comm->buf_send) free(comm->buf_send); 
 | 
				
			||||||
 | 
					  if(comm->buf_recv) free(comm->buf_recv);   
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										490
									
								
								common/grid.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										490
									
								
								common/grid.c
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,490 @@
 | 
				
			|||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <grid.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <allocate.h>
 | 
				
			||||||
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//Grommacs Balancing
 | 
				
			||||||
 | 
					MD_FLOAT f_normalization(MD_FLOAT* x,MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MD_FLOAT sum=0;
 | 
				
			||||||
 | 
					  for(int n = 0; n<nprocs; n++){
 | 
				
			||||||
 | 
					    fx[n] = MAX(minx,x[n]);
 | 
				
			||||||
 | 
					    sum+=fx[n];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int n = 0; n<nprocs; n++)
 | 
				
			||||||
 | 
					    fx[n] /= sum;    
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  MD_FLOAT tolerance = 1e-3;
 | 
				
			||||||
 | 
					  MD_FLOAT alpha = 0.5;
 | 
				
			||||||
 | 
					  MD_FLOAT *fx = (MD_FLOAT*) malloc(nprocs*sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					  int maxIterations = 100; 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  for (int i = 0; i < maxIterations; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int converged = 1; 
 | 
				
			||||||
 | 
					    f_normalization(x0,fx,minx,nprocs);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int n=0; n<nprocs; n++)
 | 
				
			||||||
 | 
					      fx[n]= (1-alpha) * x0[n] + alpha * fx[n];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for (int n=0; n<nprocs; n++) {
 | 
				
			||||||
 | 
					        if (fabs(fx[n] - x0[n]) >= tolerance) {
 | 
				
			||||||
 | 
					            converged = 0;
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        }      
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for (int n=0; n<nprocs; n++) 
 | 
				
			||||||
 | 
					        x0[n] = fx[n];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(converged){
 | 
				
			||||||
 | 
					      for(int n = 0; n<nprocs; n++)    
 | 
				
			||||||
 | 
					      return;
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  int me;
 | 
				
			||||||
 | 
					  MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					  int *coord = grid->coord;
 | 
				
			||||||
 | 
					  int *nprocs  = grid ->nprocs;
 | 
				
			||||||
 | 
					  //Elapsed time since the last rebalance
 | 
				
			||||||
 | 
					  double time = newTime - grid->Timer;
 | 
				
			||||||
 | 
					  grid->Timer = newTime;
 | 
				
			||||||
 | 
					  //store the older dimm to compare later for exchange
 | 
				
			||||||
 | 
					  MD_FLOAT lo[3], hi[3];
 | 
				
			||||||
 | 
					  for(int dim = 0; dim< 3; dim++){ 
 | 
				
			||||||
 | 
					    lo[dim] = atom->mybox.lo[dim];
 | 
				
			||||||
 | 
					    hi[dim] = atom->mybox.hi[dim]; 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Define parameters
 | 
				
			||||||
 | 
					  MPI_Comm subComm[3]; 
 | 
				
			||||||
 | 
					  int color[3] = {0,0,0};
 | 
				
			||||||
 | 
					  int id[3] = {0,0,0};
 | 
				
			||||||
 | 
					  MD_FLOAT ** load = (MD_FLOAT**) malloc(3*sizeof(MD_FLOAT*));
 | 
				
			||||||
 | 
					  for(int dim = 0; dim<3; dim++) 
 | 
				
			||||||
 | 
					    load[dim] = (MD_FLOAT*) malloc(nprocs[dim]*sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  int maxprocs = MAX(MAX(nprocs[_x],nprocs[_y]),nprocs[_z]);
 | 
				
			||||||
 | 
					  MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs*sizeof(MD_FLOAT)); 
 | 
				
			||||||
 | 
					  MD_FLOAT* limits = (MD_FLOAT*) malloc(2*maxprocs*sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... Repeat values in between to perfom MPI_Scatter later 
 | 
				
			||||||
 | 
					  MD_FLOAT t_sum[3] = {0,0,0}; 
 | 
				
			||||||
 | 
					  MD_FLOAT recv_buf[2] = {0,0};        //Each proc only receives 2 elments per dimension xlo and xhi
 | 
				
			||||||
 | 
					  MD_FLOAT balancedLoad[3] = {0,0,0};  //1/nprocs
 | 
				
			||||||
 | 
					  MD_FLOAT minLoad[3]  = {0,0,0};      //beta*(1/nprocs) 
 | 
				
			||||||
 | 
					  MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
 | 
				
			||||||
 | 
					  MD_FLOAT boundaries[6] ={0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Create sub-communications along each dimension
 | 
				
			||||||
 | 
					  for(int dim = 0; dim<3; dim++){
 | 
				
			||||||
 | 
					     if(dim == _x){
 | 
				
			||||||
 | 
					        color[_x] = (coord[_y] == 0 && coord[_z] ==0) ? 1:MPI_UNDEFINED;
 | 
				
			||||||
 | 
					        id[_x] = me;
 | 
				
			||||||
 | 
					     } else if(dim == _y) {
 | 
				
			||||||
 | 
					        color[_y] = coord[_z] == 0 ? coord[_x]:MPI_UNDEFINED; 
 | 
				
			||||||
 | 
					        id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0:me;
 | 
				
			||||||
 | 
					     } else {
 | 
				
			||||||
 | 
					        color[_z]= coord[_y]*nprocs[_x]+coord[_x]; 
 | 
				
			||||||
 | 
					        id[_z] = coord[_z] == 0 ? 0 : me; 
 | 
				
			||||||
 | 
					     }
 | 
				
			||||||
 | 
					    MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
 | 
				
			||||||
 | 
					  } 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Set the minimum load and the balance load
 | 
				
			||||||
 | 
					  for(int dim = 0; dim<3; dim++){
 | 
				
			||||||
 | 
					    balancedLoad[dim] = 1./nprocs[dim]; 
 | 
				
			||||||
 | 
					    minLoad[dim]  = 0.8*balancedLoad[dim]; 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //set and communicate the workload in reverse order
 | 
				
			||||||
 | 
					  for(int dim = _z; dim>= _x; dim--)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    if(subComm[dim] != MPI_COMM_NULL){
 | 
				
			||||||
 | 
					      MPI_Gather(&time,1,type,load[dim],1,type,0,subComm[dim]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if(id[dim] == 0)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        for(int n=0; n<nprocs[dim]; n++) 
 | 
				
			||||||
 | 
					          t_sum[dim] += load[dim][n];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int n=0; n<nprocs[dim]; n++)
 | 
				
			||||||
 | 
					          load[dim][n] /= t_sum[dim];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      time =t_sum[dim];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Brodacast the new boundaries along dimensions
 | 
				
			||||||
 | 
					  for(int dim=0; dim<3; dim++){
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    if(subComm[dim] != MPI_COMM_NULL){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      MPI_Bcast(boundaries,6,type,0,subComm[dim]);
 | 
				
			||||||
 | 
					      if(id[dim] == 0) {
 | 
				
			||||||
 | 
					        fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]); 
 | 
				
			||||||
 | 
					        MD_FLOAT inv_sum=0;
 | 
				
			||||||
 | 
					        for(int n=0; n<nprocs[dim];n++)
 | 
				
			||||||
 | 
					          inv_sum +=(1/load[dim][n]);
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        for(int n=0; n<nprocs[dim];n++)
 | 
				
			||||||
 | 
					          cellSize[n] = (prd[dim]/load[dim][n])*(1./inv_sum); 
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					        MD_FLOAT sum=0;
 | 
				
			||||||
 | 
					        for(int n=0; n<nprocs[dim]; n++){
 | 
				
			||||||
 | 
					          limits[2*n] = sum; 
 | 
				
			||||||
 | 
					          limits[2*n+1] = sum+cellSize[n];
 | 
				
			||||||
 | 
					          sum+= cellSize[n]; 
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        limits[2*nprocs[dim]-1] = prd[dim];
 | 
				
			||||||
 | 
					      } 
 | 
				
			||||||
 | 
					      MPI_Scatter(limits,2,type,recv_buf,2,type,0,subComm[dim]); 
 | 
				
			||||||
 | 
					      boundaries[2*dim] = recv_buf[0];
 | 
				
			||||||
 | 
					      boundaries[2*dim+1] = recv_buf[1];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					     MPI_Barrier(world);
 | 
				
			||||||
 | 
					  }  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  atom->mybox.lo[_x]=boundaries[0]; atom->mybox.hi[_x]=boundaries[1];
 | 
				
			||||||
 | 
					  atom->mybox.lo[_y]=boundaries[2]; atom->mybox.hi[_y]=boundaries[3];
 | 
				
			||||||
 | 
					  atom->mybox.lo[_z]=boundaries[4]; atom->mybox.hi[_z]=boundaries[5];
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
 | 
				
			||||||
 | 
					  MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //because cells change dynamically, It is required to increase the neighbouring exchange region 
 | 
				
			||||||
 | 
					  for(int dim =_x; dim<=_z; dim++){
 | 
				
			||||||
 | 
					    MD_FLOAT dr,dr_max; 
 | 
				
			||||||
 | 
					    int n = grid->nprocs[dim]; 
 | 
				
			||||||
 | 
					    MD_FLOAT maxdelta = 0.2*prd[dim];
 | 
				
			||||||
 | 
					    dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]),fabs(hi[dim] - atom->mybox.hi[dim]));
 | 
				
			||||||
 | 
					    MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
 | 
				
			||||||
 | 
					    grid->cutneigh[dim] = param->cutneigh+dr_max; 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int dim=0; dim<3; dim++) {
 | 
				
			||||||
 | 
					    if(subComm[dim] != MPI_COMM_NULL){
 | 
				
			||||||
 | 
					      MPI_Comm_free(&subComm[dim]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    free(load[dim]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  free(load); 
 | 
				
			||||||
 | 
					  free(limits);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//RCB Balancing
 | 
				
			||||||
 | 
					MD_FLOAT meanTimeBisect(Atom *atom, MPI_Comm subComm, int dim, double time)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  MD_FLOAT mean=0, sum=0, total_sum=0, weightAtoms= 0, total_weight=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int i=0; i<atom->Nlocal; i++){
 | 
				
			||||||
 | 
					    sum += atom_pos(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  sum*=time;
 | 
				
			||||||
 | 
					  weightAtoms = atom->Nlocal*time;
 | 
				
			||||||
 | 
					  MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
 | 
				
			||||||
 | 
					  MPI_Allreduce(&weightAtoms, &total_weight, 1, type, MPI_SUM, subComm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  mean = total_sum/total_weight;
 | 
				
			||||||
 | 
					  return mean;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
 | 
				
			||||||
 | 
					{  
 | 
				
			||||||
 | 
					  int Natoms = 0;
 | 
				
			||||||
 | 
					  MD_FLOAT sum=0, mean=0, total_sum=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int i=0; i<atom->Nlocal; i++){
 | 
				
			||||||
 | 
					    sum += atom_pos(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
 | 
				
			||||||
 | 
					  MPI_Allreduce(&atom->Nlocal, &Natoms, 1, MPI_INT, MPI_SUM, subComm);
 | 
				
			||||||
 | 
					  mean = total_sum/Natoms;
 | 
				
			||||||
 | 
					  return mean;
 | 
				
			||||||
 | 
					} 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim ,int* color, int ilevel, double time)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  int rank, size;
 | 
				
			||||||
 | 
					  int branch = 0, i = 0, m = 0;
 | 
				
			||||||
 | 
					  int nsend = 0, nrecv = 0, nrecv2 = 0;
 | 
				
			||||||
 | 
					  int values_per_atom = 7; 
 | 
				
			||||||
 | 
					  MD_FLOAT bisection, pos;
 | 
				
			||||||
 | 
					  MPI_Request request[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
 | 
				
			||||||
 | 
					  MPI_Comm_rank(subComm,&rank);
 | 
				
			||||||
 | 
					  MPI_Comm_size(subComm,&size);
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					  int odd = size%2;
 | 
				
			||||||
 | 
					  int extraProc = odd ? size-1:size;
 | 
				
			||||||
 | 
					  int half = (int) (0.5*size);
 | 
				
			||||||
 | 
					  int partner = (rank<half) ? rank+half:rank-half;
 | 
				
			||||||
 | 
					  if(odd && rank == extraProc) partner = 0;
 | 
				
			||||||
 | 
					  //Apply the bisection 
 | 
				
			||||||
 | 
					  bisection = method(atom,subComm,dim,time);
 | 
				
			||||||
 | 
					  //Define the new boundaries
 | 
				
			||||||
 | 
					  if(rank<half){
 | 
				
			||||||
 | 
					    atom->mybox.hi[dim] = bisection;
 | 
				
			||||||
 | 
					    branch = 0;
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    atom->mybox.lo[dim] = bisection;
 | 
				
			||||||
 | 
					    branch = 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //Define new color for the further communicaton
 | 
				
			||||||
 | 
					  *color = (branch << ilevel) | *color;
 | 
				
			||||||
 | 
					  //Grow the send buffer
 | 
				
			||||||
 | 
					  if(atom->Nlocal>=grid->maxsend){
 | 
				
			||||||
 | 
					      if(grid->buf_send) free(grid->buf_send); 
 | 
				
			||||||
 | 
					      grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal*values_per_atom* sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					      grid->maxsend = atom->Nlocal;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //buffer particles to send
 | 
				
			||||||
 | 
					  while(i < atom->Nlocal) {
 | 
				
			||||||
 | 
					    pos = atom_pos(i);
 | 
				
			||||||
 | 
					    if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
 | 
				
			||||||
 | 
					      nsend += packExchange(atom, i, &grid->buf_send[nsend]);
 | 
				
			||||||
 | 
					      copy(atom, i, atom->Nlocal-1);
 | 
				
			||||||
 | 
					      atom->Nlocal--;
 | 
				
			||||||
 | 
					    } else i++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Communicate the number of elements to be sent
 | 
				
			||||||
 | 
					  if(rank < extraProc){
 | 
				
			||||||
 | 
					    MPI_Irecv(&nrecv,1,MPI_INT,partner,0,subComm,&request[0]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(odd && rank == 0){ 
 | 
				
			||||||
 | 
					    MPI_Irecv(&nrecv2,1,MPI_INT,extraProc,0,subComm,&request[1]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  MPI_Send(&nsend,1,MPI_INT,partner,0,subComm);
 | 
				
			||||||
 | 
					  MPI_Waitall(2,request,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Grow the recv buffer 
 | 
				
			||||||
 | 
					  if(nrecv+nrecv2>=grid->maxrecv){
 | 
				
			||||||
 | 
					      if(grid->buf_recv) free(grid->buf_recv); 
 | 
				
			||||||
 | 
					      grid->buf_recv = (MD_FLOAT*) malloc((nrecv+nrecv2)*values_per_atom*sizeof(MD_FLOAT));
 | 
				
			||||||
 | 
					      grid->maxrecv = nrecv+nrecv2;
 | 
				
			||||||
 | 
					  } 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //communicate elements in the buffer
 | 
				
			||||||
 | 
					  request[0] = MPI_REQUEST_NULL; 
 | 
				
			||||||
 | 
					  request[1] = MPI_REQUEST_NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(rank < extraProc){
 | 
				
			||||||
 | 
					    MPI_Irecv(grid->buf_recv,nrecv,type,partner,0,subComm,&request[0]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(odd && rank == 0){ 
 | 
				
			||||||
 | 
					    MPI_Irecv(&grid->buf_recv[nrecv],nrecv2,type,extraProc,0,subComm,&request[1]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  MPI_Send (grid->buf_send,nsend,type,partner,0,subComm); 
 | 
				
			||||||
 | 
					  MPI_Waitall(2,request,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //store atoms in atom list
 | 
				
			||||||
 | 
					  while(m < nrecv+nrecv2){ 
 | 
				
			||||||
 | 
					    m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int me, nprocs=0, ilevel=0, nboxes=1;
 | 
				
			||||||
 | 
					  int color = 0, size =0;
 | 
				
			||||||
 | 
					  int index, prd[3];
 | 
				
			||||||
 | 
					  MPI_Comm subComm;
 | 
				
			||||||
 | 
					  MPI_Comm_size(world, &nprocs);
 | 
				
			||||||
 | 
					  MPI_Comm_rank(world, &me);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //set the elapsed time since the last dynamic balance
 | 
				
			||||||
 | 
					  double time = newTime - grid->Timer;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  prd[_x] = atom->mybox.xprd = param->xprd; 
 | 
				
			||||||
 | 
					  prd[_y] = atom->mybox.yprd = param->yprd; 
 | 
				
			||||||
 | 
					  prd[_z] = atom->mybox.zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Sort by larger dimension 
 | 
				
			||||||
 | 
					  int largerDim[3] ={_x, _y, _z};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int i = 0; i< 2; i++){
 | 
				
			||||||
 | 
					    for(int j = i+1; j<3; j++)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      if(prd[largerDim[j]]>prd[largerDim[i]]){
 | 
				
			||||||
 | 
					        MD_FLOAT tmp = largerDim[j];
 | 
				
			||||||
 | 
					        largerDim[j] = largerDim[i];
 | 
				
			||||||
 | 
					        largerDim[i] = tmp;
 | 
				
			||||||
 | 
					      }  
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //Initial Partition
 | 
				
			||||||
 | 
					  atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
 | 
				
			||||||
 | 
					  atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
 | 
				
			||||||
 | 
					  atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Recursion tree 
 | 
				
			||||||
 | 
					  while(nboxes<nprocs)
 | 
				
			||||||
 | 
					  {  
 | 
				
			||||||
 | 
					    index = ilevel%ndim; 
 | 
				
			||||||
 | 
					    MPI_Comm_split(world, color, me, &subComm);
 | 
				
			||||||
 | 
					    MPI_Comm_size(subComm,&size);
 | 
				
			||||||
 | 
					    if(size > 1){
 | 
				
			||||||
 | 
					      nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Comm_free(&subComm);
 | 
				
			||||||
 | 
					    nboxes = pow(2,++ilevel);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //Set the new timer grid
 | 
				
			||||||
 | 
					  grid->Timer = newTime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Creating the global map
 | 
				
			||||||
 | 
					  MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
 | 
				
			||||||
 | 
					  MPI_Allgather(domain, 6, type, grid->map, 6, type, world);  
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Define the same cutneighbour in all dimensions for the exchange communication
 | 
				
			||||||
 | 
					  for(int dim =_x; dim<=_z; dim++)
 | 
				
			||||||
 | 
					    grid->cutneigh[dim] = param->cutneigh;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//Regular grid
 | 
				
			||||||
 | 
					void cartisian3d(Grid* grid, Parameter* param, Box* box)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int me, nproc;
 | 
				
			||||||
 | 
					  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
 | 
				
			||||||
 | 
					  MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  int numdim=3;
 | 
				
			||||||
 | 
					  int reorder=0;
 | 
				
			||||||
 | 
					  int periods[3]={1,1,1}; 
 | 
				
			||||||
 | 
					  int mycoord[3]={0,0,0};
 | 
				
			||||||
 | 
					  int griddim[3]={0,0,0};
 | 
				
			||||||
 | 
					  MD_FLOAT len[3];
 | 
				
			||||||
 | 
					  MPI_Comm cartesian;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  box->xprd = param->xprd;
 | 
				
			||||||
 | 
					  box->yprd = param->yprd;
 | 
				
			||||||
 | 
					  box->zprd = param->zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 //Creates a cartesian 3d grid 
 | 
				
			||||||
 | 
					  MPI_Dims_create(nproc, numdim, griddim); 
 | 
				
			||||||
 | 
					  MPI_Cart_create(world,numdim,griddim,periods,reorder,&cartesian); 
 | 
				
			||||||
 | 
					  grid->nprocs[_x] = griddim[_x];
 | 
				
			||||||
 | 
					  grid->nprocs[_y] = griddim[_y]; 
 | 
				
			||||||
 | 
					  grid->nprocs[_z] = griddim[_z];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Coordinates position in the grid
 | 
				
			||||||
 | 
					  MPI_Cart_coords(cartesian,me,3,mycoord); 
 | 
				
			||||||
 | 
					  grid->coord[_x] = mycoord[_x];
 | 
				
			||||||
 | 
					  grid->coord[_y] = mycoord[_y];
 | 
				
			||||||
 | 
					  grid->coord[_z] = mycoord[_z];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //boundaries of my local box, with origin in (0,0,0). 
 | 
				
			||||||
 | 
					  len[_x] = param->xprd / griddim[_x];
 | 
				
			||||||
 | 
					  len[_y] = param->yprd / griddim[_y];
 | 
				
			||||||
 | 
					  len[_z] = param->zprd / griddim[_z];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  box->lo[_x] = mycoord[_x] * len[_x];
 | 
				
			||||||
 | 
					  box->hi[_x] = (mycoord[_x] + 1) * len[_x];
 | 
				
			||||||
 | 
					  box->lo[_y] = mycoord[_y] * len[_y];
 | 
				
			||||||
 | 
					  box->hi[_y] = (mycoord[_y] + 1) * len[_y];
 | 
				
			||||||
 | 
					  box->lo[_z] = mycoord[_z] * len[_z];
 | 
				
			||||||
 | 
					  box->hi[_z] = (mycoord[_z] + 1) * len[_z];
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  MD_FLOAT domain[6] = {box->lo[_x], box->lo[_y], box->lo[_z], box->hi[_x], box->hi[_y], box->hi[_z]};
 | 
				
			||||||
 | 
					  MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
 | 
				
			||||||
 | 
					  MPI_Comm_free(&cartesian);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Define the same cutneighbour in all dimensions for the exchange communication
 | 
				
			||||||
 | 
					  for(int dim =_x; dim<=_z; dim++)
 | 
				
			||||||
 | 
					    grid->cutneigh[dim] = param->cutneigh;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//Other Functions from the grid
 | 
				
			||||||
 | 
					void initGrid(Grid* grid)
 | 
				
			||||||
 | 
					{ //start with regular grid
 | 
				
			||||||
 | 
					  int nprocs;
 | 
				
			||||||
 | 
					  MPI_Comm_size(world, &nprocs);
 | 
				
			||||||
 | 
					  grid->map_size = 6 * nprocs;             
 | 
				
			||||||
 | 
					  grid->map  = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));  
 | 
				
			||||||
 | 
					  //========rcb=======
 | 
				
			||||||
 | 
					  grid->maxsend = 0; 
 | 
				
			||||||
 | 
					  grid->maxrecv = 0;
 | 
				
			||||||
 | 
					  grid->buf_send = NULL;  
 | 
				
			||||||
 | 
					  grid->buf_recv = NULL;
 | 
				
			||||||
 | 
					  //====staggered=====
 | 
				
			||||||
 | 
					  grid->Timer = 0.;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void setupGrid(Grid* grid, Atom* atom, Parameter* param)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int me; 
 | 
				
			||||||
 | 
					  MD_FLOAT xlo, ylo, zlo, xhi, yhi, zhi; 
 | 
				
			||||||
 | 
					  MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					  initGrid(grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Set the origin at (0,0,0)
 | 
				
			||||||
 | 
					  if(param->input_file){
 | 
				
			||||||
 | 
					    for(int i=0; i<atom->Nlocal; i++){
 | 
				
			||||||
 | 
					      atom_x(i) = atom_x(i) - param->xlo;
 | 
				
			||||||
 | 
					      atom_y(i) = atom_y(i) - param->ylo;
 | 
				
			||||||
 | 
					      atom_z(i) = atom_z(i) - param->zlo;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  cartisian3d(grid, param, &atom->mybox);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  xlo = atom->mybox.lo[_x]; xhi = atom->mybox.hi[_x];  
 | 
				
			||||||
 | 
					  ylo = atom->mybox.lo[_y]; yhi = atom->mybox.hi[_y];
 | 
				
			||||||
 | 
					  zlo = atom->mybox.lo[_z]; zhi = atom->mybox.hi[_z];  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int i = 0; 
 | 
				
			||||||
 | 
					  while(i < atom->Nlocal) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    if(atom_x(i) >= xlo && atom_x(i)< xhi &&  
 | 
				
			||||||
 | 
					       atom_y(i) >= ylo && atom_y(i)< yhi &&  
 | 
				
			||||||
 | 
					       atom_z(i) >= zlo && atom_z(i)< zhi)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        i++;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        copy(atom, i, atom->Nlocal-1);
 | 
				
			||||||
 | 
					        atom->Nlocal--; 
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					  } 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //printGrid(grid);
 | 
				
			||||||
 | 
					  if(!param->balance){
 | 
				
			||||||
 | 
					    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world); 
 | 
				
			||||||
 | 
					    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
 | 
					  }  
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void printGrid(Grid* grid)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int me, nprocs;
 | 
				
			||||||
 | 
					  MPI_Comm_size(world, &nprocs);
 | 
				
			||||||
 | 
					  MPI_Comm_rank(world, &me);
 | 
				
			||||||
 | 
					  MD_FLOAT* map = grid->map;
 | 
				
			||||||
 | 
					  if(me==0)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					    printf("GRID:\n");
 | 
				
			||||||
 | 
					    printf("===================================================================================================\n");
 | 
				
			||||||
 | 
					    for(int i=0; i<nprocs; i++)
 | 
				
			||||||
 | 
					      printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
 | 
				
			||||||
 | 
					    printf("\n\n");    
 | 
				
			||||||
 | 
					    //printf("Box processor:%i\n xlo:%.4f\txhi:%.4f\n ylo:%.4f\tyhi:%.4f\n zlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  MPI_Barrier(world);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										22
									
								
								common/includes/box.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								common/includes/box.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,22 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 | 
					 * All rights reserved. This file is part of MD-Bench.
 | 
				
			||||||
 | 
					 * Use of this source code is governed by a LGPL-3.0
 | 
				
			||||||
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef __BOX_H_
 | 
				
			||||||
 | 
					#define __BOX_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  int id;
 | 
				
			||||||
 | 
					  MD_FLOAT xprd, yprd, zprd;     //Domain Dimension
 | 
				
			||||||
 | 
					  MD_FLOAT lo[3];               //smallest coordinate of my subdomain
 | 
				
			||||||
 | 
					  MD_FLOAT hi[3];               //Highest coordinate of my subdomain
 | 
				
			||||||
 | 
					} Box;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int overlapBox(int, int , const Box*, const Box* , Box* , MD_FLOAT , MD_FLOAT);
 | 
				
			||||||
 | 
					int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
 | 
				
			||||||
 | 
					void expandBox(int , const Box*, const Box* , Box* , MD_FLOAT);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
							
								
								
									
										104
									
								
								common/includes/comm.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								common/includes/comm.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,104 @@
 | 
				
			|||||||
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <box.h>
 | 
				
			||||||
 | 
					#include <grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef COMM_H
 | 
				
			||||||
 | 
					#define COMM_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GROMACS
 | 
				
			||||||
 | 
					#define FORWARD_SIZE  (3*CLUSTER_N)   
 | 
				
			||||||
 | 
					#define REVERSE_SIZE  (3*CLUSTER_N)
 | 
				
			||||||
 | 
					#define GHOST_SIZE    (4*CLUSTER_N+10)
 | 
				
			||||||
 | 
					#define EXCHANGE_SIZE 7
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define JFAC MAX(1, CLUSTER_N / CLUSTER_M)
 | 
				
			||||||
 | 
					#define LOCAL atom->Nclusters_local / JFAC
 | 
				
			||||||
 | 
					#define GHOST atom->Nclusters_ghost 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define IsinRegionToSend(cj)                                                                  \
 | 
				
			||||||
 | 
					           ((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo)  &&  \
 | 
				
			||||||
 | 
					            (atom->jclusters[(cj)].bbminx  < xhi || atom->jclusters[(cj)].bbmaxx  < xhi)  &&  \
 | 
				
			||||||
 | 
					            (atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo)  &&  \
 | 
				
			||||||
 | 
					            (atom->jclusters[(cj)].bbminy  < yhi || atom->jclusters[(cj)].bbmaxy  < yhi)  &&  \
 | 
				
			||||||
 | 
					            (atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo)  &&  \
 | 
				
			||||||
 | 
					            (atom->jclusters[(cj)].bbminz  < zhi || atom->jclusters[(cj)].bbmaxz  < zhi))  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define FORWARD_SIZE  3   
 | 
				
			||||||
 | 
					#define REVERSE_SIZE  3
 | 
				
			||||||
 | 
					#define GHOST_SIZE    4
 | 
				
			||||||
 | 
					#define EXCHANGE_SIZE 7
 | 
				
			||||||
 | 
					#define LOCAL atom->Nlocal
 | 
				
			||||||
 | 
					#define GHOST atom->Nghost
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define IsinRegionToSend(i)                                 \
 | 
				
			||||||
 | 
					           ((atom_x((i)) >= xlo && atom_x((i)) < xhi) &&    \
 | 
				
			||||||
 | 
					            (atom_y((i)) >= ylo && atom_y((i)) < yhi) &&    \
 | 
				
			||||||
 | 
					            (atom_z((i)) >= zlo && atom_z((i)) < zhi)) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  int myproc;                       // my proc ID
 | 
				
			||||||
 | 
					  int numproc;                      // # of processors
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					  int numneigh;                     // # of all my neighs along all swaps 
 | 
				
			||||||
 | 
					  int maxneigh;										  // Buffer size for my neighs
 | 
				
			||||||
 | 
						int sendfrom[6];                  //return the lowest neigh index to send in each swap
 | 
				
			||||||
 | 
					  int sendtill[6];                  //return the highest neigh index to send in each swao
 | 
				
			||||||
 | 
					  int recvfrom[6];                  //return the lowest neigh index to recv in each swap
 | 
				
			||||||
 | 
					  int recvtill[6];                  //return the highest neigh index to recv in each swap
 | 
				
			||||||
 | 
					  int* nsend;											  // neigh whose I want to send
 | 
				
			||||||
 | 
					  int* nrecv;                       // neigh whose I want to recv
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int* pbc_x;                       // if pbc in x
 | 
				
			||||||
 | 
						int* pbc_y;                       // if pbc in y
 | 
				
			||||||
 | 
						int* pbc_z;                       // if pbc in z
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					  int* atom_send, *atom_recv;       // # of atoms to send/recv for each of my neighs 
 | 
				
			||||||
 | 
						int* off_atom_send;               // atom offset to send, inside of a swap
 | 
				
			||||||
 | 
					  int* off_atom_recv;               // atom offset to recv, inside of a swap
 | 
				
			||||||
 | 
					         
 | 
				
			||||||
 | 
					  int* nexch;                        //procs to exchange
 | 
				
			||||||
 | 
					  int numneighexch;                  //# of neighbours to exchange
 | 
				
			||||||
 | 
					  int maxneighexch;                  //max buff size to store neighbours
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int numswap;                      // # of swaps to perform, it is 6
 | 
				
			||||||
 | 
					  int swapdim[6]; 									// dimension of the swap (_x, _y or _z)
 | 
				
			||||||
 | 
						int swapdir[6];										// direction of the swap 0 or 1
 | 
				
			||||||
 | 
					  int swap[3][2];                   // given a dim and dir, knows the swap
 | 
				
			||||||
 | 
					  int othersend[6];                 // Determine if a proc interact with more procs in a given swap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int firstrecv[6];                 // where to put 1st recv atom in each swap
 | 
				
			||||||
 | 
					  int** sendlist;                   // list of atoms to send in each swap   
 | 
				
			||||||
 | 
					  int* maxsendlist;								  // max # of atoms send in each list-swap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int maxsend;											// max elements in buff sender 									
 | 
				
			||||||
 | 
						int maxrecv;											// max elements in buff receiver
 | 
				
			||||||
 | 
					  MD_FLOAT* buf_send;               // sender buffer for all comm
 | 
				
			||||||
 | 
						MD_FLOAT* buf_recv;               // receicer buffer for all comm
 | 
				
			||||||
 | 
						 	  
 | 
				
			||||||
 | 
						int forwardSize;					        // # of paramaters per atom in forward comm.
 | 
				
			||||||
 | 
						int reverseSize;			        		// # of parameters per atom in reverse
 | 
				
			||||||
 | 
					  int exchangeSize;                 // # of parameters per atom in exchange
 | 
				
			||||||
 | 
						int ghostSize;                    // # of parameters per atom in ghost list                               
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int  iterAtom;                     //last atom to iterate in each swap.
 | 
				
			||||||
 | 
					  Box* boxes; 											 // Boundaries to  be sent to other procs as ghost.
 | 
				
			||||||
 | 
					} Comm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void initComm(int*, char***, Comm*); 						    //Init MPI 
 | 
				
			||||||
 | 
					void endComm(Comm*);													      //End MPI
 | 
				
			||||||
 | 
					void setupComm(Comm*,Parameter*,Grid*);             //Creates a 3d grid or rcb grid
 | 
				
			||||||
 | 
					void neighComm(Comm*,Parameter*,Grid*);             //Find neighbours within cut-off and defines ghost regions
 | 
				
			||||||
 | 
					void forwardComm(Comm*,Atom*,int);							    //Send info in one direction
 | 
				
			||||||
 | 
					void reverseComm(Comm*,Atom*,int);							    //Return info after forward communication
 | 
				
			||||||
 | 
					void exchangeComm(Comm*,Atom*);							        //Exchange info between procs
 | 
				
			||||||
 | 
					void ghostComm(Comm*, Atom*,int);                   //Build the ghost neighbours to send during next forwards
 | 
				
			||||||
 | 
					void growSend(Comm*,int);										        //Grows the size of the buffer sender
 | 
				
			||||||
 | 
					void growRecv(Comm*,int);										        //Grows the size of the buffer receiver
 | 
				
			||||||
 | 
					void growList(Comm*, int, int);                     //Grows the size of the list to send
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
							
								
								
									
										51
									
								
								common/includes/grid.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								common/includes/grid.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 | 
					 * All rights reserved. This file is part of MD-Bench.
 | 
				
			||||||
 | 
					 * Use of this source code is governed by a LGPL-3.0
 | 
				
			||||||
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <box.h>
 | 
				
			||||||
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef __MAP_H_
 | 
				
			||||||
 | 
					#define __MAP_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define world MPI_COMM_WORLD
 | 
				
			||||||
 | 
					#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum {RCB=1, meanTimeRCB, Staggered};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					  int balance_every;
 | 
				
			||||||
 | 
					  int  map_size;
 | 
				
			||||||
 | 
					  MD_FLOAT* map;
 | 
				
			||||||
 | 
					  //===Param for Staggerd balance
 | 
				
			||||||
 | 
					  int nprocs[3]; 
 | 
				
			||||||
 | 
					  int coord[3];
 | 
				
			||||||
 | 
					  MD_FLOAT cutneigh[3];
 | 
				
			||||||
 | 
					  double Timer;
 | 
				
			||||||
 | 
					  //===Param for RCB balance 
 | 
				
			||||||
 | 
					  MD_FLOAT* buf_send;
 | 
				
			||||||
 | 
					  MD_FLOAT* buf_recv;
 | 
				
			||||||
 | 
					  int maxsend; 
 | 
				
			||||||
 | 
					  int maxrecv; 
 | 
				
			||||||
 | 
					} Grid; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef MD_FLOAT(*RCB_Method)(Atom*,MPI_Comm,int,double);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void setupGrid(Grid*, Atom*, Parameter*);
 | 
				
			||||||
 | 
					void cartisian3d(Grid*, Parameter*, Box*);
 | 
				
			||||||
 | 
					void rcbBalance(Grid*, Atom*, Parameter* ,RCB_Method, int, double);
 | 
				
			||||||
 | 
					void staggeredBalance(Grid*, Atom*, Parameter*, double); 
 | 
				
			||||||
 | 
					void printGrid(Grid*); 
 | 
				
			||||||
 | 
					//rcb methods
 | 
				
			||||||
 | 
					MD_FLOAT meanBisect(Atom* , MPI_Comm, int, double);
 | 
				
			||||||
 | 
					MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -53,6 +53,10 @@ typedef struct {
 | 
				
			|||||||
    MD_FLOAT k_dn;
 | 
					    MD_FLOAT k_dn;
 | 
				
			||||||
    MD_FLOAT gx, gy, gz;
 | 
					    MD_FLOAT gx, gy, gz;
 | 
				
			||||||
    MD_FLOAT reflect_x, reflect_y, reflect_z;
 | 
					    MD_FLOAT reflect_x, reflect_y, reflect_z;
 | 
				
			||||||
 | 
					    //MPI implementation
 | 
				
			||||||
 | 
					    int balance;
 | 
				
			||||||
 | 
					    int method;
 | 
				
			||||||
 | 
					    int balance_every;
 | 
				
			||||||
} Parameter;
 | 
					} Parameter;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void initParameter(Parameter*);
 | 
					void initParameter(Parameter*);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										71
									
								
								common/includes/shell_methods.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								common/includes/shell_methods.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,71 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 | 
				
			||||||
 | 
					 * All rights reserved. This file is part of MD-Bench.
 | 
				
			||||||
 | 
					 * Use of this source code is governed by a LGPL-3.0
 | 
				
			||||||
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					#include <limits.h>
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					#include <comm.h>
 | 
				
			||||||
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <timing.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//static void addDummyCluster(Atom*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double forward(Comm* comm, Atom *atom, Parameter* param){
 | 
				
			||||||
 | 
					    double S, E;    
 | 
				
			||||||
 | 
					    S = getTimeStamp();  
 | 
				
			||||||
 | 
					    if(param->method == halfShell){
 | 
				
			||||||
 | 
					        for(int iswap = 0; iswap < 5; iswap++) 
 | 
				
			||||||
 | 
					            forwardComm(comm, atom, iswap);
 | 
				
			||||||
 | 
					    } else if(param->method == eightShell){
 | 
				
			||||||
 | 
					        for(int iswap = 0; iswap < 6; iswap+=2) 
 | 
				
			||||||
 | 
					            forwardComm(comm, atom, iswap);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        for(int iswap = 0; iswap < 6; iswap++) 
 | 
				
			||||||
 | 
					            forwardComm(comm, atom, iswap);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double reverse(Comm* comm, Atom *atom, Parameter* param){
 | 
				
			||||||
 | 
					    double S, E;    
 | 
				
			||||||
 | 
					    S = getTimeStamp(); 
 | 
				
			||||||
 | 
					    if(param->method == halfShell){
 | 
				
			||||||
 | 
					        for(int iswap = 4; iswap >= 0; iswap--) 
 | 
				
			||||||
 | 
					            reverseComm(comm, atom, iswap);
 | 
				
			||||||
 | 
					    } else if(param->method == eightShell){
 | 
				
			||||||
 | 
					        for(int iswap = 4; iswap >= 0; iswap-=2) 
 | 
				
			||||||
 | 
					            reverseComm(comm, atom, iswap);
 | 
				
			||||||
 | 
					    } else if(param->method == halfStencil){
 | 
				
			||||||
 | 
					        for(int iswap = 5; iswap >= 0; iswap--) 
 | 
				
			||||||
 | 
					            reverseComm(comm, atom, iswap);
 | 
				
			||||||
 | 
					    }  else { }  //Full Shell Reverse does nothing 
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    #ifdef GROMACS
 | 
				
			||||||
 | 
					    atom->Nclusters_ghost = 0;
 | 
				
			||||||
 | 
					    #endif
 | 
				
			||||||
 | 
					    atom->Nghost = 0;    
 | 
				
			||||||
 | 
					    if(param->method == halfShell){
 | 
				
			||||||
 | 
					        for(int iswap=0; iswap<5; iswap++) 
 | 
				
			||||||
 | 
					            ghostComm(comm,atom,iswap);
 | 
				
			||||||
 | 
					    } else if(param->method == eightShell){
 | 
				
			||||||
 | 
					        for(int iswap = 0; iswap<6; iswap+=2)
 | 
				
			||||||
 | 
					            ghostComm(comm, atom,iswap);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        for(int iswap=0; iswap<6; iswap++) 
 | 
				
			||||||
 | 
					            ghostComm(comm,atom,iswap);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -9,9 +9,15 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
typedef enum {
 | 
					typedef enum {
 | 
				
			||||||
    TOTAL = 0,
 | 
					    TOTAL = 0,
 | 
				
			||||||
    NEIGH,
 | 
					 | 
				
			||||||
    FORCE,
 | 
					    FORCE,
 | 
				
			||||||
 | 
					    NEIGH,
 | 
				
			||||||
 | 
					    FORWARD,
 | 
				
			||||||
 | 
					    REVERSE,
 | 
				
			||||||
 | 
					    UPDATE,
 | 
				
			||||||
 | 
					    BALANCE,
 | 
				
			||||||
 | 
					    SETUP,
 | 
				
			||||||
 | 
					    REST,
 | 
				
			||||||
    NUMTIMER
 | 
					    NUMTIMER
 | 
				
			||||||
} timertype;
 | 
					 } timerComm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,6 +4,8 @@
 | 
				
			|||||||
 * Use of this source code is governed by a LGPL-3.0
 | 
					 * Use of this source code is governed by a LGPL-3.0
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef __UTIL_H_
 | 
					#ifndef __UTIL_H_
 | 
				
			||||||
#define __UTIL_H_
 | 
					#define __UTIL_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -35,6 +37,13 @@
 | 
				
			|||||||
#   define PRECISION_STRING     "double"
 | 
					#   define PRECISION_STRING     "double"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
 | 
				
			||||||
 | 
					#define Equal(a,b) (fabs((a)-(b))<1e-9)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum {_x=0, _y, _z}; 
 | 
				
			||||||
 | 
					enum {fullShell=0, halfShell, eightShell, halfStencil};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern double myrandom(int*);
 | 
					extern double myrandom(int*);
 | 
				
			||||||
extern void random_reset(int *seed, int ibase, double *coord);
 | 
					extern void random_reset(int *seed, int ibase, double *coord);
 | 
				
			||||||
extern int str2ff(const char *string);
 | 
					extern int str2ff(const char *string);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -11,6 +11,7 @@
 | 
				
			|||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void initParameter(Parameter *param) {
 | 
					void initParameter(Parameter *param) {
 | 
				
			||||||
    param->input_file = NULL;
 | 
					    param->input_file = NULL;
 | 
				
			||||||
@@ -54,6 +55,10 @@ void initParameter(Parameter *param) {
 | 
				
			|||||||
    param->reflect_x = 0.0;
 | 
					    param->reflect_x = 0.0;
 | 
				
			||||||
    param->reflect_y = 0.0;
 | 
					    param->reflect_y = 0.0;
 | 
				
			||||||
    param->reflect_z = 0.0;
 | 
					    param->reflect_z = 0.0;
 | 
				
			||||||
 | 
					    //MPI
 | 
				
			||||||
 | 
					    param->balance = 0;
 | 
				
			||||||
 | 
					    param->method = 0;
 | 
				
			||||||
 | 
					    param->balance_every =param->reneigh_every; 
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void readParameter(Parameter *param, const char *filename) {
 | 
					void readParameter(Parameter *param, const char *filename) {
 | 
				
			||||||
@@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
 | 
				
			|||||||
        for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
 | 
					        for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
 | 
				
			||||||
        line[i] = '\0';
 | 
					        line[i] = '\0';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        char *tok = strtok(line, " ");
 | 
					        char *tok = strtok(line, "\t ");
 | 
				
			||||||
        char *val = strtok(NULL, " ");
 | 
					        char *val = strtok(NULL, "\t ");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #define PARSE_PARAM(p,f)   if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
 | 
					        #define PARSE_PARAM(p,f)   if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
 | 
				
			||||||
        #define PARSE_STRING(p)    PARSE_PARAM(p, strdup)
 | 
					        #define PARSE_STRING(p)    PARSE_PARAM(p, strdup)
 | 
				
			||||||
@@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
 | 
				
			|||||||
            PARSE_INT(x_out_every);
 | 
					            PARSE_INT(x_out_every);
 | 
				
			||||||
            PARSE_INT(v_out_every);
 | 
					            PARSE_INT(v_out_every);
 | 
				
			||||||
            PARSE_INT(half_neigh);
 | 
					            PARSE_INT(half_neigh);
 | 
				
			||||||
 | 
					            PARSE_INT(method);
 | 
				
			||||||
 | 
					            PARSE_INT(balance);
 | 
				
			||||||
 | 
					            PARSE_INT(balance_every);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Update dtforce
 | 
					    // Update dtforce
 | 
				
			||||||
    param->dtforce = 0.5 * param->dt;
 | 
					    param->dtforce = 0.5 * param->dt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Update sigma6 parameter
 | 
					    // Update sigma6 parameter
 | 
				
			||||||
    MD_FLOAT s2 = param->sigma * param->sigma;
 | 
					    MD_FLOAT s2 = param->sigma * param->sigma;
 | 
				
			||||||
    param->sigma6 = s2 * s2 * s2;
 | 
					    param->sigma6 = s2 * s2 * s2;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    //Update balance parameter, 10 could be change
 | 
				
			||||||
 | 
					    param->balance_every *=param->reneigh_every;
 | 
				
			||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
 | 
				
			|||||||
    printf("\tSkin: %e\n", param->skin);
 | 
					    printf("\tSkin: %e\n", param->skin);
 | 
				
			||||||
    printf("\tHalf neighbor lists: %d\n", param->half_neigh);
 | 
					    printf("\tHalf neighbor lists: %d\n", param->half_neigh);
 | 
				
			||||||
    printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
 | 
					    printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // ================ New MPI features =============
 | 
				
			||||||
 | 
					    char str[20]; 
 | 
				
			||||||
 | 
					    strcpy(str, (param->method == 1) ? "Half Shell"  :
 | 
				
			||||||
 | 
					                (param->method == 2) ? "Eight Shell" :
 | 
				
			||||||
 | 
					                (param->method == 3) ? "Half Stencil":                      
 | 
				
			||||||
 | 
					                                       "Full Shell");
 | 
				
			||||||
 | 
					    printf("\tMethod: %s\n", str);
 | 
				
			||||||
 | 
					    strcpy(str, (param->balance == 1) ? "mean RCB"      : 
 | 
				
			||||||
 | 
					                (param->balance == 2) ? "mean Time RCB" :
 | 
				
			||||||
 | 
					                (param->balance == 3) ? "Staggered"     :
 | 
				
			||||||
 | 
					                                        "cartisian");
 | 
				
			||||||
 | 
					    printf("\tPartition: %s\n", str);
 | 
				
			||||||
 | 
					    if(param->balance) 
 | 
				
			||||||
 | 
					        printf("\tRebalancing every (timesteps): %d\n",param->balance_every); 
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,6 +10,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <thermo.h>
 | 
					#include <thermo.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int *steparr;
 | 
					static int *steparr;
 | 
				
			||||||
static MD_FLOAT *tmparr;
 | 
					static MD_FLOAT *tmparr;
 | 
				
			||||||
@@ -24,6 +25,7 @@ static MD_FLOAT t_act;
 | 
				
			|||||||
static MD_FLOAT p_act;
 | 
					static MD_FLOAT p_act;
 | 
				
			||||||
static MD_FLOAT e_act;
 | 
					static MD_FLOAT e_act;
 | 
				
			||||||
static int mstat;
 | 
					static int mstat;
 | 
				
			||||||
 | 
					static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* exported subroutines */
 | 
					/* exported subroutines */
 | 
				
			||||||
void setupThermo(Parameter *param, int natoms)
 | 
					void setupThermo(Parameter *param, int natoms)
 | 
				
			||||||
@@ -53,12 +55,19 @@ void setupThermo(Parameter *param, int natoms)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void computeThermo(int iflag, Parameter *param, Atom *atom)
 | 
					void computeThermo(int iflag, Parameter *param, Atom *atom)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    MD_FLOAT t = 0.0, p;
 | 
					    MD_FLOAT t_sum = 0.0, t = 0.0, p;
 | 
				
			||||||
 | 
					    int me; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					    for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
 | 
					        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    t = t * t_scale;
 | 
					    MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0 ,MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    if(me == 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        t = t_sum * t_scale;
 | 
				
			||||||
        p = (t * dof_boltz) * p_scale;
 | 
					        p = (t * dof_boltz) * p_scale;
 | 
				
			||||||
        int istep = iflag;
 | 
					        int istep = iflag;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -74,12 +83,14 @@ void computeThermo(int iflag, Parameter *param, Atom *atom)
 | 
				
			|||||||
        prsarr[mstat] = p;
 | 
					        prsarr[mstat] = p;
 | 
				
			||||||
        mstat++;
 | 
					        mstat++;
 | 
				
			||||||
        fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
 | 
					        fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void adjustThermo(Parameter *param, Atom *atom)
 | 
					void adjustThermo(Parameter *param, Atom *atom)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    /* zero center-of-mass motion */
 | 
					    /* zero center-of-mass motion */
 | 
				
			||||||
    MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
 | 
					    MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
 | 
				
			||||||
 | 
					    MD_FLOAT v_sum[3], vtot[3];  
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					    for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
        vxtot += atom_vx(i);
 | 
					        vxtot += atom_vx(i);
 | 
				
			||||||
@@ -87,9 +98,13 @@ void adjustThermo(Parameter *param, Atom *atom)
 | 
				
			|||||||
        vztot += atom_vz(i);
 | 
					        vztot += atom_vz(i);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    vxtot = vxtot / atom->Natoms;
 | 
					    vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;  
 | 
				
			||||||
    vytot = vytot / atom->Natoms;
 | 
					
 | 
				
			||||||
    vztot = vztot / atom->Natoms;
 | 
					    MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    vxtot = v_sum[0] / atom->Natoms;
 | 
				
			||||||
 | 
					    vytot = v_sum[1] / atom->Natoms;
 | 
				
			||||||
 | 
					    vztot = v_sum[2] / atom->Natoms;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					    for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
        atom_vx(i) -= vxtot;
 | 
					        atom_vx(i) -= vxtot;
 | 
				
			||||||
@@ -97,13 +112,16 @@ void adjustThermo(Parameter *param, Atom *atom)
 | 
				
			|||||||
        atom_vz(i) -= vztot;
 | 
					        atom_vz(i) -= vztot;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
   
 | 
					   
 | 
				
			||||||
    t_act = 0;
 | 
					 | 
				
			||||||
    MD_FLOAT t = 0.0;
 | 
					    MD_FLOAT t = 0.0;
 | 
				
			||||||
 | 
					    MD_FLOAT t_sum = 0.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					    for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
 | 
					        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Allreduce(&t, &t_sum, 1,type, MPI_SUM,MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    t = t_sum; 
 | 
				
			||||||
    t *= t_scale;
 | 
					    t *= t_scale;
 | 
				
			||||||
    MD_FLOAT factor = sqrt(param->temp / t);
 | 
					    MD_FLOAT factor = sqrt(param->temp / t);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,6 +10,7 @@
 | 
				
			|||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
 | 
					/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
 | 
				
			||||||
#define IA 16807
 | 
					#define IA 16807
 | 
				
			||||||
@@ -86,6 +87,7 @@ int get_cuda_num_threads() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void readline(char *line, FILE *fp) {
 | 
					void readline(char *line, FILE *fp) {
 | 
				
			||||||
    if(fgets(line, MAXLINE, fp) == NULL) {
 | 
					    if(fgets(line, MAXLINE, fp) == NULL) {
 | 
				
			||||||
 | 
					        printf("error %i\n",errno);
 | 
				
			||||||
        if(errno != 0) {
 | 
					        if(errno != 0) {
 | 
				
			||||||
            perror("readline()");
 | 
					            perror("readline()");
 | 
				
			||||||
            exit(-1);
 | 
					            exit(-1);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										16
									
								
								config.mk
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								config.mk
									
									
									
									
									
								
							@@ -1,15 +1,15 @@
 | 
				
			|||||||
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
 | 
					# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
 | 
				
			||||||
TAG ?= ICC
 | 
					TAG ?= MPIICC
 | 
				
			||||||
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
 | 
					# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
 | 
				
			||||||
ISA ?= AVX512
 | 
					ISA ?= AVX512
 | 
				
			||||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
 | 
					# Optimization scheme (lammps/gromacs/clusters_per_bin)
 | 
				
			||||||
OPT_SCHEME ?= lammps
 | 
					OPT_SCHEME ?= gromacs
 | 
				
			||||||
# Enable likwid (true or false)
 | 
					# Enable likwid (true or false)
 | 
				
			||||||
ENABLE_LIKWID ?= true
 | 
					ENABLE_LIKWID ?= false
 | 
				
			||||||
# SP or DP
 | 
					# SP or DP
 | 
				
			||||||
DATA_TYPE ?= DP
 | 
					DATA_TYPE ?= DP
 | 
				
			||||||
# AOS or SOA
 | 
					# AOS or SOA
 | 
				
			||||||
DATA_LAYOUT ?= AOS
 | 
					DATA_LAYOUT ?= SOA
 | 
				
			||||||
# Assembly syntax to generate (ATT/INTEL)
 | 
					# Assembly syntax to generate (ATT/INTEL)
 | 
				
			||||||
ASM_SYNTAX ?= ATT
 | 
					ASM_SYNTAX ?= ATT
 | 
				
			||||||
# Debug
 | 
					# Debug
 | 
				
			||||||
@@ -24,13 +24,13 @@ MEM_TRACER ?= false
 | 
				
			|||||||
# Trace indexes and distances for gather-md (true or false)
 | 
					# Trace indexes and distances for gather-md (true or false)
 | 
				
			||||||
INDEX_TRACER ?= false
 | 
					INDEX_TRACER ?= false
 | 
				
			||||||
# Compute statistics
 | 
					# Compute statistics
 | 
				
			||||||
COMPUTE_STATS ?= true
 | 
					COMPUTE_STATS ?= false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Configurations for lammps optimization scheme
 | 
					# Configurations for lammps optimization scheme
 | 
				
			||||||
# Use omp simd pragma when running with half neighbor-lists
 | 
					# Use omp simd pragma when running with half neighbor-lists
 | 
				
			||||||
ENABLE_OMP_SIMD ?= true
 | 
					ENABLE_OMP_SIMD ?= false
 | 
				
			||||||
# Use kernel with explicit SIMD intrinsics
 | 
					# Use kernel with explicit SIMD intrinsics
 | 
				
			||||||
USE_SIMD_KERNEL ?= false
 | 
					USE_SIMD_KERNEL ?= true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Configurations for gromacs optimization scheme
 | 
					# Configurations for gromacs optimization scheme
 | 
				
			||||||
# Use reference version
 | 
					# Use reference version
 | 
				
			||||||
@@ -38,7 +38,7 @@ USE_REFERENCE_VERSION ?= false
 | 
				
			|||||||
# Enable XTC output
 | 
					# Enable XTC output
 | 
				
			||||||
XTC_OUTPUT ?= false
 | 
					XTC_OUTPUT ?= false
 | 
				
			||||||
# Check if cj is local when decreasing reaction force
 | 
					# Check if cj is local when decreasing reaction force
 | 
				
			||||||
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= true
 | 
					HALF_NEIGHBOR_LISTS_CHECK_CJ ?= false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Configurations for CUDA
 | 
					# Configurations for CUDA
 | 
				
			||||||
# Use CUDA host memory to optimize transfers
 | 
					# Use CUDA host memory to optimize transfers
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										370
									
								
								gromacs/atom.c
									
									
									
									
									
								
							
							
						
						
									
										370
									
								
								gromacs/atom.c
									
									
									
									
									
								
							@@ -12,6 +12,7 @@
 | 
				
			|||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <allocate.h>
 | 
					#include <allocate.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
void initAtom(Atom *atom) {
 | 
					void initAtom(Atom *atom) {
 | 
				
			||||||
    atom->x  = NULL; atom->y  = NULL; atom->z  = NULL;
 | 
					    atom->x  = NULL; atom->y  = NULL; atom->z  = NULL;
 | 
				
			||||||
@@ -27,6 +28,7 @@ void initAtom(Atom *atom) {
 | 
				
			|||||||
    atom->Nclusters = 0;
 | 
					    atom->Nclusters = 0;
 | 
				
			||||||
    atom->Nclusters_local = 0;
 | 
					    atom->Nclusters_local = 0;
 | 
				
			||||||
    atom->Nclusters_ghost = 0;
 | 
					    atom->Nclusters_ghost = 0;
 | 
				
			||||||
 | 
					    atom->NmaxGhost = 0;        //Temporal
 | 
				
			||||||
    atom->Nclusters_max = 0;
 | 
					    atom->Nclusters_max = 0;
 | 
				
			||||||
    atom->type = NULL;
 | 
					    atom->type = NULL;
 | 
				
			||||||
    atom->ntypes = 0;
 | 
					    atom->ntypes = 0;
 | 
				
			||||||
@@ -37,10 +39,19 @@ void initAtom(Atom *atom) {
 | 
				
			|||||||
    atom->iclusters = NULL;
 | 
					    atom->iclusters = NULL;
 | 
				
			||||||
    atom->jclusters = NULL;
 | 
					    atom->jclusters = NULL;
 | 
				
			||||||
    atom->icluster_bin = NULL;
 | 
					    atom->icluster_bin = NULL;
 | 
				
			||||||
 | 
					    atom->PBCx = NULL; 
 | 
				
			||||||
 | 
					    atom->PBCy = NULL; 
 | 
				
			||||||
 | 
					    atom->PBCz = NULL;
 | 
				
			||||||
    initMasks(atom);
 | 
					    initMasks(atom);
 | 
				
			||||||
 | 
					    //MPI
 | 
				
			||||||
 | 
					    Box *mybox = &(atom->mybox);                  
 | 
				
			||||||
 | 
					    mybox->xprd = mybox->yprd = mybox->zprd = 0;          
 | 
				
			||||||
 | 
					    mybox->lo[0]  = mybox->lo[1]  = mybox->lo[2] = 0;             
 | 
				
			||||||
 | 
					    mybox->hi[0]  = mybox->hi[1]  = mybox->hi[2] = 0;   
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void createAtom(Atom *atom, Parameter *param) {
 | 
					void createAtom(Atom *atom, Parameter *param) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
 | 
					    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
 | 
				
			||||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
 | 
					    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
 | 
				
			||||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
 | 
					    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
 | 
				
			||||||
@@ -128,22 +139,26 @@ int type_str2int(const char *type) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom(Atom* atom, Parameter* param) {
 | 
					int readAtom(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    int len = strlen(param->input_file);
 | 
					    int len = strlen(param->input_file);
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
 | 
				
			||||||
    fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
 | 
					    if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
 | 
				
			||||||
    exit(-1);
 | 
					    exit(-1);
 | 
				
			||||||
    return -1;
 | 
					    return -1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
					int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    int read_atoms = 0;
 | 
					    int read_atoms = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -153,11 +168,11 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        char *item = strtok(line, " ");
 | 
					        char *item = strtok(line, " ");
 | 
				
			||||||
        if(strncmp(item, "CRYST1", 6) == 0) {
 | 
					        if(strncmp(item, "CRYST1", 6) == 0) {
 | 
				
			||||||
            param->xlo = 0.0;
 | 
					            param->xlo = 0.0;
 | 
				
			||||||
            param->xhi = atof(strtok(NULL, " "));
 | 
					            param->xhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            param->ylo = 0.0;
 | 
					            param->ylo = 0.0;
 | 
				
			||||||
            param->yhi = atof(strtok(NULL, " "));
 | 
					            param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            param->zlo = 0.0;
 | 
					            param->zlo = 0.0;
 | 
				
			||||||
            param->zhi = atof(strtok(NULL, " "));
 | 
					            param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            param->xprd = param->xhi - param->xlo;
 | 
					            param->xprd = param->xhi - param->xlo;
 | 
				
			||||||
            param->yprd = param->yhi - param->ylo;
 | 
					            param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
            param->zprd = param->zhi - param->zlo;
 | 
					            param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
@@ -166,23 +181,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
            char *label;
 | 
					            char *label;
 | 
				
			||||||
            int atom_id, comp_id;
 | 
					            int atom_id, comp_id;
 | 
				
			||||||
            MD_FLOAT occupancy, charge;
 | 
					            MD_FLOAT occupancy, charge;
 | 
				
			||||||
            atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
					            atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            while(atom_id + 1 >= atom->Nmax) {
 | 
					            while(atom_id + 1 >= atom->Nmax) {
 | 
				
			||||||
                growAtom(atom);
 | 
					                growAtom(atom);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            atom->type[atom_id] = type_str2int(strtok(NULL, " "));
 | 
					            atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
 | 
				
			||||||
            label = strtok(NULL, " ");
 | 
					            label = strtok(NULL, "\t ");
 | 
				
			||||||
            comp_id = atoi(strtok(NULL, " "));
 | 
					            comp_id = atoi(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					            atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					            atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					            atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom->vx[atom_id] = 0.0;
 | 
					            atom->vx[atom_id] = 0.0;
 | 
				
			||||||
            atom->vy[atom_id] = 0.0;
 | 
					            atom->vy[atom_id] = 0.0;
 | 
				
			||||||
            atom->vz[atom_id] = 0.0;
 | 
					            atom->vz[atom_id] = 0.0;
 | 
				
			||||||
            occupancy = atof(strtok(NULL, " "));
 | 
					            occupancy = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            charge = atof(strtok(NULL, " "));
 | 
					            charge = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
					            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
				
			||||||
            atom->Natoms++;
 | 
					            atom->Natoms++;
 | 
				
			||||||
            atom->Nlocal++;
 | 
					            atom->Nlocal++;
 | 
				
			||||||
@@ -194,14 +209,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
                  strncmp(item, "ENDMDL", 6) == 0) {
 | 
					                  strncmp(item, "ENDMDL", 6) == 0) {
 | 
				
			||||||
            // Do nothing
 | 
					            // Do nothing
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            fprintf(stderr, "Invalid item: %s\n", item);
 | 
					            if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
 | 
				
			||||||
            exit(-1);
 | 
					            exit(-1);
 | 
				
			||||||
            return -1;
 | 
					            return -1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!read_atoms) {
 | 
					    if(!read_atoms) {
 | 
				
			||||||
        fprintf(stderr, "Input error: No atoms read!\n");
 | 
					        if(me==0) fprintf(stderr, "Input error: No atoms read!\n");
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -217,12 +232,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
					    if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
				
			||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return read_atoms;
 | 
					    return read_atoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_gro(Atom* atom, Parameter* param) {
 | 
					int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    char desc[MAXLINE];
 | 
					    char desc[MAXLINE];
 | 
				
			||||||
@@ -231,7 +249,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    int i = 0;
 | 
					    int i = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -241,25 +259,25 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    desc[i] = '\0';
 | 
					    desc[i] = '\0';
 | 
				
			||||||
    readline(line, fp);
 | 
					    readline(line, fp);
 | 
				
			||||||
    atoms_to_read = atoi(strtok(line, " "));
 | 
					    atoms_to_read = atoi(strtok(line, " "));
 | 
				
			||||||
    fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
					    if(me==0) fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while(!feof(fp) && read_atoms < atoms_to_read) {
 | 
					    while(!feof(fp) && read_atoms < atoms_to_read) {
 | 
				
			||||||
        readline(line, fp);
 | 
					        readline(line, fp);
 | 
				
			||||||
        char *label = strtok(line, " ");
 | 
					        char *label = strtok(line, "\t ");
 | 
				
			||||||
        int type = type_str2int(strtok(NULL, " "));
 | 
					        int type = type_str2int(strtok(NULL, "\t "));
 | 
				
			||||||
        int atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
					        int atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
				
			||||||
        atom_id = read_atoms;
 | 
					        atom_id = read_atoms;
 | 
				
			||||||
        while(atom_id + 1 >= atom->Nmax) {
 | 
					        while(atom_id + 1 >= atom->Nmax) {
 | 
				
			||||||
            growAtom(atom);
 | 
					            growAtom(atom);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        atom->type[atom_id] = type;
 | 
					        atom->type[atom_id] = type;
 | 
				
			||||||
        atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom->vx[atom_id] = atof(strtok(NULL, " "));
 | 
					        atom->vx[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom->vy[atom_id] = atof(strtok(NULL, " "));
 | 
					        atom->vy[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom->vz[atom_id] = atof(strtok(NULL, " "));
 | 
					        atom->vz[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
					        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
				
			||||||
        atom->Natoms++;
 | 
					        atom->Natoms++;
 | 
				
			||||||
        atom->Nlocal++;
 | 
					        atom->Nlocal++;
 | 
				
			||||||
@@ -269,18 +287,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    if(!feof(fp)) {
 | 
					    if(!feof(fp)) {
 | 
				
			||||||
        readline(line, fp);
 | 
					        readline(line, fp);
 | 
				
			||||||
        param->xlo = 0.0;
 | 
					        param->xlo = 0.0;
 | 
				
			||||||
        param->xhi = atof(strtok(line, " "));
 | 
					        param->xhi = atof(strtok(line, "\t "));
 | 
				
			||||||
        param->ylo = 0.0;
 | 
					        param->ylo = 0.0;
 | 
				
			||||||
        param->yhi = atof(strtok(NULL, " "));
 | 
					        param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        param->zlo = 0.0;
 | 
					        param->zlo = 0.0;
 | 
				
			||||||
        param->zhi = atof(strtok(NULL, " "));
 | 
					        param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        param->xprd = param->xhi - param->xlo;
 | 
					        param->xprd = param->xhi - param->xlo;
 | 
				
			||||||
        param->yprd = param->yhi - param->ylo;
 | 
					        param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
        param->zprd = param->zhi - param->zlo;
 | 
					        param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(read_atoms != atoms_to_read) {
 | 
					    if(read_atoms != atoms_to_read) {
 | 
				
			||||||
        fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
					        if(me==0) fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -296,12 +314,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
					    if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
				
			||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return read_atoms;
 | 
					    return read_atoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
					int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    int natoms = 0;
 | 
					    int natoms = 0;
 | 
				
			||||||
@@ -310,7 +330,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    int ts = -1;
 | 
					    int ts = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -333,47 +353,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
 | 
					            } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
 | 
				
			||||||
                readline(line, fp);
 | 
					                readline(line, fp);
 | 
				
			||||||
                param->xlo = atof(strtok(line, " "));
 | 
					                param->xlo = atof(strtok(line, "\t "));
 | 
				
			||||||
                param->xhi = atof(strtok(NULL, " "));
 | 
					                param->xhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                param->xprd = param->xhi - param->xlo;
 | 
					                param->xprd = param->xhi - param->xlo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                readline(line, fp);
 | 
					                readline(line, fp);
 | 
				
			||||||
                param->ylo = atof(strtok(line, " "));
 | 
					                param->ylo = atof(strtok(line, "\t "));
 | 
				
			||||||
                param->yhi = atof(strtok(NULL, " "));
 | 
					                param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                param->yprd = param->yhi - param->ylo;
 | 
					                param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                readline(line, fp);
 | 
					                readline(line, fp);
 | 
				
			||||||
                param->zlo = atof(strtok(line, " "));
 | 
					                param->zlo = atof(strtok(line, "\t "));
 | 
				
			||||||
                param->zhi = atof(strtok(NULL, " "));
 | 
					                param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                param->zprd = param->zhi - param->zlo;
 | 
					                param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
            } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
 | 
					            } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
 | 
				
			||||||
                for(int i = 0; i < natoms; i++) {
 | 
					                for(int i = 0; i < natoms; i++) {
 | 
				
			||||||
                    readline(line, fp);
 | 
					                    readline(line, fp);
 | 
				
			||||||
                    atom_id = atoi(strtok(line, " ")) - 1;
 | 
					                    atom_id = atoi(strtok(line, "\t ")) - 1;
 | 
				
			||||||
                    atom->type[atom_id] = atoi(strtok(NULL, " "));
 | 
					                    atom->type[atom_id] = atoi(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom->vx[atom_id] = atof(strtok(NULL, " "));
 | 
					                    atom->vx[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom->vy[atom_id] = atof(strtok(NULL, " "));
 | 
					                    atom->vy[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom->vz[atom_id] = atof(strtok(NULL, " "));
 | 
					                    atom->vz[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
					                    atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
				
			||||||
                    read_atoms++;
 | 
					                    read_atoms++;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                fprintf(stderr, "Invalid item: %s\n", item);
 | 
					                if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
 | 
				
			||||||
                exit(-1);
 | 
					                exit(-1);
 | 
				
			||||||
                return -1;
 | 
					                return -1;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
					            if(me==0) fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
				
			||||||
            exit(-1);
 | 
					            exit(-1);
 | 
				
			||||||
            return -1;
 | 
					            return -1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(ts < 0 || !natoms || !read_atoms) {
 | 
					    if(ts < 0 || !natoms || !read_atoms) {
 | 
				
			||||||
        fprintf(stderr, "Input error: atom data was not read!\n");
 | 
					        if(me==0) fprintf(stderr, "Input error: atom data was not read!\n");
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -389,7 +409,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
					    if(me==0) fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
				
			||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return natoms;
 | 
					    return natoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -530,3 +550,249 @@ void growClusters(Atom *atom) {
 | 
				
			|||||||
    atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
					    atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
				
			||||||
    atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
 | 
					    atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* MPI added*/
 | 
				
			||||||
 | 
					void growPbc(Atom* atom) {
 | 
				
			||||||
 | 
					    int nold = atom->NmaxGhost;
 | 
				
			||||||
 | 
					    atom->NmaxGhost += DELTA;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (atom->PBCx || atom->PBCy || atom->PBCz){
 | 
				
			||||||
 | 
					        atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					        atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					        atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        atom->PBCx = (int*) malloc(atom->NmaxGhost * sizeof(int));
 | 
				
			||||||
 | 
					        atom->PBCy = (int*) malloc(atom->NmaxGhost * sizeof(int));
 | 
				
			||||||
 | 
					        atom->PBCz = (int*) malloc(atom->NmaxGhost * sizeof(int));
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void packForward(Atom* atom, int nc, int* list, MD_FLOAT* buf, int* pbc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    for(int i = 0; i < nc; i++) {
 | 
				
			||||||
 | 
					        int cj = list[i];
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					        int displ = i*CLUSTER_N;
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+0] = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+1] = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+2] = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd; 
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+0] = -1; //x 
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+1] = -1; //y
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+2] = -1; //z
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void unpackForward(Atom* atom, int nc, int c0, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    for(int i = 0; i < nc; i++) {
 | 
				
			||||||
 | 
					        int cj = c0+i;
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); 
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					        int displ = i*CLUSTER_N;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					            if(cj_x[CL_X_OFFSET + cjj]<INFINITY) cj_x[CL_X_OFFSET + cjj] = buf[3*(displ+cjj)+0];
 | 
				
			||||||
 | 
					            if(cj_x[CL_Y_OFFSET + cjj]<INFINITY) cj_x[CL_Y_OFFSET + cjj] = buf[3*(displ+cjj)+1]; 
 | 
				
			||||||
 | 
					            if(cj_x[CL_Z_OFFSET + cjj]<INFINITY) cj_x[CL_Z_OFFSET + cjj] = buf[3*(displ+cjj)+2];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int packGhost(Atom* atom, int cj, MD_FLOAT* buf, int* pbc)
 | 
				
			||||||
 | 
					{    
 | 
				
			||||||
 | 
					    //#of elements per cluster natoms,x0,y0,z0,type_0, . . ,xn,yn,zn,type_n,bbminx,bbmaxxy,bbminy,bbmaxy,bbminz,bbmaxz
 | 
				
			||||||
 | 
					    //count = 4*N_CLUSTER+7, if N_CLUSTER =4 => count = 23 value/cluster + trackpbc[x] + trackpbc[y] + trackpbc[z]
 | 
				
			||||||
 | 
					    int m = 0;
 | 
				
			||||||
 | 
					    if(atom->jclusters[cj].natoms > 0) {    
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					        int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj); 
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
 | 
				
			||||||
 | 
					        MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
 | 
				
			||||||
 | 
					        MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        buf[m++] = atom->jclusters[cj].natoms;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					            MD_FLOAT xtmp = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
 | 
				
			||||||
 | 
					            MD_FLOAT ytmp = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
 | 
				
			||||||
 | 
					            MD_FLOAT ztmp = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            buf[m++] = xtmp;
 | 
				
			||||||
 | 
					            buf[m++] = ytmp;
 | 
				
			||||||
 | 
					            buf[m++] = ztmp;
 | 
				
			||||||
 | 
					            buf[m++]= atom->cl_type[cj_sca_base + cjj];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if(bbminx > xtmp) { bbminx = xtmp; }
 | 
				
			||||||
 | 
					            if(bbmaxx < xtmp) { bbmaxx = xtmp; }
 | 
				
			||||||
 | 
					            if(bbminy > ytmp) { bbminy = ytmp; }
 | 
				
			||||||
 | 
					            if(bbmaxy < ytmp) { bbmaxy = ytmp; }
 | 
				
			||||||
 | 
					            if(bbminz > ztmp) { bbminz = ztmp; }
 | 
				
			||||||
 | 
					            if(bbmaxz < ztmp) { bbmaxz = ztmp; }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
				
			||||||
 | 
					            buf[m++] = -1; //x 
 | 
				
			||||||
 | 
					            buf[m++] = -1; //y
 | 
				
			||||||
 | 
					            buf[m++] = -1; //z
 | 
				
			||||||
 | 
					            buf[m++] = -1; //type
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        buf[m++] = bbminx;
 | 
				
			||||||
 | 
					        buf[m++] = bbmaxx;
 | 
				
			||||||
 | 
					        buf[m++] = bbminy;
 | 
				
			||||||
 | 
					        buf[m++] = bbmaxy;
 | 
				
			||||||
 | 
					        buf[m++] = bbminz;
 | 
				
			||||||
 | 
					        buf[m++] = bbmaxz;
 | 
				
			||||||
 | 
					        //TODO: check atom->ncj
 | 
				
			||||||
 | 
					        int ghostId = cj-atom->ncj;
 | 
				
			||||||
 | 
					        //check for ghost particles  
 | 
				
			||||||
 | 
					        buf[m++] = (cj-atom->ncj>=0) ? pbc[_x]+atom->PBCx[ghostId]:pbc[_x];
 | 
				
			||||||
 | 
					        buf[m++] = (cj-atom->ncj>=0) ? pbc[_y]+atom->PBCy[ghostId]:pbc[_y];
 | 
				
			||||||
 | 
					        buf[m++] = (cj-atom->ncj>=0) ? pbc[_z]+atom->PBCz[ghostId]:pbc[_z];
 | 
				
			||||||
 | 
					    }  
 | 
				
			||||||
 | 
					    return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					int unpackGhost(Atom* atom, int cj, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int m = 0;
 | 
				
			||||||
 | 
					    int jfac =  MAX(1, CLUSTER_N / CLUSTER_M);
 | 
				
			||||||
 | 
					    if(cj*jfac>=atom->Nclusters_max) growClusters(atom);
 | 
				
			||||||
 | 
					    if(atom->Nclusters_ghost>=atom->NmaxGhost) growPbc(atom); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj); 
 | 
				
			||||||
 | 
					    int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					    MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    atom->jclusters[cj].natoms = buf[m++];
 | 
				
			||||||
 | 
					    for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        cj_x[CL_X_OFFSET + cjj] = buf[m++];
 | 
				
			||||||
 | 
					        cj_x[CL_Y_OFFSET + cjj] = buf[m++];
 | 
				
			||||||
 | 
					        cj_x[CL_Z_OFFSET + cjj] = buf[m++];
 | 
				
			||||||
 | 
					        atom->cl_type[cj_sca_base + cjj] = buf[m++];
 | 
				
			||||||
 | 
					        atom->Nghost++;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
				
			||||||
 | 
					        cj_x[CL_X_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					        cj_x[CL_Y_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					        cj_x[CL_Z_OFFSET + cjj] = INFINITY;
 | 
				
			||||||
 | 
					        atom->cl_type[cj_sca_base + cjj] = -1;
 | 
				
			||||||
 | 
					        m+=4;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    atom->jclusters[cj].bbminx = buf[m++];
 | 
				
			||||||
 | 
					    atom->jclusters[cj].bbmaxx = buf[m++];
 | 
				
			||||||
 | 
					    atom->jclusters[cj].bbminy = buf[m++];
 | 
				
			||||||
 | 
					    atom->jclusters[cj].bbmaxy = buf[m++];
 | 
				
			||||||
 | 
					    atom->jclusters[cj].bbminz = buf[m++];
 | 
				
			||||||
 | 
					    atom->jclusters[cj].bbmaxz = buf[m++];
 | 
				
			||||||
 | 
					    atom->PBCx[atom->Nclusters_ghost] = buf[m++];
 | 
				
			||||||
 | 
					    atom->PBCy[atom->Nclusters_ghost] = buf[m++];
 | 
				
			||||||
 | 
					    atom->PBCz[atom->Nclusters_ghost] = buf[m++]; 
 | 
				
			||||||
 | 
					    atom->Nclusters_ghost++;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					void packReverse(Atom* atom, int nc, int c0, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    for(int i = 0; i < nc; i++) {
 | 
				
			||||||
 | 
					        int cj = c0+i;
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); 
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					        int displ = i*CLUSTER_N;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+0] = cj_f[CL_X_OFFSET + cjj];
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+1] = cj_f[CL_Y_OFFSET + cjj]; 
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+2] = cj_f[CL_Z_OFFSET + cjj]; 
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+0] = -1; //x 
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+1] = -1; //y
 | 
				
			||||||
 | 
					            buf[3*(displ+cjj)+2] = -1; //z
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void unpackReverse(Atom* atom, int nc, int* list, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    for(int i = 0; i < nc; i++) {
 | 
				
			||||||
 | 
					        int cj = list[i];
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					        int displ = i*CLUSTER_N;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
 | 
					            cj_f[CL_X_OFFSET + cjj] += buf[3*(displ+cjj)+0];
 | 
				
			||||||
 | 
					            cj_f[CL_Y_OFFSET + cjj] += buf[3*(displ+cjj)+1]; 
 | 
				
			||||||
 | 
					            cj_f[CL_Z_OFFSET + cjj] += buf[3*(displ+cjj)+2];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int packExchange(Atom* atom, int i, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int m = 0;
 | 
				
			||||||
 | 
					  buf[m++] = atom_x(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_y(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_z(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_vx(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_vy(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_vz(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom->type[i];
 | 
				
			||||||
 | 
					  return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  while(i >= atom->Nmax) growAtom(atom);
 | 
				
			||||||
 | 
					  int m = 0;
 | 
				
			||||||
 | 
					  atom_x(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_y(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_z(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_vx(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_vy(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_vz(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom->type[i] = buf[m++];
 | 
				
			||||||
 | 
					  return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void pbc(Atom* atom)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = atom->mybox.xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = atom->mybox.yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = atom->mybox.zprd; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(atom_x(i) < 0.0) atom_x(i) += xprd;
 | 
				
			||||||
 | 
					    if(atom_y(i) < 0.0) atom_y(i) += yprd;
 | 
				
			||||||
 | 
					    if(atom_z(i) < 0.0)  atom_z(i) +=zprd;
 | 
				
			||||||
 | 
					    if(atom_x(i) >= xprd) atom_x(i) -=xprd;    
 | 
				
			||||||
 | 
					    if(atom_y(i) >= yprd) atom_y(i) -=yprd;
 | 
				
			||||||
 | 
					    if(atom_z(i) >= zprd) atom_z(i) -=zprd;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void copy(Atom* atom, int i, int j)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  atom_x(i) = atom_x(j);
 | 
				
			||||||
 | 
					  atom_y(i) = atom_y(j);
 | 
				
			||||||
 | 
					  atom_z(i) = atom_z(j);
 | 
				
			||||||
 | 
					  atom_vx(i) = atom_vx(j);
 | 
				
			||||||
 | 
					  atom_vy(i) = atom_vy(j);
 | 
				
			||||||
 | 
					  atom_vz(i) = atom_vz(j);
 | 
				
			||||||
 | 
					  atom->type[i] = atom->type[j];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -14,8 +14,9 @@
 | 
				
			|||||||
#include <stats.h>
 | 
					#include <stats.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
#include <simd.h>
 | 
					#include <simd.h>
 | 
				
			||||||
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
static inline void gmx_load_simd_2xnn_interactions(
 | 
					static inline void gmx_load_simd_2xnn_interactions(
 | 
				
			||||||
    int excl,
 | 
					    int excl,
 | 
				
			||||||
@@ -49,7 +50,6 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
					    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
				
			||||||
    MD_FLOAT sigma6 = param->sigma6;
 | 
					    MD_FLOAT sigma6 = param->sigma6;
 | 
				
			||||||
    MD_FLOAT epsilon = param->epsilon;
 | 
					    MD_FLOAT epsilon = param->epsilon;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
					        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
				
			||||||
@@ -60,13 +60,23 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
				
			||||||
 | 
					            cj_f[CL_X_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					            cj_f[CL_Y_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					            cj_f[CL_Z_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    double S = getTimeStamp();
 | 
					    double S = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp parallel
 | 
					    #pragma omp parallel
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
					        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
				
			||||||
        int ci_cj1 = CJ1_FROM_CI(ci);
 | 
					        int ci_cj1 = CJ1_FROM_CI(ci);
 | 
				
			||||||
@@ -103,6 +113,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
                    cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
 | 
					                    cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
 | 
				
			||||||
                                                  (ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
 | 
					                                                  (ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
 | 
				
			||||||
                    #endif
 | 
					                    #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if(cond) {
 | 
					                    if(cond) {
 | 
				
			||||||
                        MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
 | 
					                        MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
 | 
				
			||||||
                        MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
 | 
					                        MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
 | 
				
			||||||
@@ -113,12 +124,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
                            MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
					                            MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
				
			||||||
                            MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
					                            MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                            if(neighbor->half_neigh) {
 | 
					                            if(neighbor->half_neigh || param->method) {
 | 
				
			||||||
                                cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
					                                cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
				
			||||||
                                cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
					                                cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
				
			||||||
                                cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
					                                cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
				
			||||||
                            }
 | 
					                            }
 | 
				
			||||||
 | 
					 | 
				
			||||||
                            fix += delx * force;
 | 
					                            fix += delx * force;
 | 
				
			||||||
                            fiy += dely * force;
 | 
					                            fiy += dely * force;
 | 
				
			||||||
                            fiz += delz * force;
 | 
					                            fiz += delz * force;
 | 
				
			||||||
@@ -129,13 +139,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					 | 
				
			||||||
                if(any != 0) {
 | 
					                if(any != 0) {
 | 
				
			||||||
                    addStat(stats->clusters_within_cutoff, 1);
 | 
					                    addStat(stats->clusters_within_cutoff, 1);
 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    addStat(stats->clusters_outside_cutoff, 1);
 | 
					                    addStat(stats->clusters_outside_cutoff, 1);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					 | 
				
			||||||
                ci_f[CL_X_OFFSET + cii] += fix;
 | 
					                ci_f[CL_X_OFFSET + cii] += fix;
 | 
				
			||||||
                ci_f[CL_Y_OFFSET + cii] += fiy;
 | 
					                ci_f[CL_Y_OFFSET + cii] += fiy;
 | 
				
			||||||
                ci_f[CL_Z_OFFSET + cii] += fiz;
 | 
					                ci_f[CL_Z_OFFSET + cii] += fiz;
 | 
				
			||||||
@@ -146,7 +154,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
        addStat(stats->num_neighs, numneighs);
 | 
					        addStat(stats->num_neighs, numneighs);
 | 
				
			||||||
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
					        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
				
			||||||
    LIKWID_MARKER_STOP("force");
 | 
					    LIKWID_MARKER_STOP("force");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -168,7 +176,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
 | 
					    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
 | 
				
			||||||
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
 | 
					    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local+atom->Nclusters_ghost; ci++) {
 | 
				
			||||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
					        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
				
			||||||
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
 | 
					        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
 | 
				
			||||||
@@ -178,6 +186,16 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
				
			||||||
 | 
					            cj_f[CL_X_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					            cj_f[CL_Y_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					            cj_f[CL_Z_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    double S = getTimeStamp();
 | 
					    double S = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp parallel
 | 
					    #pragma omp parallel
 | 
				
			||||||
@@ -213,7 +231,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
    */
 | 
					    */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
					        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
				
			||||||
        #if CLUSTER_M > CLUSTER_N
 | 
					        #if CLUSTER_M > CLUSTER_N
 | 
				
			||||||
@@ -322,7 +340,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
            fiz2 += tz2;
 | 
					            fiz2 += tz2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
					            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
				
			||||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
					            if(cj < CJ1_FROM_CI(atom->Nlocal)|| param->method) {
 | 
				
			||||||
                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
 | 
					                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            #else
 | 
					            #else
 | 
				
			||||||
@@ -373,7 +391,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
            fiz2 += tz2;
 | 
					            fiz2 += tz2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
					            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
				
			||||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
					            if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
 | 
				
			||||||
                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
 | 
					                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            #else
 | 
					            #else
 | 
				
			||||||
@@ -389,7 +407,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
        addStat(stats->num_neighs, numneighs);
 | 
					        addStat(stats->num_neighs, numneighs);
 | 
				
			||||||
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
					        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
				
			||||||
    LIKWID_MARKER_STOP("force");
 | 
					    LIKWID_MARKER_STOP("force");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -427,7 +445,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
				
			|||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
					        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
				
			||||||
        #if CLUSTER_M > CLUSTER_N
 | 
					        #if CLUSTER_M > CLUSTER_N
 | 
				
			||||||
@@ -562,7 +580,6 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
 | 
				
			|||||||
    if(neighbor->half_neigh) {
 | 
					    if(neighbor->half_neigh) {
 | 
				
			||||||
        return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
 | 
					        return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
 | 
					    return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -589,13 +606,23 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
 | 
				
			||||||
 | 
					        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
				
			||||||
 | 
					        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
				
			||||||
 | 
					            cj_f[CL_X_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					            cj_f[CL_Y_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					            cj_f[CL_Z_OFFSET + cjj] = 0.0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
    double S = getTimeStamp();
 | 
					    double S = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp parallel
 | 
					    #pragma omp parallel
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
					        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
				
			||||||
        #if CLUSTER_M > CLUSTER_N
 | 
					        #if CLUSTER_M > CLUSTER_N
 | 
				
			||||||
@@ -726,7 +753,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
            fiz3 = simd_add(fiz3, tz3);
 | 
					            fiz3 = simd_add(fiz3, tz3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
					            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
				
			||||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
					            if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
 | 
				
			||||||
                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
 | 
					                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
 | 
				
			||||||
                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
 | 
					                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
 | 
				
			||||||
                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
 | 
					                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
 | 
				
			||||||
@@ -811,7 +838,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
            fiz3 = simd_add(fiz3, tz3);
 | 
					            fiz3 = simd_add(fiz3, tz3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
					            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
				
			||||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
					            if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
 | 
				
			||||||
                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
 | 
					                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
 | 
				
			||||||
                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
 | 
					                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
 | 
				
			||||||
                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
 | 
					                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
 | 
				
			||||||
@@ -831,7 +858,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
        addStat(stats->num_neighs, numneighs);
 | 
					        addStat(stats->num_neighs, numneighs);
 | 
				
			||||||
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
					        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
				
			||||||
    LIKWID_MARKER_STOP("force");
 | 
					    LIKWID_MARKER_STOP("force");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -869,7 +896,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
					        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
				
			||||||
        #if CLUSTER_M > CLUSTER_N
 | 
					        #if CLUSTER_M > CLUSTER_N
 | 
				
			||||||
@@ -1070,3 +1097,120 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    return computeForceLJ_4xn_full(param, atom, neighbor, stats);
 | 
					    return computeForceLJ_4xn_full(param, atom, neighbor, stats);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//Routine for eight shell method
 | 
				
			||||||
 | 
					void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("computeForceLJ begin\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int Nshell = neighbor->Nshell;
 | 
				
			||||||
 | 
					    int *neighs;
 | 
				
			||||||
 | 
					    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
				
			||||||
 | 
					    MD_FLOAT sigma6 = param->sigma6;
 | 
				
			||||||
 | 
					    MD_FLOAT epsilon = param->epsilon;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int ci = 0; ci < Nshell; ci++) {
 | 
				
			||||||
 | 
					        neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
 | 
				
			||||||
 | 
					        int numneighs = neighbor->numNeighShell[ci];
 | 
				
			||||||
 | 
					        int cs = neighbor->listshell[ci];
 | 
				
			||||||
 | 
					        int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
 | 
				
			||||||
 | 
					        MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        for(int k = 0; k < numneighs; k++) {
 | 
				
			||||||
 | 
					            int cj = neighs[k];
 | 
				
			||||||
 | 
					            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for(int css = 0; css < CLUSTER_N; css++) {
 | 
				
			||||||
 | 
					                MD_FLOAT x = cs_x[CL_X_OFFSET + css];
 | 
				
			||||||
 | 
					                MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
 | 
				
			||||||
 | 
					                MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                MD_FLOAT fix = 0;
 | 
				
			||||||
 | 
					                MD_FLOAT fiy = 0;
 | 
				
			||||||
 | 
					                MD_FLOAT fiz = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					                    MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
 | 
				
			||||||
 | 
					                    MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
 | 
				
			||||||
 | 
					                    MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					                    if(rsq < cutforcesq) {
 | 
				
			||||||
 | 
					                        MD_FLOAT sr2 = 1.0 / rsq;
 | 
				
			||||||
 | 
					                        MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
				
			||||||
 | 
					                        MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
				
			||||||
 | 
					                        cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
				
			||||||
 | 
					                        cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					                        fix += delx * force;
 | 
				
			||||||
 | 
					                        fiy += dely * force;
 | 
				
			||||||
 | 
					                        fiz += delz * force;
 | 
				
			||||||
 | 
					                    } 
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                cs_f[CL_X_OFFSET + css] += fix;
 | 
				
			||||||
 | 
					                cs_f[CL_Y_OFFSET + css] += fiy;
 | 
				
			||||||
 | 
					                cs_f[CL_Z_OFFSET + css] += fiz;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					       // addStat(stats->calculated_forces, 1);
 | 
				
			||||||
 | 
					       // addStat(stats->num_neighs, numneighs);
 | 
				
			||||||
 | 
					       // addStat(stats->force_iters, (long long int)((double)numneighs));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    int Nshell = neighbor->Nshell;
 | 
				
			||||||
 | 
					    Pair* neighs;
 | 
				
			||||||
 | 
					    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
				
			||||||
 | 
					    MD_FLOAT sigma6 = param->sigma6;
 | 
				
			||||||
 | 
					    MD_FLOAT epsilon = param->epsilon;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int ci = 0; ci < Nshell; ci++) {
 | 
				
			||||||
 | 
					        neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
 | 
				
			||||||
 | 
					        int numneighs = neighbor->numNeighShell[ci];
 | 
				
			||||||
 | 
					        int cs = neighbor->listshell[ci].cluster;
 | 
				
			||||||
 | 
					        int css = neighbor->listshell[ci].atom;
 | 
				
			||||||
 | 
					        int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
 | 
				
			||||||
 | 
					        MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];   
 | 
				
			||||||
 | 
					        MD_FLOAT x = cs_x[CL_X_OFFSET + css];
 | 
				
			||||||
 | 
					        MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
 | 
				
			||||||
 | 
					        MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int k = 0; k < numneighs; k++) {
 | 
				
			||||||
 | 
					            int cj = neighs[k].cluster;
 | 
				
			||||||
 | 
					            int cjj = neighs[k].atom;            
 | 
				
			||||||
 | 
					            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
 | 
					            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					            MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
 | 
				
			||||||
 | 
					            MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
 | 
				
			||||||
 | 
					            MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
 | 
				
			||||||
 | 
					            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					            if(rsq < cutforcesq) {
 | 
				
			||||||
 | 
					                MD_FLOAT sr2 = 1.0 / rsq;
 | 
				
			||||||
 | 
					                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
				
			||||||
 | 
					                MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
				
			||||||
 | 
					                cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
				
			||||||
 | 
					                cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
				
			||||||
 | 
					                cs_f[CL_X_OFFSET + css] += delx * force;
 | 
				
			||||||
 | 
					                cs_f[CL_Y_OFFSET + css] += delx * force;
 | 
				
			||||||
 | 
					                cs_f[CL_Z_OFFSET + css] += delx * force;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,6 +5,7 @@
 | 
				
			|||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					#include <box.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef __ATOM_H_
 | 
					#ifndef __ATOM_H_
 | 
				
			||||||
#define __ATOM_H_
 | 
					#define __ATOM_H_
 | 
				
			||||||
@@ -102,7 +103,7 @@ typedef struct {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
typedef struct {
 | 
					typedef struct {
 | 
				
			||||||
    int Natoms, Nlocal, Nghost, Nmax;
 | 
					    int Natoms, Nlocal, Nghost, Nmax;
 | 
				
			||||||
    int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
 | 
					    int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max, NmaxGhost,ncj;
 | 
				
			||||||
    MD_FLOAT *x, *y, *z;
 | 
					    MD_FLOAT *x, *y, *z;
 | 
				
			||||||
    MD_FLOAT *vx, *vy, *vz;
 | 
					    MD_FLOAT *vx, *vy, *vz;
 | 
				
			||||||
    int *border_map;
 | 
					    int *border_map;
 | 
				
			||||||
@@ -112,6 +113,7 @@ typedef struct {
 | 
				
			|||||||
    MD_FLOAT *sigma6;
 | 
					    MD_FLOAT *sigma6;
 | 
				
			||||||
    MD_FLOAT *cutforcesq;
 | 
					    MD_FLOAT *cutforcesq;
 | 
				
			||||||
    MD_FLOAT *cutneighsq;
 | 
					    MD_FLOAT *cutneighsq;
 | 
				
			||||||
 | 
					    //track the movement of a particle along boundaries
 | 
				
			||||||
    int *PBCx, *PBCy, *PBCz;
 | 
					    int *PBCx, *PBCy, *PBCz;
 | 
				
			||||||
    // Data in cluster format
 | 
					    // Data in cluster format
 | 
				
			||||||
    MD_FLOAT *cl_x;
 | 
					    MD_FLOAT *cl_x;
 | 
				
			||||||
@@ -128,6 +130,9 @@ typedef struct {
 | 
				
			|||||||
    unsigned int masks_2xnn_fn[8];
 | 
					    unsigned int masks_2xnn_fn[8];
 | 
				
			||||||
    unsigned int masks_4xn_hn[16];
 | 
					    unsigned int masks_4xn_hn[16];
 | 
				
			||||||
    unsigned int masks_4xn_fn[16];
 | 
					    unsigned int masks_4xn_fn[16];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //Info Subdomain
 | 
				
			||||||
 | 
					    Box mybox;   
 | 
				
			||||||
} Atom;
 | 
					} Atom;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void initAtom(Atom*);
 | 
					extern void initAtom(Atom*);
 | 
				
			||||||
@@ -140,6 +145,18 @@ extern int readAtom_dmp(Atom*, Parameter*);
 | 
				
			|||||||
extern void growAtom(Atom*);
 | 
					extern void growAtom(Atom*);
 | 
				
			||||||
extern void growClusters(Atom*);
 | 
					extern void growClusters(Atom*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int  packGhost(Atom*, int, MD_FLOAT* , int*);
 | 
				
			||||||
 | 
					int  unpackGhost(Atom*, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					int  packExchange(Atom*, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					int  unpackExchange(Atom*, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					void packForward(Atom*, int, int*, MD_FLOAT*, int*); 
 | 
				
			||||||
 | 
					void unpackForward(Atom*, int, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					void packReverse(Atom* , int , int , MD_FLOAT*);
 | 
				
			||||||
 | 
					void unpackReverse(Atom*, int, int*, MD_FLOAT*);
 | 
				
			||||||
 | 
					void pbc(Atom*);
 | 
				
			||||||
 | 
					void copy(Atom*, int, int);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef AOS
 | 
					#ifdef AOS
 | 
				
			||||||
#   define POS_DATA_LAYOUT     "AoS"
 | 
					#   define POS_DATA_LAYOUT     "AoS"
 | 
				
			||||||
#   define atom_x(i)           atom->x[(i) * 3 + 0]
 | 
					#   define atom_x(i)           atom->x[(i) * 3 + 0]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,10 +9,13 @@
 | 
				
			|||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <timers.h>
 | 
				
			||||||
 | 
					#include <timing.h>
 | 
				
			||||||
 | 
					#include <simd.h>
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
					void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
				
			||||||
    DEBUG_MESSAGE("cpuInitialIntegrate start\n");
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("cpuInitialIntegrate start\n");
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
					        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
				
			||||||
@@ -33,8 +36,8 @@ void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void  cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
					void  cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
				
			||||||
    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
					        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
				
			||||||
@@ -46,6 +49,56 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
				
			|||||||
            ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
 | 
					            ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("cpuInitialIntegrate start\n");
 | 
				
			||||||
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
 | 
				
			||||||
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
 | 
					        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT dt = simd_broadcast(param->dt); 
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT x_vector = simd_fma(vx_vector, dt, simd_load(&ci_x[CL_X_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT y_vector = simd_fma(vy_vector, dt, simd_load(&ci_x[CL_Y_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT z_vector = simd_fma(vz_vector, dt, simd_load(&ci_x[CL_Z_OFFSET]));
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        simd_store(&ci_v[CL_X_OFFSET], vx_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_x[CL_X_OFFSET], x_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_x[CL_Y_OFFSET], y_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_x[CL_Z_OFFSET], z_vector);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("cpuInitialIntegrate end\n");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void  cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
 | 
				
			||||||
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
 | 
				
			||||||
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
 | 
					        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
				
			||||||
 | 
					        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
 | 
				
			||||||
 | 
					        MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
 | 
				
			||||||
 | 
					        simd_store(&ci_v[CL_X_OFFSET], vx_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
 | 
				
			||||||
 | 
					        simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
 | 
					    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -54,3 +107,6 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
				
			|||||||
void cudaInitialIntegrate(Parameter*, Atom*);
 | 
					void cudaInitialIntegrate(Parameter*, Atom*);
 | 
				
			||||||
void cudaFinalIntegrate(Parameter*, Atom*);
 | 
					void cudaFinalIntegrate(Parameter*, Atom*);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
@@ -25,6 +25,11 @@
 | 
				
			|||||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
 | 
					#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
 | 
				
			||||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
 | 
					#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					    int cluster;
 | 
				
			||||||
 | 
					    int atom;
 | 
				
			||||||
 | 
					} Pair;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef struct {
 | 
					typedef struct {
 | 
				
			||||||
    int every;
 | 
					    int every;
 | 
				
			||||||
    int ncalls;
 | 
					    int ncalls;
 | 
				
			||||||
@@ -34,8 +39,20 @@ typedef struct {
 | 
				
			|||||||
    int half_neigh;
 | 
					    int half_neigh;
 | 
				
			||||||
    int* neighbors;
 | 
					    int* neighbors;
 | 
				
			||||||
    unsigned int* neighbors_imask;
 | 
					    unsigned int* neighbors_imask;
 | 
				
			||||||
 | 
					    //MPI
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					    int Nshell;         //# of atoms in listShell(Cluster here cover all possible ghost interactions)
 | 
				
			||||||
 | 
					    int *numNeighShell; //# of neighs for each atom in listShell
 | 
				
			||||||
 | 
					    Pair *neighshell;    //list of neighs for each atom in listShell
 | 
				
			||||||
 | 
					    Pair *listshell;     //Atoms to compute the force
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					    int Nshell;         //# of cluster in listShell(Cluster here cover all possible ghost interactions)
 | 
				
			||||||
 | 
					    int *numNeighShell; //# of neighs for each atom in listShell
 | 
				
			||||||
 | 
					    int *neighshell;    //list of neighs for each atom in listShell
 | 
				
			||||||
 | 
					    int *listshell;     //Atoms to compute the force
 | 
				
			||||||
} Neighbor;
 | 
					} Neighbor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void initNeighbor(Neighbor*, Parameter*);
 | 
					extern void initNeighbor(Neighbor*, Parameter*);
 | 
				
			||||||
extern void setupNeighbor(Parameter*, Atom*);
 | 
					extern void setupNeighbor(Parameter*, Atom*);
 | 
				
			||||||
extern void binatoms(Atom*);
 | 
					extern void binatoms(Atom*);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,6 +5,8 @@
 | 
				
			|||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <comm.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef __VTK_H_
 | 
					#ifndef __VTK_H_
 | 
				
			||||||
#define __VTK_H_
 | 
					#define __VTK_H_
 | 
				
			||||||
@@ -13,4 +15,5 @@ extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int t
 | 
				
			|||||||
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
					extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
				
			||||||
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
					extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
				
			||||||
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
					extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
				
			||||||
 | 
					extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										220
									
								
								gromacs/main.c
									
									
									
									
									
								
							
							
						
						
									
										220
									
								
								gromacs/main.c
									
									
									
									
									
								
							@@ -5,9 +5,7 @@
 | 
				
			|||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
#include <math.h>
 | 
					#include <math.h>
 | 
				
			||||||
#include <omp.h>
 | 
					 | 
				
			||||||
//--
 | 
					//--
 | 
				
			||||||
#include <likwid-marker.h>
 | 
					#include <likwid-marker.h>
 | 
				
			||||||
//--
 | 
					//--
 | 
				
			||||||
@@ -26,6 +24,10 @@
 | 
				
			|||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
#include <vtk.h>
 | 
					#include <vtk.h>
 | 
				
			||||||
#include <xtc.h>
 | 
					#include <xtc.h>
 | 
				
			||||||
 | 
					#include <comm.h>
 | 
				
			||||||
 | 
					#include <grid.h>
 | 
				
			||||||
 | 
					#include <shell_methods.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
					#define HLINE "----------------------------------------------------------------------------\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -42,17 +44,55 @@ extern void copyDataFromCUDADevice(Atom *atom);
 | 
				
			|||||||
extern void cudaDeviceFree();
 | 
					extern void cudaDeviceFree();
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
					double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    double S, E;
 | 
				
			||||||
 | 
					    int dims = 3;   //TODO: Adjust to do in 3d and 2d
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    if(param->balance == RCB) {
 | 
				
			||||||
 | 
					        rcbBalance(grid, atom, param, meanBisect,dims,0);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid);
 | 
				
			||||||
 | 
					    }else if(param->balance == meanTimeRCB){
 | 
				
			||||||
 | 
					        rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid);
 | 
				
			||||||
 | 
					    }else if(param->balance == Staggered) {
 | 
				
			||||||
 | 
					        staggeredBalance(grid, atom, param, time);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid);
 | 
				
			||||||
 | 
					        exchangeComm(comm,atom);
 | 
				
			||||||
 | 
					    }else { } //Do nothing
 | 
				
			||||||
 | 
					    //printGrid(grid);
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					} 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
 | 
				
			||||||
 | 
					{      
 | 
				
			||||||
 | 
					    double E,S,time;
 | 
				
			||||||
 | 
					    int me;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(world,&me);
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    if(param->balance == meanTimeRCB || param->balance == RCB){
 | 
				
			||||||
 | 
					        rcbBalance(grid, atom, param, meanBisect,3,0);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid); 
 | 
				
			||||||
 | 
					    }      
 | 
				
			||||||
 | 
					    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world); 
 | 
				
			||||||
 | 
					    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
 | 
				
			||||||
    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
					    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
				
			||||||
    double S, E;
 | 
					    double S, E;
 | 
				
			||||||
    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
 | 
					    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
 | 
				
			||||||
    param->xprd = param->nx * param->lattice;
 | 
					    param->xprd = param->nx * param->lattice;
 | 
				
			||||||
    param->yprd = param->ny * param->lattice;
 | 
					    param->yprd = param->ny * param->lattice;
 | 
				
			||||||
    param->zprd = param->nz * param->lattice;
 | 
					    param->zprd = param->nz * param->lattice;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    S = getTimeStamp();
 | 
					    S = getTimeStamp();
 | 
				
			||||||
    initAtom(atom);
 | 
					    initAtom(atom);
 | 
				
			||||||
    initPbc(atom);
 | 
					    //initPbc(atom);
 | 
				
			||||||
    initStats(stats);
 | 
					    initStats(stats);
 | 
				
			||||||
    initNeighbor(neighbor, param);
 | 
					    initNeighbor(neighbor, param);
 | 
				
			||||||
    if(param->input_file == NULL) {
 | 
					    if(param->input_file == NULL) {
 | 
				
			||||||
@@ -60,13 +100,18 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
				
			|||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        readAtom(atom, param);
 | 
					        readAtom(atom, param);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    setupGrid(grid,atom,param);
 | 
				
			||||||
    setupNeighbor(param, atom);
 | 
					    setupNeighbor(param, atom);
 | 
				
			||||||
 | 
					    setupComm(comm, param, grid);
 | 
				
			||||||
 | 
					    if(param->balance){  
 | 
				
			||||||
 | 
					        initialBalance(param, eam, atom, neighbor, stats, comm, grid);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    setupThermo(param, atom->Natoms);
 | 
					    setupThermo(param, atom->Natoms);
 | 
				
			||||||
    if(param->input_file == NULL) { adjustThermo(param, atom); }
 | 
					    if(param->input_file == NULL) { adjustThermo(param, atom); }
 | 
				
			||||||
    buildClusters(atom);
 | 
					    buildClusters(atom);
 | 
				
			||||||
    defineJClusters(atom);
 | 
					    defineJClusters(atom);
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    //setupPbc(atom, param);
 | 
				
			||||||
 | 
					    ghostNeighbor(comm, atom, param); //change
 | 
				
			||||||
    binClusters(atom);
 | 
					    binClusters(atom);
 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					    buildNeighbor(atom, neighbor);
 | 
				
			||||||
    initDevice(atom, neighbor);
 | 
					    initDevice(atom, neighbor);
 | 
				
			||||||
@@ -74,15 +119,15 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
				
			|||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
					double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
    double S, E;
 | 
					    double S, E;
 | 
				
			||||||
    S = getTimeStamp();
 | 
					    S = getTimeStamp();
 | 
				
			||||||
    LIKWID_MARKER_START("reneighbour");
 | 
					    LIKWID_MARKER_START("reneighbour");
 | 
				
			||||||
    updateSingleAtoms(atom);
 | 
					    //updateAtomsPbc(atom, param);
 | 
				
			||||||
    updateAtomsPbc(atom, param);
 | 
					 | 
				
			||||||
    buildClusters(atom);
 | 
					    buildClusters(atom);
 | 
				
			||||||
    defineJClusters(atom);
 | 
					    defineJClusters(atom);
 | 
				
			||||||
    setupPbc(atom, param);
 | 
					    //setupPbc(atom, param);
 | 
				
			||||||
 | 
					    ghostNeighbor(comm, atom, param);
 | 
				
			||||||
    binClusters(atom);
 | 
					    binClusters(atom);
 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					    buildNeighbor(atom, neighbor);
 | 
				
			||||||
    LIKWID_MARKER_STOP("reneighbour");
 | 
					    LIKWID_MARKER_STOP("reneighbour");
 | 
				
			||||||
@@ -90,15 +135,13 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void printAtomState(Atom *atom) {
 | 
					double updateAtoms(Comm* comm, Atom* atom){
 | 
				
			||||||
    printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
 | 
					    double S,E;
 | 
				
			||||||
            atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					        updateSingleAtoms(atom);
 | 
				
			||||||
    /*     int nall = atom->Nlocal + atom->Nghost; */
 | 
					        exchangeComm(comm, atom);
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
    /*     for (int i=0; i<nall; i++) { */
 | 
					    return E-S;
 | 
				
			||||||
    /*         printf("%d  %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
 | 
					 | 
				
			||||||
    /*     } */
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main(int argc, char** argv) {
 | 
					int main(int argc, char** argv) {
 | 
				
			||||||
@@ -108,7 +151,8 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
    Neighbor neighbor;
 | 
					    Neighbor neighbor;
 | 
				
			||||||
    Stats stats;
 | 
					    Stats stats;
 | 
				
			||||||
    Parameter param;
 | 
					    Parameter param;
 | 
				
			||||||
 | 
					    Comm comm; 
 | 
				
			||||||
 | 
					    Grid grid;
 | 
				
			||||||
    LIKWID_MARKER_INIT;
 | 
					    LIKWID_MARKER_INIT;
 | 
				
			||||||
#pragma omp parallel
 | 
					#pragma omp parallel
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
@@ -116,7 +160,7 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
        //LIKWID_MARKER_REGISTER("reneighbour");
 | 
					        //LIKWID_MARKER_REGISTER("reneighbour");
 | 
				
			||||||
        //LIKWID_MARKER_REGISTER("pbc");
 | 
					        //LIKWID_MARKER_REGISTER("pbc");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    initComm(&argc, &argv, &comm); //change
 | 
				
			||||||
    initParameter(¶m);
 | 
					    initParameter(¶m);
 | 
				
			||||||
    for(int i = 0; i < argc; i++) {
 | 
					    for(int i = 0; i < argc; i++) {
 | 
				
			||||||
        if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
 | 
					        if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
 | 
				
			||||||
@@ -158,6 +202,24 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
            param.half_neigh = atoi(argv[++i]);
 | 
					            param.half_neigh = atoi(argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					        if((strcmp(argv[i], "-method") == 0)) {
 | 
				
			||||||
 | 
					            param.method = atoi(argv[++i]);
 | 
				
			||||||
 | 
					            if (param.method>2 || param.method< 0){
 | 
				
			||||||
 | 
					                if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
 | 
				
			||||||
 | 
					                endComm(&comm);   
 | 
				
			||||||
 | 
					                exit(0);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if((strcmp(argv[i], "-bal") == 0)) {
 | 
				
			||||||
 | 
					            param.balance = atoi(argv[++i]);
 | 
				
			||||||
 | 
					            if (param.balance>3 || param.balance< 0){
 | 
				
			||||||
 | 
					                if(comm.myproc == 0) fprintf(stderr, "Load balance does not exist!\n");
 | 
				
			||||||
 | 
					                endComm(&comm);   
 | 
				
			||||||
 | 
					                exit(0);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
 | 
					        if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
 | 
				
			||||||
            param.mass = atof(argv[++i]);
 | 
					            param.mass = atof(argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
@@ -188,6 +250,7 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
 | 
					        if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
 | 
				
			||||||
 | 
					            //TODO: add the shell and ac print options
 | 
				
			||||||
            printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
					            printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
				
			||||||
            printf(HLINE);
 | 
					            printf(HLINE);
 | 
				
			||||||
            printf("-p <string>:          file to read parameters from (can be specified more than once)\n");
 | 
					            printf("-p <string>:          file to read parameters from (can be specified more than once)\n");
 | 
				
			||||||
@@ -206,83 +269,86 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    param.cutneigh = param.cutforce + param.skin;
 | 
					    if(param.balance>0 && param.method == 1){
 | 
				
			||||||
    setup(¶m, &eam, &atom, &neighbor, &stats);
 | 
					        if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
 | 
				
			||||||
    printParameter(¶m);
 | 
					        endComm(&comm);   
 | 
				
			||||||
    printf(HLINE);
 | 
					        exit(0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    printf("step\ttemp\t\tpressure\n");
 | 
					    param.cutneigh = param.cutforce + param.skin;
 | 
				
			||||||
 | 
					    timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
 | 
				
			||||||
 | 
					    if(comm.myproc == 0) printParameter(¶m);
 | 
				
			||||||
 | 
					    if(comm.myproc == 0) printf(HLINE);
 | 
				
			||||||
 | 
					    if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
 | 
				
			||||||
    computeThermo(0, ¶m, &atom);
 | 
					    computeThermo(0, ¶m, &atom);
 | 
				
			||||||
    #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
					    #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
				
			||||||
    traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
					    traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
				
			||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
    #ifdef CUDA_TARGET
 | 
					    #ifdef CUDA_TARGET
 | 
				
			||||||
    copyDataToCUDADevice(&atom);
 | 
					    copyDataToCUDADevice(&atom);
 | 
				
			||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(param.force_field == FF_EAM) {
 | 
					    if(param.force_field == FF_EAM) {
 | 
				
			||||||
        timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
 | 
					        timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats);
 | 
					        timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    timer[NEIGH]    = 0.0;
 | 
					    timer[NEIGH]    = 0.0;
 | 
				
			||||||
 | 
					    timer[FORWARD]  = 0.0;
 | 
				
			||||||
 | 
					    timer[UPDATE]   = 0.0;
 | 
				
			||||||
 | 
					    timer[BALANCE]  = 0.0;
 | 
				
			||||||
 | 
					    timer[REVERSE]  = reverse(&comm, &atom, ¶m);
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
    timer[TOTAL] = getTimeStamp();
 | 
					    timer[TOTAL] = getTimeStamp();
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(param.vtk_file != NULL) {
 | 
					    if(param.vtk_file != NULL) {
 | 
				
			||||||
        write_data_to_vtk_file(param.vtk_file, &atom, 0);
 | 
					        //write_data_to_vtk_file(param.vtk_file, &comm ,&atom, 0);
 | 
				
			||||||
 | 
					        printvtk(param.vtk_file, &comm, &atom, ¶m, 0); 
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    //TODO: modify xct
 | 
				
			||||||
    if(param.xtc_file != NULL) {
 | 
					    if(param.xtc_file != NULL) {
 | 
				
			||||||
        xtc_init(param.xtc_file, &atom, 0);
 | 
					        xtc_init(param.xtc_file, &atom, 0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    double forceTime=0.0;
 | 
				
			||||||
 | 
					    double commTime=0.0;
 | 
				
			||||||
    for(int n = 0; n < param.ntimes; n++) {  
 | 
					    for(int n = 0; n < param.ntimes; n++) {  
 | 
				
			||||||
        initialIntegrate(¶m, &atom);
 | 
					        initialIntegrate(¶m, &atom);
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if((n + 1) % param.reneigh_every) {
 | 
					        if((n + 1) % param.reneigh_every) {
 | 
				
			||||||
            if(!((n + 1) % param.prune_every)) {
 | 
					            timer[FORWARD]+=forward(&comm, &atom, ¶m);
 | 
				
			||||||
 | 
					            if(!((n + 1) % param.prune_every)){
 | 
				
			||||||
                pruneNeighbor(¶m, &atom, &neighbor);
 | 
					                pruneNeighbor(¶m, &atom, &neighbor);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					 | 
				
			||||||
            updatePbc(&atom, ¶m, 0);
 | 
					 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            #ifdef CUDA_TARGET
 | 
					            #ifdef CUDA_TARGET
 | 
				
			||||||
            copyDataFromCUDADevice(&atom);
 | 
					            copyDataFromCUDADevice(&atom);
 | 
				
			||||||
            #endif
 | 
					            #endif
 | 
				
			||||||
 | 
					            timer[UPDATE] +=updateAtoms(&comm,&atom);
 | 
				
			||||||
            timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
 | 
					            if(param.balance && !((n+1)%param.balance_every))
 | 
				
			||||||
 | 
					                timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);   
 | 
				
			||||||
 | 
					            timer[NEIGH]  += reneighbour(&comm, ¶m, &atom, &neighbor);
 | 
				
			||||||
            #ifdef CUDA_TARGET
 | 
					            #ifdef CUDA_TARGET
 | 
				
			||||||
            copyDataToCUDADevice(&atom);
 | 
					            copyDataToCUDADevice(&atom);
 | 
				
			||||||
            isReneighboured = 1;
 | 
					            isReneighboured = 1;
 | 
				
			||||||
            #endif
 | 
					            #endif
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
					        #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
				
			||||||
        traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
					        traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
				
			||||||
        #endif
 | 
					        #endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(param.force_field == FF_EAM) {
 | 
					        if(param.force_field == FF_EAM) {
 | 
				
			||||||
            timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats); 
 | 
					            timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats); 
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
 | 
					            timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
 | 
				
			||||||
        } 
 | 
					        } 
 | 
				
			||||||
 | 
					        timer[REVERSE] += reverse(&comm, &atom, ¶m);
 | 
				
			||||||
        finalIntegrate(¶m, &atom);
 | 
					        finalIntegrate(¶m, &atom);
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
					        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
				
			||||||
            computeThermo(n + 1, ¶m, &atom);
 | 
					            computeThermo(n + 1, ¶m, &atom);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        int write_pos = !((n + 1) % param.x_out_every);
 | 
					        int write_pos = !((n + 1) % param.x_out_every);
 | 
				
			||||||
        int write_vel = !((n + 1) % param.v_out_every);
 | 
					        int write_vel = !((n + 1) % param.v_out_every);
 | 
				
			||||||
        if(write_pos || write_vel) {
 | 
					        if(write_pos || write_vel) {
 | 
				
			||||||
            if(param.vtk_file != NULL) {
 | 
					            if(param.vtk_file != NULL) {
 | 
				
			||||||
                write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
 | 
					                printvtk(param.vtk_file, &comm, &atom, ¶m, n+1);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					            //TODO: xtc file
 | 
				
			||||||
            if(param.xtc_file != NULL) {
 | 
					            if(param.xtc_file != NULL) {
 | 
				
			||||||
                xtc_write(&atom, n + 1, write_pos, write_vel);
 | 
					                xtc_write(&atom, n + 1, write_pos, write_vel);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@@ -292,11 +358,11 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
    #ifdef CUDA_TARGET
 | 
					    #ifdef CUDA_TARGET
 | 
				
			||||||
    copyDataFromCUDADevice(&atom);
 | 
					    copyDataFromCUDADevice(&atom);
 | 
				
			||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
					    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
				
			||||||
    updateSingleAtoms(&atom);
 | 
					    updateAtoms(&comm,&atom);
 | 
				
			||||||
    computeThermo(-1, ¶m, &atom);
 | 
					    computeThermo(-1, ¶m, &atom);
 | 
				
			||||||
 | 
					    //TODO:
 | 
				
			||||||
    if(param.xtc_file != NULL) {
 | 
					    if(param.xtc_file != NULL) {
 | 
				
			||||||
        xtc_end();
 | 
					        xtc_end();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -304,41 +370,35 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
    #ifdef CUDA_TARGET
 | 
					    #ifdef CUDA_TARGET
 | 
				
			||||||
    cudaDeviceFree();
 | 
					    cudaDeviceFree();
 | 
				
			||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
 | 
					    double mint[NUMTIMER];
 | 
				
			||||||
 | 
					    double maxt[NUMTIMER];
 | 
				
			||||||
 | 
					    double sumt[NUMTIMER];
 | 
				
			||||||
 | 
					    timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
 | 
				
			||||||
 | 
					    MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
 | 
				
			||||||
 | 
					    MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
 | 
				
			||||||
 | 
					    MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
 | 
				
			||||||
 | 
					    int Nghost;
 | 
				
			||||||
 | 
					    MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    if(comm.myproc == 0){
 | 
				
			||||||
 | 
					        int n = comm.numproc;
 | 
				
			||||||
        printf(HLINE);
 | 
					        printf(HLINE);
 | 
				
			||||||
    printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
 | 
					        printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
 | 
				
			||||||
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
 | 
					        printf("TOTAL %.2fs\n\n",timer[TOTAL]);
 | 
				
			||||||
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
 | 
					        printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
 | 
				
			||||||
 | 
					        printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
 | 
				
			||||||
 | 
					        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
 | 
				
			||||||
 | 
					        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
 | 
				
			||||||
 | 
					        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
 | 
				
			||||||
        printf(HLINE);
 | 
					        printf(HLINE);
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    int nthreads = 0;
 | 
					 | 
				
			||||||
    int chunkSize = 0;
 | 
					 | 
				
			||||||
    omp_sched_t schedKind;
 | 
					 | 
				
			||||||
    char schedType[10];
 | 
					 | 
				
			||||||
#pragma omp parallel
 | 
					 | 
				
			||||||
#pragma omp master
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
	omp_get_schedule(&schedKind, &chunkSize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    	switch (schedKind)
 | 
					 | 
				
			||||||
    	{
 | 
					 | 
				
			||||||
        	case omp_sched_static:  strcpy(schedType, "static"); break;
 | 
					 | 
				
			||||||
        	case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
 | 
					 | 
				
			||||||
        	case omp_sched_guided:  strcpy(schedType, "guided"); break;
 | 
					 | 
				
			||||||
        	case omp_sched_auto:    strcpy(schedType, "auto"); break;
 | 
					 | 
				
			||||||
    	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    	nthreads = omp_get_max_threads();
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    printf("Num threads: %d\n", nthreads);
 | 
					 | 
				
			||||||
    printf("Schedule: (%s,%d)\n", schedType, chunkSize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        printf("Performance: %.2f million atom updates per second\n",
 | 
					        printf("Performance: %.2f million atom updates per second\n",
 | 
				
			||||||
                1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
					                1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
				
			||||||
    #ifdef COMPUTE_STATS
 | 
					        
 | 
				
			||||||
 | 
					#ifdef COMPUTE_STATS
 | 
				
			||||||
    displayStatistics(&atom, ¶m, &stats, timer);
 | 
					    displayStatistics(&atom, ¶m, &stats, timer);
 | 
				
			||||||
    #endif
 | 
					#endif
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    endComm(&comm);
 | 
				
			||||||
    LIKWID_MARKER_CLOSE;
 | 
					    LIKWID_MARKER_CLOSE;
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,15 +7,15 @@
 | 
				
			|||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <math.h>
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <neighbor.h>
 | 
					#include <neighbor.h>
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SMALL 1.0e-6
 | 
					#define SMALL 1.0e-6
 | 
				
			||||||
#define FACTOR 0.999
 | 
					#define FACTOR 0.999
 | 
				
			||||||
 | 
					#define eps 1.0e-9
 | 
				
			||||||
static MD_FLOAT xprd, yprd, zprd;
 | 
					static MD_FLOAT xprd, yprd, zprd;
 | 
				
			||||||
static MD_FLOAT bininvx, bininvy;
 | 
					static MD_FLOAT bininvx, bininvy;
 | 
				
			||||||
static int mbinxlo, mbinylo;
 | 
					static int mbinxlo, mbinylo;
 | 
				
			||||||
@@ -34,9 +34,16 @@ static int nmax;
 | 
				
			|||||||
static int nstencil;      // # of bins in stencil
 | 
					static int nstencil;      // # of bins in stencil
 | 
				
			||||||
static int* stencil;      // stencil list of bin offsets
 | 
					static int* stencil;      // stencil list of bin offsets
 | 
				
			||||||
static MD_FLOAT binsizex, binsizey;
 | 
					static MD_FLOAT binsizex, binsizey;
 | 
				
			||||||
 | 
					int me;             //rank
 | 
				
			||||||
 | 
					int method;         // method 
 | 
				
			||||||
 | 
					int shellMethod;    //If shell method exist  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int coord2bin(MD_FLOAT, MD_FLOAT);
 | 
					static int coord2bin(MD_FLOAT, MD_FLOAT);
 | 
				
			||||||
static MD_FLOAT bindist(int, int);
 | 
					static MD_FLOAT bindist(int, int);
 | 
				
			||||||
 | 
					//static int ghostZone(Atom*, int);
 | 
				
			||||||
 | 
					static int halfZoneCluster(Atom*,int);
 | 
				
			||||||
 | 
					static int ghostClusterinRange(Atom*, int, int, MD_FLOAT);
 | 
				
			||||||
 | 
					static void neighborGhost(Atom*, Neighbor*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* exported subroutines */
 | 
					/* exported subroutines */
 | 
				
			||||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
					void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
				
			||||||
@@ -53,12 +60,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
				
			|||||||
    bincount = NULL;
 | 
					    bincount = NULL;
 | 
				
			||||||
    bin_clusters = NULL;
 | 
					    bin_clusters = NULL;
 | 
				
			||||||
    bin_nclusters = NULL;
 | 
					    bin_nclusters = NULL;
 | 
				
			||||||
    neighbor->half_neigh = param->half_neigh;
 | 
					    neighbor->maxneighs = 200;
 | 
				
			||||||
    neighbor->maxneighs = 100;
 | 
					 | 
				
			||||||
    neighbor->numneigh = NULL;
 | 
					    neighbor->numneigh = NULL;
 | 
				
			||||||
    neighbor->numneigh_masked = NULL;
 | 
					    neighbor->numneigh_masked = NULL;
 | 
				
			||||||
    neighbor->neighbors = NULL;
 | 
					    neighbor->neighbors = NULL;
 | 
				
			||||||
    neighbor->neighbors_imask = NULL;
 | 
					    neighbor->neighbors_imask = NULL;
 | 
				
			||||||
 | 
					    //MPI
 | 
				
			||||||
 | 
					    shellMethod = 0;
 | 
				
			||||||
 | 
					    method = param->method;
 | 
				
			||||||
 | 
					    if(method == halfShell || method == eightShell){ 
 | 
				
			||||||
 | 
					        param->half_neigh = 1;
 | 
				
			||||||
 | 
					        shellMethod = 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					    neighbor->half_neigh = param->half_neigh;
 | 
				
			||||||
 | 
					    neighbor->Nshell = 0;  
 | 
				
			||||||
 | 
					    neighbor->numNeighShell = NULL;
 | 
				
			||||||
 | 
					    neighbor->neighshell = NULL;
 | 
				
			||||||
 | 
					    neighbor->listshell = NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void setupNeighbor(Parameter *param, Atom *atom) {
 | 
					void setupNeighbor(Parameter *param, Atom *atom) {
 | 
				
			||||||
@@ -77,7 +97,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
 | 
				
			|||||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
					    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
				
			||||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
 | 
					    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
 | 
					    MD_FLOAT atom_density = ((MD_FLOAT)(atom->Natoms)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
 | 
				
			||||||
    MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
 | 
					    MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
 | 
				
			||||||
    MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
 | 
					    MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
 | 
				
			||||||
    MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
 | 
					    MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
 | 
				
			||||||
@@ -146,6 +166,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
 | 
					MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
 | 
					    MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
 | 
				
			||||||
    MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
 | 
					    MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
 | 
				
			||||||
    MD_FLOAT dm = MAX(dl, dh);
 | 
					    MD_FLOAT dm = MAX(dl, dh);
 | 
				
			||||||
@@ -163,6 +184,7 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
 | 
				
			|||||||
    dm = MAX(dl, dh);
 | 
					    dm = MAX(dl, dh);
 | 
				
			||||||
    dm0 = MAX(dm, 0.0);
 | 
					    dm0 = MAX(dm, 0.0);
 | 
				
			||||||
    d2 += dm0 * dm0;
 | 
					    d2 += dm0 * dm0;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    return d2;
 | 
					    return d2;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -225,7 +247,6 @@ static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
					void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
    DEBUG_MESSAGE("buildNeighbor start\n");
 | 
					    DEBUG_MESSAGE("buildNeighbor start\n");
 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* extend atom arrays if necessary */
 | 
					    /* extend atom arrays if necessary */
 | 
				
			||||||
    if(atom->Nclusters_local > nmax) {
 | 
					    if(atom->Nclusters_local > nmax) {
 | 
				
			||||||
        nmax = atom->Nclusters_local;
 | 
					        nmax = atom->Nclusters_local;
 | 
				
			||||||
@@ -249,7 +270,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
    while(resize) {
 | 
					    while(resize) {
 | 
				
			||||||
        int new_maxneighs = neighbor->maxneighs;
 | 
					        int new_maxneighs = neighbor->maxneighs;
 | 
				
			||||||
        resize = 0;
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					        for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
            int ci_cj1 = CJ1_FROM_CI(ci);
 | 
					            int ci_cj1 = CJ1_FROM_CI(ci);
 | 
				
			||||||
            int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
 | 
					            int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
 | 
				
			||||||
@@ -262,14 +282,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
            MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
 | 
					            MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
 | 
				
			||||||
            MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
 | 
					            MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
 | 
				
			||||||
            MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
 | 
					            MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
 | 
				
			||||||
 | 
					 | 
				
			||||||
            for(int k = 0; k < nstencil; k++) {
 | 
					            for(int k = 0; k < nstencil; k++) {
 | 
				
			||||||
                int jbin = ibin + stencil[k];
 | 
					                int jbin = ibin + stencil[k];
 | 
				
			||||||
                int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
 | 
					                int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
 | 
				
			||||||
                int cj, m = -1;
 | 
					                int cj, m = -1;
 | 
				
			||||||
                MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
 | 
					                MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
 | 
				
			||||||
                const int c = bin_nclusters[jbin];
 | 
					                const int c = bin_nclusters[jbin];
 | 
				
			||||||
 | 
					 | 
				
			||||||
                if(c > 0) {
 | 
					                if(c > 0) {
 | 
				
			||||||
                    MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
 | 
					                    MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -279,6 +297,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
                        if(neighbor->half_neigh && ci_cj1 > cj) {
 | 
					                        if(neighbor->half_neigh && ci_cj1 > cj) {
 | 
				
			||||||
                            continue;
 | 
					                            continue;
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
                        jbb_zmin = atom->jclusters[cj].bbminz;
 | 
					                        jbb_zmin = atom->jclusters[cj].bbminz;
 | 
				
			||||||
                        jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
					                        jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
				
			||||||
                        dl = ibb_zmin - jbb_zmax;
 | 
					                        dl = ibb_zmin - jbb_zmax;
 | 
				
			||||||
@@ -287,7 +306,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
                        dm0 = MAX(dm, 0.0);
 | 
					                        dm0 = MAX(dm, 0.0);
 | 
				
			||||||
                        d_bb_sq = dm0 * dm0;
 | 
					                        d_bb_sq = dm0 * dm0;
 | 
				
			||||||
                    } while(m + 1 < c && d_bb_sq > cutneighsq);
 | 
					                    } while(m + 1 < c && d_bb_sq > cutneighsq);
 | 
				
			||||||
 | 
					 | 
				
			||||||
                    jbb_xmin = atom->jclusters[cj].bbminx;
 | 
					                    jbb_xmin = atom->jclusters[cj].bbminx;
 | 
				
			||||||
                    jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
					                    jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
				
			||||||
                    jbb_ymin = atom->jclusters[cj].bbminy;
 | 
					                    jbb_ymin = atom->jclusters[cj].bbminy;
 | 
				
			||||||
@@ -383,13 +401,14 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        if(resize) {
 | 
					        if(resize) {
 | 
				
			||||||
            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
					            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
				
			||||||
            fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
 | 
					            fprintf(stdout, "RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
 | 
				
			||||||
            free(neighbor->neighbors);
 | 
					            free(neighbor->neighbors);
 | 
				
			||||||
            free(neighbor->neighbors_imask);
 | 
					            free(neighbor->neighbors_imask);
 | 
				
			||||||
            neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
 | 
					            neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
            neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
 | 
					            neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    if(method == eightShell) neighborGhost(atom, neighbor);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    /*
 | 
					    /*
 | 
				
			||||||
    DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
 | 
					    DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
 | 
				
			||||||
@@ -511,19 +530,19 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
 | 
				
			|||||||
    int ix, iy;
 | 
					    int ix, iy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(xin >= xprd) {
 | 
					    if(xin >= xprd) {
 | 
				
			||||||
        ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
					        ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
 | 
				
			||||||
    } else if(xin >= 0.0) {
 | 
					    } else if(xin >= 0.0) {
 | 
				
			||||||
        ix = (int)(xin * bininvx) - mbinxlo;
 | 
					        ix = (int)((xin+eps) * bininvx) - mbinxlo;
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
					        ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(yin >= yprd) {   
 | 
					    if(yin >= yprd) {   
 | 
				
			||||||
        iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
					        iy = (int)(((yin+eps) - yprd) * bininvy) + nbiny - mbinylo;
 | 
				
			||||||
    } else if(yin >= 0.0) {
 | 
					    } else if(yin >= 0.0) {
 | 
				
			||||||
        iy = (int)(yin * bininvy) - mbinylo;
 | 
					        iy = (int)((yin+eps) * bininvy) - mbinylo;
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
					        iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
    return (iy * mbinx + ix + 1);
 | 
					    return (iy * mbinx + ix + 1);
 | 
				
			||||||
@@ -531,26 +550,24 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
 | 
					void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
 | 
				
			||||||
    if(xin >= xprd) {
 | 
					    if(xin >= xprd) {
 | 
				
			||||||
        *ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
					        *ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
 | 
				
			||||||
    } else if(xin >= 0.0) {
 | 
					    } else if(xin >= 0.0) {
 | 
				
			||||||
        *ix = (int)(xin * bininvx) - mbinxlo;
 | 
					        *ix = (int)((xin+eps) * bininvx) - mbinxlo;
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        *ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
					        *ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(yin >= yprd) {  
 | 
					    if(yin >= yprd) {  
 | 
				
			||||||
        *iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
					        *iy = (int)((yin + eps - yprd) * bininvy) + nbiny - mbinylo;
 | 
				
			||||||
    } else if(yin >= 0.0) {
 | 
					    } else if(yin >= 0.0) {
 | 
				
			||||||
        *iy = (int)(yin * bininvy) - mbinylo;
 | 
					        *iy = (int)((yin+eps) * bininvy) - mbinylo;
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        *iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
					        *iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void binAtoms(Atom *atom) {
 | 
					void binAtoms(Atom *atom) {
 | 
				
			||||||
    DEBUG_MESSAGE("binAtoms start\n");
 | 
					    DEBUG_MESSAGE("binAtoms start\n");
 | 
				
			||||||
    int resize = 1;
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    while(resize > 0) {
 | 
					    while(resize > 0) {
 | 
				
			||||||
        resize = 0;
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -616,7 +633,6 @@ void buildClusters(Atom *atom) {
 | 
				
			|||||||
    /* bin local atoms */
 | 
					    /* bin local atoms */
 | 
				
			||||||
    binAtoms(atom);
 | 
					    binAtoms(atom);
 | 
				
			||||||
    sortAtomsByZCoord(atom);   
 | 
					    sortAtomsByZCoord(atom);   
 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int bin = 0; bin < mbins; bin++) {
 | 
					    for(int bin = 0; bin < mbins; bin++) {
 | 
				
			||||||
        int c = bincount[bin];
 | 
					        int c = bincount[bin];
 | 
				
			||||||
        int ac = 0;
 | 
					        int ac = 0;
 | 
				
			||||||
@@ -688,6 +704,9 @@ void buildClusters(Atom *atom) {
 | 
				
			|||||||
void defineJClusters(Atom *atom) {
 | 
					void defineJClusters(Atom *atom) {
 | 
				
			||||||
    DEBUG_MESSAGE("defineJClusters start\n");
 | 
					    DEBUG_MESSAGE("defineJClusters start\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
 | 
				
			||||||
 | 
					    atom->ncj = atom->Nclusters_local / jfac;   
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
					    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
				
			||||||
        int cj0 = CJ0_FROM_CI(ci);
 | 
					        int cj0 = CJ0_FROM_CI(ci);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -830,12 +849,11 @@ void binClusters(Atom *atom) {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
 | 
					        for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
 | 
				
			||||||
            const int cj = ncj + cg;
 | 
					            const int cj = ncj + cg;
 | 
				
			||||||
            int ix = -1, iy = -1;
 | 
					            int ix = -1, iy = -1;
 | 
				
			||||||
            MD_FLOAT xtmp, ytmp;
 | 
					            MD_FLOAT xtmp, ytmp;
 | 
				
			||||||
 | 
					            if(shellMethod == halfShell && !halfZoneCluster(atom, cj)) continue;
 | 
				
			||||||
            if(atom->jclusters[cj].natoms > 0) {
 | 
					            if(atom->jclusters[cj].natoms > 0) {
 | 
				
			||||||
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
					                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
				
			||||||
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
					                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
@@ -846,6 +864,7 @@ void binClusters(Atom *atom) {
 | 
				
			|||||||
                coord2bin2D(xtmp, ytmp, &ix, &iy);
 | 
					                coord2bin2D(xtmp, ytmp, &ix, &iy);
 | 
				
			||||||
                ix = MAX(MIN(ix, mbinx - 1), 0);
 | 
					                ix = MAX(MIN(ix, mbinx - 1), 0);
 | 
				
			||||||
                iy = MAX(MIN(iy, mbiny - 1), 0);
 | 
					                iy = MAX(MIN(iy, mbiny - 1), 0);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
                for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
					                for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
				
			||||||
                    int nix, niy;
 | 
					                    int nix, niy;
 | 
				
			||||||
                    xtmp = cj_x[CL_X_OFFSET + cjj];
 | 
					                    xtmp = cj_x[CL_X_OFFSET + cjj];
 | 
				
			||||||
@@ -861,7 +880,6 @@ void binClusters(Atom *atom) {
 | 
				
			|||||||
                    if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
 | 
					                    if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
 | 
				
			||||||
                    if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
 | 
					                    if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					 | 
				
			||||||
                int bin = iy * mbinx + ix + 1;
 | 
					                int bin = iy * mbinx + ix + 1;
 | 
				
			||||||
                int c = bin_nclusters[bin];
 | 
					                int c = bin_nclusters[bin];
 | 
				
			||||||
                if(c < clusters_per_bin) {
 | 
					                if(c < clusters_per_bin) {
 | 
				
			||||||
@@ -883,25 +901,21 @@ void binClusters(Atom *atom) {
 | 
				
			|||||||
                            break;
 | 
					                            break;
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
                    if(!inserted) {
 | 
					                    if(!inserted) {
 | 
				
			||||||
                        bin_clusters[bin * clusters_per_bin + c] = cj;
 | 
					                        bin_clusters[bin * clusters_per_bin + c] = cj;
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
                    bin_nclusters[bin]++;
 | 
					                    bin_nclusters[bin]++;
 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    resize = 1;
 | 
					                    resize = 1;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(resize) {
 | 
					        if(resize) {
 | 
				
			||||||
            free(bin_clusters);
 | 
					            free(bin_clusters);
 | 
				
			||||||
            clusters_per_bin *= 2;
 | 
					            clusters_per_bin *= 2;
 | 
				
			||||||
            bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
 | 
					            bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    /*
 | 
					    /*
 | 
				
			||||||
    DEBUG_MESSAGE("bin_nclusters\n");
 | 
					    DEBUG_MESSAGE("bin_nclusters\n");
 | 
				
			||||||
    for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
 | 
					    for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
 | 
				
			||||||
@@ -919,7 +933,6 @@ void updateSingleAtoms(Atom *atom) {
 | 
				
			|||||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
					        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
				
			||||||
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
					        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
				
			||||||
        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
					        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
 | 
					        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
 | 
				
			||||||
            atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
 | 
					            atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
 | 
				
			||||||
            atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
 | 
					            atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
 | 
				
			||||||
@@ -930,10 +943,172 @@ void updateSingleAtoms(Atom *atom) {
 | 
				
			|||||||
            Natom++;
 | 
					            Natom++;
 | 
				
			||||||
        }       
 | 
					        }       
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(Natom != atom->Nlocal) {
 | 
					    if(Natom != atom->Nlocal) {
 | 
				
			||||||
        fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
 | 
					        fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    DEBUG_MESSAGE("updateSingleAtoms stop\n");
 | 
					    DEBUG_MESSAGE("updateSingleAtoms stop\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//MPI Shell Methods
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int eightZoneCluster(Atom* atom, int cj)
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    //Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
 | 
				
			||||||
 | 
					    int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
 | 
				
			||||||
 | 
					    int zone = 0;
 | 
				
			||||||
 | 
					    MD_FLOAT *hi = atom->mybox.hi;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (atom->jclusters[cj].bbminx +eps >=hi[_x]){
 | 
				
			||||||
 | 
					        zone += 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (atom->jclusters[cj].bbminy  +eps >=hi[_y]){
 | 
				
			||||||
 | 
					        zone += 2;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (atom->jclusters[cj].bbminz  +eps >=hi[_z]){
 | 
				
			||||||
 | 
					        zone += 4;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return zoneMapping[zone];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int halfZoneCluster(Atom* atom, int cj)
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    MD_FLOAT *hi = atom->mybox.hi;
 | 
				
			||||||
 | 
					    MD_FLOAT *lo = atom->mybox.lo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(atom->jclusters[cj].bbmaxx < lo[_x] && atom->jclusters[cj].bbmaxy < hi[_y] && 
 | 
				
			||||||
 | 
					       atom->jclusters[cj].bbmaxz < hi[_z]){
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    } else if(atom->jclusters[cj].bbmaxy < lo[_y] && atom->jclusters[cj].bbmaxz < hi[_z]){
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    } else if(atom->jclusters[cj].bbmaxz < lo[_z]){
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    } else { 
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int BoxGhostDistance(Atom *atom, int ci, int cj) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    MD_FLOAT dl = atom->jclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
 | 
				
			||||||
 | 
					    MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->jclusters[ci].bbmaxx;
 | 
				
			||||||
 | 
					    MD_FLOAT dm = MAX(dl, dh);
 | 
				
			||||||
 | 
					    MD_FLOAT dm0 = MAX(dm, 0.0);
 | 
				
			||||||
 | 
					    MD_FLOAT dx2 = dm0 * dm0;
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					    dl = atom->jclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
 | 
				
			||||||
 | 
					    dh = atom->jclusters[cj].bbminy - atom->jclusters[ci].bbmaxy;
 | 
				
			||||||
 | 
					    dm = MAX(dl, dh);
 | 
				
			||||||
 | 
					    dm0 = MAX(dm, 0.0);
 | 
				
			||||||
 | 
					    MD_FLOAT dy2 = dm0 * dm0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    dl = atom->jclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
 | 
				
			||||||
 | 
					    dh = atom->jclusters[cj].bbminz - atom->jclusters[ci].bbmaxz;
 | 
				
			||||||
 | 
					    dm = MAX(dl, dh);
 | 
				
			||||||
 | 
					    dm0 = MAX(dm, 0.0);
 | 
				
			||||||
 | 
					    MD_FLOAT dz2 = dm0 * dm0;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return dx2 > cutneighsq ? 0 : dy2 > cutneighsq ? 0 : dz2 > cutneighsq ? 0 : 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int ghostClusterinRange(Atom *atom, int cs, int cg, MD_FLOAT rsq) {
 | 
				
			||||||
 | 
					    int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
 | 
				
			||||||
 | 
					    int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
				
			||||||
 | 
					    MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
 | 
				
			||||||
 | 
					    MD_FLOAT *cg_x = &atom->cl_x[cj_vec_base];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int cii = 0; cii < atom->jclusters[cs].natoms; cii++) {
 | 
				
			||||||
 | 
					        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
				
			||||||
 | 
					            MD_FLOAT delx = cs_x[CL_X_OFFSET + cii] - cg_x[CL_X_OFFSET + cjj];
 | 
				
			||||||
 | 
					            MD_FLOAT dely = cs_x[CL_Y_OFFSET + cii] - cg_x[CL_Y_OFFSET + cjj];
 | 
				
			||||||
 | 
					            MD_FLOAT delz = cs_x[CL_Z_OFFSET + cii] - cg_x[CL_Z_OFFSET + cjj];
 | 
				
			||||||
 | 
					            if(delx * delx + dely * dely + delz * delz < rsq) {
 | 
				
			||||||
 | 
					                return 1;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void neighborGhost(Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
 | 
					    int Nshell=0;
 | 
				
			||||||
 | 
					    int Ncluster_local = atom->Nclusters_local;
 | 
				
			||||||
 | 
					    int Nclusterghost  = atom->Nclusters_ghost;
 | 
				
			||||||
 | 
					    if(neighbor->listshell) free(neighbor->listshell);
 | 
				
			||||||
 | 
					    neighbor->listshell = (int*) malloc(Nclusterghost * sizeof(int));
 | 
				
			||||||
 | 
					    int* listzone  = (int*) malloc(8 * Nclusterghost * sizeof(int));
 | 
				
			||||||
 | 
					    int countCluster[8] = {0,0,0,0,0,0,0,0};
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    //Selecting ghost atoms for interaction and putting them into regions
 | 
				
			||||||
 | 
					   for(int cg = atom->ncj; cg < atom->ncj+Nclusterghost; cg++) {
 | 
				
			||||||
 | 
					        int czone = eightZoneCluster(atom,cg);        
 | 
				
			||||||
 | 
					        int *list = &listzone[Nclusterghost*czone];
 | 
				
			||||||
 | 
					        int n  = countCluster[czone];
 | 
				
			||||||
 | 
					        list[n] = cg;
 | 
				
			||||||
 | 
					        countCluster[czone]++;     
 | 
				
			||||||
 | 
					       //It is only necessary to find neighbour particles for 3 regions
 | 
				
			||||||
 | 
					       //if(czone == 1 || czone == 2 || czone == 3)
 | 
				
			||||||
 | 
					       //neighbor->listshell[Nshell++] = cg;  
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int zone = 1; zone<=3; zone++){
 | 
				
			||||||
 | 
					        int *list = &listzone[Nclusterghost*zone];
 | 
				
			||||||
 | 
					        for(int n=0; n<countCluster[zone]; n++)
 | 
				
			||||||
 | 
					            neighbor->listshell[Nshell++] = list[n]; 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    neighbor->Nshell = Nshell;
 | 
				
			||||||
 | 
					    if(neighbor->numNeighShell) free(neighbor->numNeighShell);
 | 
				
			||||||
 | 
					    if(neighbor->neighshell) free(neighbor->neighshell);
 | 
				
			||||||
 | 
					    neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
 | 
					    neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while(resize)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					        for(int ic = 0; ic < Nshell; ic++) {   
 | 
				
			||||||
 | 
					            int *neighshell = &(neighbor->neighshell[ic*neighbor->maxneighs]); 
 | 
				
			||||||
 | 
					            int n = 0;  
 | 
				
			||||||
 | 
					            int icluster = neighbor->listshell[ic];
 | 
				
			||||||
 | 
					            int iczone = eightZoneCluster(atom, icluster);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            for(int jczone=0; jczone<8; jczone++){
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                if(jczone <=iczone) continue;
 | 
				
			||||||
 | 
					                if(iczone == 1 && (jczone==5||jczone==6||jczone==7)) continue;
 | 
				
			||||||
 | 
					                if(iczone == 2 && (jczone==4||jczone==6||jczone==7)) continue;
 | 
				
			||||||
 | 
					                if(iczone == 3 && (jczone==4||jczone==5||jczone==7)) continue;
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                int Ncluster = countCluster[jczone];
 | 
				
			||||||
 | 
					                int* loc_zone = &listzone[jczone * Nclusterghost];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                for(int k = 0; k < Ncluster ; k++) {
 | 
				
			||||||
 | 
					                    int jcluster = loc_zone[k];    
 | 
				
			||||||
 | 
					                                    
 | 
				
			||||||
 | 
					                    if(BoxGhostDistance(atom, icluster, jcluster)) 
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        if(ghostClusterinRange(atom, icluster, jcluster, cutneighsq))
 | 
				
			||||||
 | 
					                            neighshell[n++] = jcluster;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            neighbor->numNeighShell[ic] = n; 
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            if(n >= neighbor->maxneighs){
 | 
				
			||||||
 | 
					                resize = 1;
 | 
				
			||||||
 | 
					                neighbor->maxneighs = n * 1.2;
 | 
				
			||||||
 | 
					                fprintf(stdout, "RESIZE EIGHT SHELL %d, PROC %d\n", neighbor->maxneighs,me);
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }  
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if(resize) {
 | 
				
			||||||
 | 
					            free(neighbor->neighshell);
 | 
				
			||||||
 | 
					            neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }  
 | 
				
			||||||
 | 
					    free(listzone); 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										130
									
								
								gromacs/vtk.c
									
									
									
									
									
								
							
							
						
						
									
										130
									
								
								gromacs/vtk.c
									
									
									
									
									
								
							@@ -9,6 +9,11 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <vtk.h>
 | 
					#include <vtk.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static MPI_File _fh; 
 | 
				
			||||||
 | 
					static inline void flushBuffer(char*); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
 | 
					void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
 | 
				
			||||||
    write_local_atoms_to_vtk_file(filename, atom, timestep);
 | 
					    write_local_atoms_to_vtk_file(filename, atom, timestep);
 | 
				
			||||||
@@ -188,3 +193,128 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
 | 
				
			|||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    char msg[256];
 | 
				
			||||||
 | 
					    char timestep_filename[128];
 | 
				
			||||||
 | 
					    snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
 | 
				
			||||||
 | 
					    MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
 | 
				
			||||||
 | 
					    if(_fh == MPI_FILE_NULL) {
 | 
				
			||||||
 | 
					        if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					    if (comm->myproc==0){
 | 
				
			||||||
 | 
					        sprintf(msg, "# vtk DataFile Version 2.0\n");
 | 
				
			||||||
 | 
					        sprintf(msg, "%sParticle data\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sASCII\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);  
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int vtkVector(Comm* comm, Atom* atom, Parameter* param)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					    if (_fh == MPI_FILE_NULL) {
 | 
				
			||||||
 | 
					        if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    int sizeline= 25;   //#initial guess of characters in "%.4f %.4f %.4f\n" 
 | 
				
			||||||
 | 
					    int extrabuff = 100;
 | 
				
			||||||
 | 
					    int sizebuff = sizeline*atom->Nlocal+extrabuff; 
 | 
				
			||||||
 | 
					    int mysize = 0;
 | 
				
			||||||
 | 
					    char* msg = (char*) malloc(sizebuff);
 | 
				
			||||||
 | 
					    sprintf(msg, "");
 | 
				
			||||||
 | 
					    for(int i = 0; i < atom->Nlocal; i++){
 | 
				
			||||||
 | 
					        if(mysize+extrabuff >= sizebuff){
 | 
				
			||||||
 | 
					            sizebuff*= 1.5;
 | 
				
			||||||
 | 
					            msg = (char*) realloc(msg, sizebuff); 
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        //TODO: do not forget to add param->xlo, param->ylo, param->zlo   
 | 
				
			||||||
 | 
					        sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
 | 
				
			||||||
 | 
					        mysize = strlen(msg);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    int gatherSize[comm->numproc];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    int offset=0;
 | 
				
			||||||
 | 
					    int globalSize = 0;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int i = 0; i < comm->myproc; i++)
 | 
				
			||||||
 | 
					        offset+= gatherSize[i];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int i = 0; i < comm->numproc; i++)
 | 
				
			||||||
 | 
					        globalSize+= gatherSize[i];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    MPI_Offset displ;   
 | 
				
			||||||
 | 
					    MPI_Datatype FileType;       
 | 
				
			||||||
 | 
					    int GlobalSize[] = {globalSize}; 
 | 
				
			||||||
 | 
					    int LocalSize[]  = {mysize};
 | 
				
			||||||
 | 
					    int Start[] = {offset};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(LocalSize[0]>0){
 | 
				
			||||||
 | 
					        MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);    
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Type_commit(&FileType);
 | 
				
			||||||
 | 
					    MPI_File_get_size(_fh, &displ);
 | 
				
			||||||
 | 
					    MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
 | 
				
			||||||
 | 
					    MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD); 
 | 
				
			||||||
 | 
					    MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);    
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					    if (comm->myproc==0){
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        sprintf(msg, "\n\n");
 | 
				
			||||||
 | 
					        sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Natoms; i++) 
 | 
				
			||||||
 | 
					            sprintf(msg, "%s1 %d\n", msg, i);
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        sprintf(msg, "\n\n"); 
 | 
				
			||||||
 | 
					        sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Natoms; i++) 
 | 
				
			||||||
 | 
					            sprintf(msg, "%s1\n",msg);
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        sprintf(msg, "\n\n"); 
 | 
				
			||||||
 | 
					        sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sSCALARS mass double\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Natoms; i++) 
 | 
				
			||||||
 | 
					            sprintf(msg, "%s1.0\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%s\n\n",msg);
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void vtkClose()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MPI_File_close(&_fh);
 | 
				
			||||||
 | 
					    _fh=MPI_FILE_NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//TODO: print ghost and cluster using MPI
 | 
				
			||||||
 | 
					void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if(comm->numproc == 1)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        write_data_to_vtk_file(filename, atom, timestep);
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    vtkOpen(filename, comm, atom, timestep);
 | 
				
			||||||
 | 
					    vtkVector(comm, atom, param);
 | 
				
			||||||
 | 
					    vtkClose(); 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void flushBuffer(char* msg){
 | 
				
			||||||
 | 
					    MPI_Offset displ; 
 | 
				
			||||||
 | 
					    MPI_File_get_size(_fh, &displ);
 | 
				
			||||||
 | 
					    MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										32
									
								
								include_MPIICC.mk
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								include_MPIICC.mk
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
				
			|||||||
 | 
					CC  = mpiicc
 | 
				
			||||||
 | 
					LINKER = $(CC)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					OPENMP  = #-qopenmp
 | 
				
			||||||
 | 
					PROFILE  = #-profile-functions -g  -pg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(ISA),AVX512)
 | 
				
			||||||
 | 
					OPTS      =  -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)  #-g -debug
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(ISA),AVX2)
 | 
				
			||||||
 | 
					OPTS     = -Ofast -xCORE-AVX2  $(PROFILE)
 | 
				
			||||||
 | 
					#OPTS     = -Ofast -xAVX2  $(PROFILE)
 | 
				
			||||||
 | 
					#OPTS     = -Ofast -march=core-avx2 $(PROFILE)
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(ISA),AVX)
 | 
				
			||||||
 | 
					OPTS     = -Ofast -xAVX  $(PROFILE)
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ifeq ($(ISA),SSE)
 | 
				
			||||||
 | 
					OPTS     = -Ofast -xSSE4.2 $(PROFILE)
 | 
				
			||||||
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#OPTS     = -Ofast -no-vec $(PROFILE)
 | 
				
			||||||
 | 
					#OPTS     = -Ofast -xHost $(PROFILE)
 | 
				
			||||||
 | 
					CFLAGS   = $(PROFILE) -restrict $(OPENMP) $(OPTS)
 | 
				
			||||||
 | 
					ASFLAGS  = #-masm=intel
 | 
				
			||||||
 | 
					LFLAGS   = $(PROFILE) $(OPTS) $(OPENMP) 
 | 
				
			||||||
 | 
					DEFINES  = -std=c11 -pedantic-errors -D_GNU_SOURCE -DNO_ZMM_INTRIN
 | 
				
			||||||
 | 
					INCLUDES = 
 | 
				
			||||||
 | 
					LIBS     = -lm
 | 
				
			||||||
							
								
								
									
										317
									
								
								lammps/atom.c
									
									
									
									
									
								
							
							
						
						
									
										317
									
								
								lammps/atom.c
									
									
									
									
									
								
							@@ -9,10 +9,12 @@
 | 
				
			|||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
#include <math.h>
 | 
					#include <math.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <allocate.h>
 | 
					#include <allocate.h>
 | 
				
			||||||
#include <device.h>
 | 
					#include <device.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define DELTA 20000
 | 
					#define DELTA 20000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -24,7 +26,7 @@
 | 
				
			|||||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
 | 
					#define MAX(a,b) ((a) > (b) ? (a) : (b))
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void initAtom(Atom *atom) {
 | 
					void initAtom(Atom *atom){
 | 
				
			||||||
    atom->x  = NULL; atom->y  = NULL; atom->z  = NULL;
 | 
					    atom->x  = NULL; atom->y  = NULL; atom->z  = NULL;
 | 
				
			||||||
    atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
 | 
					    atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
 | 
				
			||||||
    atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
 | 
					    atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
 | 
				
			||||||
@@ -41,6 +43,7 @@ void initAtom(Atom *atom) {
 | 
				
			|||||||
    atom->radius = NULL;
 | 
					    atom->radius = NULL;
 | 
				
			||||||
    atom->av = NULL;
 | 
					    atom->av = NULL;
 | 
				
			||||||
    atom->r = NULL;
 | 
					    atom->r = NULL;
 | 
				
			||||||
 | 
					    atom->border_map = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    DeviceAtom *d_atom = &(atom->d_atom);
 | 
					    DeviceAtom *d_atom = &(atom->d_atom);
 | 
				
			||||||
    d_atom->x  = NULL; d_atom->y  = NULL; d_atom->z  = NULL;
 | 
					    d_atom->x  = NULL; d_atom->y  = NULL; d_atom->z  = NULL;
 | 
				
			||||||
@@ -52,12 +55,19 @@ void initAtom(Atom *atom) {
 | 
				
			|||||||
    d_atom->sigma6 = NULL;
 | 
					    d_atom->sigma6 = NULL;
 | 
				
			||||||
    d_atom->cutforcesq = NULL;
 | 
					    d_atom->cutforcesq = NULL;
 | 
				
			||||||
    d_atom->cutneighsq = NULL;
 | 
					    d_atom->cutneighsq = NULL;
 | 
				
			||||||
 | 
					    //MPI
 | 
				
			||||||
 | 
					    Box *mybox = &(atom->mybox);                  
 | 
				
			||||||
 | 
					    mybox->xprd = mybox->yprd = mybox->zprd = 0;          
 | 
				
			||||||
 | 
					    mybox->lo[_x]  = mybox->lo[_y]  = mybox->lo[_z] = 0;             
 | 
				
			||||||
 | 
					    mybox->hi[_x]  = mybox->hi[_y]  = mybox->hi[_z] = 0;   
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void createAtom(Atom *atom, Parameter *param) {
 | 
					void createAtom(Atom *atom, Parameter *param) {
 | 
				
			||||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
 | 
					    
 | 
				
			||||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
 | 
					    MD_FLOAT xlo = 0; MD_FLOAT xhi = param->xprd;
 | 
				
			||||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
 | 
					    MD_FLOAT ylo = 0; MD_FLOAT yhi = param->yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zlo = 0; MD_FLOAT zhi = param->zprd;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    atom->Natoms = 4 * param->nx * param->ny * param->nz;
 | 
					    atom->Natoms = 4 * param->nx * param->ny * param->nz;
 | 
				
			||||||
    atom->Nlocal = 0;
 | 
					    atom->Nlocal = 0;
 | 
				
			||||||
    atom->ntypes = param->ntypes;
 | 
					    atom->ntypes = param->ntypes;
 | 
				
			||||||
@@ -131,7 +141,7 @@ void createAtom(Atom *atom, Parameter *param) {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
                vztmp = myrandom(&n);
 | 
					                vztmp = myrandom(&n);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if(atom->Nlocal == atom->Nmax) {
 | 
					                while(atom->Nlocal >= atom->Nmax) {
 | 
				
			||||||
                    growAtom(atom);
 | 
					                    growAtom(atom);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -163,38 +173,42 @@ int type_str2int(const char *type) {
 | 
				
			|||||||
    return -1;
 | 
					    return -1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom(Atom* atom, Parameter* param) {
 | 
					int readAtom(Atom *atom, Parameter *param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    int len = strlen(param->input_file);
 | 
					    int len = strlen(param->input_file);
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
 | 
				
			||||||
    if(strncmp(¶m->input_file[len - 3], ".in",  3) == 0) { return readAtom_in(atom, param); }
 | 
					    if(strncmp(¶m->input_file[len - 3], ".in",  3) == 0) { return readAtom_in(atom, param); }
 | 
				
			||||||
    fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
 | 
					    if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
 | 
				
			||||||
    exit(-1);
 | 
					    exit(-1);
 | 
				
			||||||
    return -1;
 | 
					    return -1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
					int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    int read_atoms = 0;
 | 
					    int read_atoms = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while(!feof(fp)) {
 | 
					    while(!feof(fp)) {
 | 
				
			||||||
        readline(line, fp);
 | 
					        readline(line, fp);
 | 
				
			||||||
        char *item = strtok(line, " ");
 | 
					        char *item = strtok(line, "\t ");
 | 
				
			||||||
        if(strncmp(item, "CRYST1", 6) == 0) {
 | 
					        if(strncmp(item, "CRYST1", 6) == 0) {
 | 
				
			||||||
            param->xlo = 0.0;
 | 
					            param->xlo = 0.0;
 | 
				
			||||||
            param->xhi = atof(strtok(NULL, " "));
 | 
					            param->xhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            param->ylo = 0.0;
 | 
					            param->ylo = 0.0;
 | 
				
			||||||
            param->yhi = atof(strtok(NULL, " "));
 | 
					            param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            param->zlo = 0.0;
 | 
					            param->zlo = 0.0;
 | 
				
			||||||
            param->zhi = atof(strtok(NULL, " "));
 | 
					            param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            param->xprd = param->xhi - param->xlo;
 | 
					            param->xprd = param->xhi - param->xlo;
 | 
				
			||||||
            param->yprd = param->yhi - param->ylo;
 | 
					            param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
            param->zprd = param->zhi - param->zlo;
 | 
					            param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
@@ -203,23 +217,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
            char *label;
 | 
					            char *label;
 | 
				
			||||||
            int atom_id, comp_id;
 | 
					            int atom_id, comp_id;
 | 
				
			||||||
            MD_FLOAT occupancy, charge;
 | 
					            MD_FLOAT occupancy, charge;
 | 
				
			||||||
            atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
					            atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            while(atom_id + 1 >= atom->Nmax) {
 | 
					            while(atom_id + 1 >= atom->Nmax) {
 | 
				
			||||||
                growAtom(atom);
 | 
					                growAtom(atom);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            atom->type[atom_id] = type_str2int(strtok(NULL, " "));
 | 
					            atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
 | 
				
			||||||
            label = strtok(NULL, " ");
 | 
					            label = strtok(NULL, "\t ");
 | 
				
			||||||
            comp_id = atoi(strtok(NULL, " "));
 | 
					            comp_id = atoi(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					            atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					            atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					            atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom_vx(atom_id) = 0.0;
 | 
					            atom_vx(atom_id) = 0.0;
 | 
				
			||||||
            atom_vy(atom_id) = 0.0;
 | 
					            atom_vy(atom_id) = 0.0;
 | 
				
			||||||
            atom_vz(atom_id) = 0.0;
 | 
					            atom_vz(atom_id) = 0.0;
 | 
				
			||||||
            occupancy = atof(strtok(NULL, " "));
 | 
					            occupancy = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            charge = atof(strtok(NULL, " "));
 | 
					            charge = atof(strtok(NULL, "\t "));
 | 
				
			||||||
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
					            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
				
			||||||
            atom->Natoms++;
 | 
					            atom->Natoms++;
 | 
				
			||||||
            atom->Nlocal++;
 | 
					            atom->Nlocal++;
 | 
				
			||||||
@@ -231,14 +245,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
                  strncmp(item, "ENDMDL", 6) == 0) {
 | 
					                  strncmp(item, "ENDMDL", 6) == 0) {
 | 
				
			||||||
            // Do nothing
 | 
					            // Do nothing
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            fprintf(stderr, "Invalid item: %s\n", item);
 | 
					            if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
 | 
				
			||||||
            exit(-1);
 | 
					            exit(-1);
 | 
				
			||||||
            return -1;
 | 
					            return -1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!read_atoms) {
 | 
					    if(!read_atoms) {
 | 
				
			||||||
        fprintf(stderr, "Input error: No atoms read!\n");
 | 
					        if(me==0)fprintf(stderr, "Input error: No atoms read!\n");
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -254,12 +268,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
					    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
				
			||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return read_atoms;
 | 
					    return read_atoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_gro(Atom* atom, Parameter* param) {
 | 
					int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    char desc[MAXLINE];
 | 
					    char desc[MAXLINE];
 | 
				
			||||||
@@ -268,7 +285,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    int i = 0;
 | 
					    int i = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -277,26 +294,26 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    for(i = 0; desc[i] != '\n'; i++);
 | 
					    for(i = 0; desc[i] != '\n'; i++);
 | 
				
			||||||
    desc[i] = '\0';
 | 
					    desc[i] = '\0';
 | 
				
			||||||
    readline(line, fp);
 | 
					    readline(line, fp);
 | 
				
			||||||
    atoms_to_read = atoi(strtok(line, " "));
 | 
					    atoms_to_read = atoi(strtok(line, "\t "));
 | 
				
			||||||
    fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
					    if(me==0)fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while(!feof(fp) && read_atoms < atoms_to_read) {
 | 
					    while(!feof(fp) && read_atoms < atoms_to_read) {
 | 
				
			||||||
        readline(line, fp);
 | 
					        readline(line, fp);
 | 
				
			||||||
        char *label = strtok(line, " ");
 | 
					        char *label = strtok(line, "\t ");
 | 
				
			||||||
        int type = type_str2int(strtok(NULL, " "));
 | 
					        int type = type_str2int(strtok(NULL, "\t "));
 | 
				
			||||||
        int atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
					        int atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
				
			||||||
        atom_id = read_atoms;
 | 
					        atom_id = read_atoms;
 | 
				
			||||||
        while(atom_id + 1 >= atom->Nmax) {
 | 
					        while(atom_id + 1 >= atom->Nmax) {
 | 
				
			||||||
            growAtom(atom);
 | 
					            growAtom(atom);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        atom->type[atom_id] = type;
 | 
					        atom->type[atom_id] = type;
 | 
				
			||||||
        atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_vx(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_vx(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_vy(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_vy(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_vz(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_vz(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
					        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
				
			||||||
        atom->Natoms++;
 | 
					        atom->Natoms++;
 | 
				
			||||||
        atom->Nlocal++;
 | 
					        atom->Nlocal++;
 | 
				
			||||||
@@ -306,18 +323,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    if(!feof(fp)) {
 | 
					    if(!feof(fp)) {
 | 
				
			||||||
        readline(line, fp);
 | 
					        readline(line, fp);
 | 
				
			||||||
        param->xlo = 0.0;
 | 
					        param->xlo = 0.0;
 | 
				
			||||||
        param->xhi = atof(strtok(line, " "));
 | 
					        param->xhi = atof(strtok(line, "\t "));
 | 
				
			||||||
        param->ylo = 0.0;
 | 
					        param->ylo = 0.0;
 | 
				
			||||||
        param->yhi = atof(strtok(NULL, " "));
 | 
					        param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        param->zlo = 0.0;
 | 
					        param->zlo = 0.0;
 | 
				
			||||||
        param->zhi = atof(strtok(NULL, " "));
 | 
					        param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        param->xprd = param->xhi - param->xlo;
 | 
					        param->xprd = param->xhi - param->xlo;
 | 
				
			||||||
        param->yprd = param->yhi - param->ylo;
 | 
					        param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
        param->zprd = param->zhi - param->zlo;
 | 
					        param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(read_atoms != atoms_to_read) {
 | 
					    if(read_atoms != atoms_to_read) {
 | 
				
			||||||
        fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
					        if(me==0)fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -333,12 +350,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
					    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
				
			||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return read_atoms;
 | 
					    return read_atoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
					int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    int natoms = 0;
 | 
					    int natoms = 0;
 | 
				
			||||||
@@ -347,7 +366,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			|||||||
    int ts = -1;
 | 
					    int ts = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -370,47 +389,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
 | 
					            } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
 | 
				
			||||||
                readline(line, fp);
 | 
					                readline(line, fp);
 | 
				
			||||||
                param->xlo = atof(strtok(line, " "));
 | 
					                param->xlo = atof(strtok(line, "\t "));
 | 
				
			||||||
                param->xhi = atof(strtok(NULL, " "));
 | 
					                param->xhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                param->xprd = param->xhi - param->xlo;
 | 
					                param->xprd = param->xhi - param->xlo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                readline(line, fp);
 | 
					                readline(line, fp);
 | 
				
			||||||
                param->ylo = atof(strtok(line, " "));
 | 
					                param->ylo = atof(strtok(line, "\t "));
 | 
				
			||||||
                param->yhi = atof(strtok(NULL, " "));
 | 
					                param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                param->yprd = param->yhi - param->ylo;
 | 
					                param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                readline(line, fp);
 | 
					                readline(line, fp);
 | 
				
			||||||
                param->zlo = atof(strtok(line, " "));
 | 
					                param->zlo = atof(strtok(line, "\t "));
 | 
				
			||||||
                param->zhi = atof(strtok(NULL, " "));
 | 
					                param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                param->zprd = param->zhi - param->zlo;
 | 
					                param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
            } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
 | 
					            } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
 | 
				
			||||||
                for(int i = 0; i < natoms; i++) {
 | 
					                for(int i = 0; i < natoms; i++) {
 | 
				
			||||||
                    readline(line, fp);
 | 
					                    readline(line, fp);
 | 
				
			||||||
                    atom_id = atoi(strtok(line, " ")) - 1;
 | 
					                    atom_id = atoi(strtok(line, "\t ")) - 1;
 | 
				
			||||||
                    atom->type[atom_id] = atoi(strtok(NULL, " "));
 | 
					                    atom->type[atom_id] = atoi(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_vx(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_vx(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_vy(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_vy(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom_vz(atom_id) = atof(strtok(NULL, " "));
 | 
					                    atom_vz(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
                    atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
					                    atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
				
			||||||
                    read_atoms++;
 | 
					                    read_atoms++;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                fprintf(stderr, "Invalid item: %s\n", item);
 | 
					                if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
 | 
				
			||||||
                exit(-1);
 | 
					                exit(-1);
 | 
				
			||||||
                return -1;
 | 
					                return -1;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
					            if(me==0)fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
				
			||||||
            exit(-1);
 | 
					            exit(-1);
 | 
				
			||||||
            return -1;
 | 
					            return -1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(ts < 0 || !natoms || !read_atoms) {
 | 
					    if(ts < 0 || !natoms || !read_atoms) {
 | 
				
			||||||
        fprintf(stderr, "Input error: atom data was not read!\n");
 | 
					        if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -426,30 +445,34 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
					    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
				
			||||||
    return natoms;
 | 
					    return natoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int readAtom_in(Atom* atom, Parameter* param) {
 | 
					int readAtom_in(Atom* atom, Parameter* param) {
 | 
				
			||||||
 | 
					    int me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
				
			||||||
    FILE *fp = fopen(param->input_file, "r");
 | 
					    FILE *fp = fopen(param->input_file, "r");
 | 
				
			||||||
    char line[MAXLINE];
 | 
					    char line[MAXLINE];
 | 
				
			||||||
    int natoms = 0;
 | 
					    int natoms = 0;
 | 
				
			||||||
    int atom_id = 0;
 | 
					    int atom_id = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(!fp) {
 | 
					    if(!fp) {
 | 
				
			||||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
					        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    readline(line, fp);
 | 
					    readline(line, fp);
 | 
				
			||||||
    natoms = atoi(strtok(line, " "));
 | 
					    natoms = atoi(strtok(line, "\t "));
 | 
				
			||||||
    param->xlo = atof(strtok(NULL, " "));
 | 
					    param->xlo = atof(strtok(NULL, "\t "));
 | 
				
			||||||
    param->xhi = atof(strtok(NULL, " "));
 | 
					    param->xhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
    param->ylo = atof(strtok(NULL, " "));
 | 
					    param->ylo = atof(strtok(NULL, "\t "));
 | 
				
			||||||
    param->yhi = atof(strtok(NULL, " "));
 | 
					    param->yhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
    param->zlo = atof(strtok(NULL, " "));
 | 
					    param->zlo = atof(strtok(NULL, "\t "));
 | 
				
			||||||
    param->zhi = atof(strtok(NULL, " "));
 | 
					    param->zhi = atof(strtok(NULL, "\t "));
 | 
				
			||||||
 | 
					    param->xprd = param->xhi - param->xlo; 
 | 
				
			||||||
 | 
					    param->yprd = param->yhi - param->ylo;
 | 
				
			||||||
 | 
					    param->zprd = param->zhi - param->zlo;
 | 
				
			||||||
    atom->Natoms = natoms;
 | 
					    atom->Natoms = natoms;
 | 
				
			||||||
    atom->Nlocal = natoms;
 | 
					    atom->Nlocal = natoms;
 | 
				
			||||||
    atom->ntypes = 1;
 | 
					    atom->ntypes = 1;
 | 
				
			||||||
@@ -462,27 +485,26 @@ int readAtom_in(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        readline(line, fp);
 | 
					        readline(line, fp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // TODO: store mass per atom
 | 
					        // TODO: store mass per atom
 | 
				
			||||||
        char *s_mass = strtok(line, " ");
 | 
					        char *s_mass = strtok(line, "\t ");
 | 
				
			||||||
        if(strncmp(s_mass, "inf", 3) == 0) {
 | 
					        if(strncmp(s_mass, "inf", 3) == 0) {
 | 
				
			||||||
            // Set atom's mass to INFINITY
 | 
					            // Set atom's mass to INFINITY
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            param->mass = atof(s_mass);
 | 
					            param->mass = atof(s_mass);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
        atom->radius[atom_id] = atof(strtok(NULL, " "));
 | 
					        atom->radius[atom_id] = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_vx(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_vx(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_vy(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_vy(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom_vz(atom_id) = atof(strtok(NULL, " "));
 | 
					        atom_vz(atom_id) = atof(strtok(NULL, "\t "));
 | 
				
			||||||
        atom->type[atom_id] = 0;
 | 
					        atom->type[atom_id] = 0;
 | 
				
			||||||
        atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
					        atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
				
			||||||
        atom_id++;
 | 
					        atom_id++;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(!natoms) {
 | 
					    if(!natoms) {
 | 
				
			||||||
        fprintf(stderr, "Input error: atom data was not read!\n");
 | 
					        if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
 | 
				
			||||||
        exit(-1);
 | 
					        exit(-1);
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -498,25 +520,10 @@ int readAtom_in(Atom* atom, Parameter* param) {
 | 
				
			|||||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
					        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
					    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
				
			||||||
    return natoms;
 | 
					    return natoms;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void writeAtom(Atom *atom, Parameter *param) {
 | 
					 | 
				
			||||||
    FILE *fp = fopen(param->write_atom_file, "w");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; i++) {
 | 
					 | 
				
			||||||
        fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
 | 
					 | 
				
			||||||
            atom->type[i], 1.0,
 | 
					 | 
				
			||||||
            atom_x(i), atom_y(i), atom_z(i),
 | 
					 | 
				
			||||||
            atom_vx(i), atom_vy(i), atom_vz(i));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fclose(fp);
 | 
					 | 
				
			||||||
    fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
 | 
					 | 
				
			||||||
        param->write_atom_file, param->xprd, param->yprd, param->zprd);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void growAtom(Atom *atom) {
 | 
					void growAtom(Atom *atom) {
 | 
				
			||||||
    DeviceAtom *d_atom = &(atom->d_atom);
 | 
					    DeviceAtom *d_atom = &(atom->d_atom);
 | 
				
			||||||
    int nold = atom->Nmax;
 | 
					    int nold = atom->Nmax;
 | 
				
			||||||
@@ -545,7 +552,125 @@ void growAtom(Atom *atom) {
 | 
				
			|||||||
    REALLOC(type, int, atom->Nmax * sizeof(int), nold * sizeof(int));
 | 
					    REALLOC(type, int, atom->Nmax * sizeof(int), nold * sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // DEM
 | 
					    // DEM
 | 
				
			||||||
    atom->radius = (MD_FLOAT *) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
 | 
					    atom->radius = (MD_FLOAT*) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
 | 
				
			||||||
    atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
 | 
					    atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
 | 
				
			||||||
    atom->r  = (MD_FLOAT*) reallocate(atom->r,  ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
 | 
					    atom->r  = (MD_FLOAT*) reallocate(atom->r,  ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* MPI added*/
 | 
				
			||||||
 | 
					void packForward(Atom* atom, int n ,int* list, MD_FLOAT* buf, int* pbc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int i, j;
 | 
				
			||||||
 | 
					    for(i = 0; i < n; i++) {
 | 
				
			||||||
 | 
					      j = list[i];
 | 
				
			||||||
 | 
					      buf_x(i) = atom_x(j) + pbc[0] * atom->mybox.xprd;
 | 
				
			||||||
 | 
					      buf_y(i) = atom_y(j) + pbc[1] * atom->mybox.yprd;
 | 
				
			||||||
 | 
					      buf_z(i) = atom_z(j) + pbc[2] * atom->mybox.zprd;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void unpackForward(Atom* atom, int n, int first, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  for(int i = 0; i < n; i++) {
 | 
				
			||||||
 | 
					    atom_x((first + i)) = buf_x(i);
 | 
				
			||||||
 | 
					    atom_y((first + i)) = buf_y(i);
 | 
				
			||||||
 | 
					    atom_z((first + i)) = buf_z(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int packGhost(Atom* atom, int i, MD_FLOAT* buf, int* pbc)
 | 
				
			||||||
 | 
					{    
 | 
				
			||||||
 | 
					    int m = 0; 
 | 
				
			||||||
 | 
					    buf[m++] = atom_x(i) + pbc[_x] * atom->mybox.xprd;
 | 
				
			||||||
 | 
					    buf[m++] = atom_y(i) + pbc[_y] * atom->mybox.yprd;
 | 
				
			||||||
 | 
					    buf[m++] = atom_z(i) + pbc[_z] * atom->mybox.zprd;
 | 
				
			||||||
 | 
					    buf[m++] = atom->type[i];
 | 
				
			||||||
 | 
					    return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int unpackGhost(Atom* atom, int i, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  while (i>=atom->Nmax) growAtom(atom);
 | 
				
			||||||
 | 
					  int m = 0;
 | 
				
			||||||
 | 
					  atom_x(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_y(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_z(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom->type[i] = buf[m++];
 | 
				
			||||||
 | 
					  atom->Nghost++;
 | 
				
			||||||
 | 
					  return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void packReverse(Atom* atom, int n, int first, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    for(int i = 0; i < n; i++) {
 | 
				
			||||||
 | 
					        buf_x(i) = atom_fx(first + i);
 | 
				
			||||||
 | 
					        buf_y(i) = atom_fy(first + i);
 | 
				
			||||||
 | 
					        buf_z(i) = atom_fz(first + i);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void unpackReverse(Atom* atom, int n, int* list, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int i, j;
 | 
				
			||||||
 | 
					  for(i = 0; i < n; i++) {
 | 
				
			||||||
 | 
					    j = list[i];
 | 
				
			||||||
 | 
					    atom_fx(j) += buf_x(i);
 | 
				
			||||||
 | 
					    atom_fy(j) += buf_y(i);
 | 
				
			||||||
 | 
					    atom_fz(j) += buf_z(i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int packExchange(Atom* atom, int i, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int m = 0;
 | 
				
			||||||
 | 
					  buf[m++] = atom_x(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_y(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_z(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_vx(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_vy(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom_vz(i);
 | 
				
			||||||
 | 
					  buf[m++] = atom->type[i];
 | 
				
			||||||
 | 
					  return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  while(i >= atom->Nmax) growAtom(atom);
 | 
				
			||||||
 | 
					  int m = 0;
 | 
				
			||||||
 | 
					  atom_x(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_y(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_z(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_vx(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_vy(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom_vz(i) = buf[m++];
 | 
				
			||||||
 | 
					  atom->type[i] = buf[m++];
 | 
				
			||||||
 | 
					  return m;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void pbc(Atom* atom)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					    MD_FLOAT xprd = atom->mybox.xprd;
 | 
				
			||||||
 | 
					    MD_FLOAT yprd = atom->mybox.yprd;
 | 
				
			||||||
 | 
					    MD_FLOAT zprd = atom->mybox.zprd; 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    if(atom_x(i) < 0.0) atom_x(i) += xprd;
 | 
				
			||||||
 | 
					    if(atom_y(i) < 0.0) atom_y(i) += yprd;
 | 
				
			||||||
 | 
					    if(atom_z(i) < 0.0)  atom_z(i)+= zprd;
 | 
				
			||||||
 | 
					    if(atom_x(i) >= xprd) atom_x(i) -= xprd;
 | 
				
			||||||
 | 
					    if(atom_y(i) >= yprd) atom_y(i) -= yprd;
 | 
				
			||||||
 | 
					    if(atom_z(i) >= zprd) atom_z(i) -= zprd;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void copy(Atom* atom, int i, int j)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  atom_x(i) = atom_x(j);
 | 
				
			||||||
 | 
					  atom_y(i) = atom_y(j);
 | 
				
			||||||
 | 
					  atom_z(i) = atom_z(j);
 | 
				
			||||||
 | 
					  atom_vx(i) = atom_vx(j);
 | 
				
			||||||
 | 
					  atom_vy(i) = atom_vy(j);
 | 
				
			||||||
 | 
					  atom_vz(i) = atom_vz(j);
 | 
				
			||||||
 | 
					  atom->type[i] = atom->type[j];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -13,13 +13,17 @@
 | 
				
			|||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <stats.h>
 | 
					#include <stats.h>
 | 
				
			||||||
#include <timing.h>
 | 
					#include <timing.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					#include <util.h>
 | 
				
			||||||
#ifdef __SIMD_KERNEL__
 | 
					#ifdef __SIMD_KERNEL__
 | 
				
			||||||
#include <simd.h>
 | 
					#include <simd.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
					double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
				
			||||||
    int Nlocal = atom->Nlocal;
 | 
					    int Nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					    int Nghost = atom->Nghost;
 | 
				
			||||||
    int* neighs;
 | 
					    int* neighs;
 | 
				
			||||||
    #ifndef EXPLICIT_TYPES
 | 
					    #ifndef EXPLICIT_TYPES
 | 
				
			||||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
					    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
				
			||||||
@@ -41,16 +45,16 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
				
			|||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int i = 0; i < Nlocal; i++) {
 | 
					    for(int i = 0; i < Nlocal; i++) {
 | 
				
			||||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
					        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
				
			||||||
        int numneighs = neighbor->numneigh[i];
 | 
					        int numneighs = neighbor->numneigh[i];
 | 
				
			||||||
        MD_FLOAT xtmp = atom_x(i);
 | 
					        MD_FLOAT xtmp = atom_x(i);
 | 
				
			||||||
        MD_FLOAT ytmp = atom_y(i);
 | 
					        MD_FLOAT ytmp = atom_y(i);
 | 
				
			||||||
        MD_FLOAT ztmp = atom_z(i);
 | 
					        MD_FLOAT ztmp = atom_z(i);
 | 
				
			||||||
        MD_FLOAT fix = 0;
 | 
					        MD_FLOAT fix = 0.0;
 | 
				
			||||||
        MD_FLOAT fiy = 0;
 | 
					        MD_FLOAT fiy = 0.0;
 | 
				
			||||||
        MD_FLOAT fiz = 0;
 | 
					        MD_FLOAT fiz = 0.0;
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        #ifdef EXPLICIT_TYPES
 | 
					        #ifdef EXPLICIT_TYPES
 | 
				
			||||||
        const int type_i = atom->type[i];
 | 
					        const int type_i = atom->type[i];
 | 
				
			||||||
@@ -70,7 +74,6 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
				
			|||||||
            const MD_FLOAT sigma6 = atom->sigma6[type_ij];
 | 
					            const MD_FLOAT sigma6 = atom->sigma6[type_ij];
 | 
				
			||||||
            const MD_FLOAT epsilon = atom->epsilon[type_ij];
 | 
					            const MD_FLOAT epsilon = atom->epsilon[type_ij];
 | 
				
			||||||
            #endif
 | 
					            #endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
            if(rsq < cutforcesq) {
 | 
					            if(rsq < cutforcesq) {
 | 
				
			||||||
                MD_FLOAT sr2 = num1 / rsq;
 | 
					                MD_FLOAT sr2 = num1 / rsq;
 | 
				
			||||||
                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
					                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
				
			||||||
@@ -78,6 +81,7 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
				
			|||||||
                fix += delx * force;
 | 
					                fix += delx * force;
 | 
				
			||||||
                fiy += dely * force;
 | 
					                fiy += dely * force;
 | 
				
			||||||
                fiz += delz * force; 
 | 
					                fiz += delz * force; 
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
            #ifdef USE_REFERENCE_VERSION
 | 
					            #ifdef USE_REFERENCE_VERSION
 | 
				
			||||||
                addStat(stats->atoms_within_cutoff, 1);
 | 
					                addStat(stats->atoms_within_cutoff, 1);
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
@@ -85,7 +89,6 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
				
			|||||||
            #endif
 | 
					            #endif
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }              
 | 
					        }              
 | 
				
			||||||
 | 
					 | 
				
			||||||
        atom_fx(i) += fix;
 | 
					        atom_fx(i) += fix;
 | 
				
			||||||
        atom_fy(i) += fiy;
 | 
					        atom_fy(i) += fiy;
 | 
				
			||||||
        atom_fz(i) += fiz;
 | 
					        atom_fz(i) += fiz;
 | 
				
			||||||
@@ -102,13 +105,13 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    LIKWID_MARKER_STOP("force");
 | 
					    LIKWID_MARKER_STOP("force");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    double E = getTimeStamp();
 | 
					    double E = getTimeStamp();
 | 
				
			||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
					double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
				
			||||||
    int Nlocal = atom->Nlocal;
 | 
					    int Nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					    int Nghost = atom->Nghost;
 | 
				
			||||||
    int* neighs;
 | 
					    int* neighs;
 | 
				
			||||||
    #ifndef EXPLICIT_TYPES
 | 
					    #ifndef EXPLICIT_TYPES
 | 
				
			||||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
					    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
				
			||||||
@@ -119,19 +122,18 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
    const MD_FLOAT num48 = 48.0;
 | 
					    const MD_FLOAT num48 = 48.0;
 | 
				
			||||||
    const MD_FLOAT num05 = 0.5;
 | 
					    const MD_FLOAT num05 = 0.5;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i = 0; i < Nlocal; i++) {
 | 
					    for(int i = 0; i < Nlocal+Nghost; i++) {
 | 
				
			||||||
        atom_fx(i) = 0.0;
 | 
					        atom_fx(i) = 0.0;
 | 
				
			||||||
        atom_fy(i) = 0.0;
 | 
					        atom_fy(i) = 0.0;
 | 
				
			||||||
        atom_fz(i) = 0.0;
 | 
					        atom_fz(i) = 0.0;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    double S = getTimeStamp();
 | 
					    double S = getTimeStamp();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp parallel
 | 
					    #pragma omp parallel
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("forceLJ-halfneigh");
 | 
					    LIKWID_MARKER_START("forceLJ-halfneigh");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int i = 0; i < Nlocal; i++) {
 | 
					    for(int i = 0; i < Nlocal; i++) {
 | 
				
			||||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
					        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
				
			||||||
        int numneighs = neighbor->numneigh[i];
 | 
					        int numneighs = neighbor->numneigh[i];
 | 
				
			||||||
@@ -172,16 +174,14 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
                fix += delx * force;
 | 
					                fix += delx * force;
 | 
				
			||||||
                fiy += dely * force;
 | 
					                fiy += dely * force;
 | 
				
			||||||
                fiz += delz * force;
 | 
					                fiz += delz * force;
 | 
				
			||||||
 | 
					                // We need to update forces for ghost atoms if shell_method  or half stencil is requiered
 | 
				
			||||||
                // We do not need to update forces for ghost atoms
 | 
					                if((param->half_neigh && j<Nlocal) || param->method){
 | 
				
			||||||
                if(j < Nlocal) {
 | 
					 | 
				
			||||||
                    atom_fx(j) -= delx * force;
 | 
					                    atom_fx(j) -= delx * force;
 | 
				
			||||||
                    atom_fy(j) -= dely * force;
 | 
					                    atom_fy(j) -= dely * force;
 | 
				
			||||||
                    atom_fz(j) -= delz * force;
 | 
					                    atom_fz(j) -= delz * force;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        atom_fx(i) += fix;
 | 
					        atom_fx(i) += fix;
 | 
				
			||||||
        atom_fy(i) += fiy;
 | 
					        atom_fy(i) += fiy;
 | 
				
			||||||
        atom_fz(i) += fiz;
 | 
					        atom_fz(i) += fiz;
 | 
				
			||||||
@@ -190,6 +190,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
 | 
					        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
				
			||||||
    LIKWID_MARKER_STOP("forceLJ-halfneigh");
 | 
					    LIKWID_MARKER_STOP("forceLJ-halfneigh");
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -227,7 +228,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
 | 
				
			|||||||
    {
 | 
					    {
 | 
				
			||||||
    LIKWID_MARKER_START("force");
 | 
					    LIKWID_MARKER_START("force");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #pragma omp for schedule(runtime)
 | 
					    #pragma omp for
 | 
				
			||||||
    for(int i = 0; i < Nlocal; i++) {
 | 
					    for(int i = 0; i < Nlocal; i++) {
 | 
				
			||||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
					        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
				
			||||||
        int numneighs = neighbor->numneigh[i];
 | 
					        int numneighs = neighbor->numneigh[i];
 | 
				
			||||||
@@ -276,3 +277,58 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
 | 
				
			|||||||
    double E = getTimeStamp();
 | 
					    double E = getTimeStamp();
 | 
				
			||||||
    return E-S;
 | 
					    return E-S;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
 | 
					    int Nshell = neighbor->Nshell;
 | 
				
			||||||
 | 
					    int* neighs;
 | 
				
			||||||
 | 
					    #ifndef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
				
			||||||
 | 
					    MD_FLOAT sigma6 = param->sigma6;
 | 
				
			||||||
 | 
					    MD_FLOAT epsilon = param->epsilon;
 | 
				
			||||||
 | 
					    #endif
 | 
				
			||||||
 | 
					    const MD_FLOAT num1 = 1.0;
 | 
				
			||||||
 | 
					    const MD_FLOAT num48 = 48.0;
 | 
				
			||||||
 | 
					    const MD_FLOAT num05 = 0.5;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int i = 0; i < Nshell; i++) {
 | 
				
			||||||
 | 
					        neighs = &(neighbor->neighshell[i * neighbor->maxneighs]);
 | 
				
			||||||
 | 
					        int numneigh = neighbor->numNeighShell[i];
 | 
				
			||||||
 | 
					        int iatom = neighbor->listshell[i];
 | 
				
			||||||
 | 
					        MD_FLOAT xtmp = atom_x(iatom);
 | 
				
			||||||
 | 
					        MD_FLOAT ytmp = atom_y(iatom);
 | 
				
			||||||
 | 
					        MD_FLOAT ztmp = atom_z(iatom);
 | 
				
			||||||
 | 
					        MD_FLOAT fix = 0;
 | 
				
			||||||
 | 
					        MD_FLOAT fiy = 0;
 | 
				
			||||||
 | 
					        MD_FLOAT fiz = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					        const int type_i = atom->type[i];
 | 
				
			||||||
 | 
					        #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int k = 0; k < numneigh; k++) {
 | 
				
			||||||
 | 
					            int jatom = neighs[k];
 | 
				
			||||||
 | 
					            MD_FLOAT delx = xtmp - atom_x(jatom);
 | 
				
			||||||
 | 
					            MD_FLOAT dely = ytmp - atom_y(jatom);
 | 
				
			||||||
 | 
					            MD_FLOAT delz = ztmp - atom_z(jatom);
 | 
				
			||||||
 | 
					            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if(rsq < cutforcesq) {
 | 
				
			||||||
 | 
					                MD_FLOAT sr2 = num1 / rsq;
 | 
				
			||||||
 | 
					                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
				
			||||||
 | 
					                MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
 | 
				
			||||||
 | 
					                fix += delx * force;
 | 
				
			||||||
 | 
					                fiy += dely * force;
 | 
				
			||||||
 | 
					                fiz += delz * force;
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                atom_fx(jatom) -= delx * force;
 | 
				
			||||||
 | 
					                atom_fy(jatom) -= dely * force;
 | 
				
			||||||
 | 
					                atom_fz(jatom) -= delz * force;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        atom_fx(iatom) += fix;
 | 
				
			||||||
 | 
					        atom_fy(iatom) += fiy;
 | 
				
			||||||
 | 
					        atom_fz(iatom) += fiz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,8 +4,9 @@
 | 
				
			|||||||
 * Use of this source code is governed by a LGPL-3.0
 | 
					 * Use of this source code is governed by a LGPL-3.0
 | 
				
			||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <parameter.h>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <box.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
#ifndef __ATOM_H_
 | 
					#ifndef __ATOM_H_
 | 
				
			||||||
#define __ATOM_H_
 | 
					#define __ATOM_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -56,6 +57,8 @@ typedef struct {
 | 
				
			|||||||
    MD_FLOAT *sigma6;
 | 
					    MD_FLOAT *sigma6;
 | 
				
			||||||
    MD_FLOAT *cutforcesq;
 | 
					    MD_FLOAT *cutforcesq;
 | 
				
			||||||
    MD_FLOAT *cutneighsq;
 | 
					    MD_FLOAT *cutneighsq;
 | 
				
			||||||
 | 
					    //TODO: insert the id number
 | 
				
			||||||
 | 
					    //MD_FLOAT *Atom_id;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // DEM
 | 
					    // DEM
 | 
				
			||||||
    MD_FLOAT *radius;
 | 
					    MD_FLOAT *radius;
 | 
				
			||||||
@@ -64,6 +67,9 @@ typedef struct {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // Device data
 | 
					    // Device data
 | 
				
			||||||
    DeviceAtom d_atom;
 | 
					    DeviceAtom d_atom;
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					    //Info Subdomain
 | 
				
			||||||
 | 
					    Box mybox;            
 | 
				
			||||||
} Atom;
 | 
					} Atom;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void initAtom(Atom*);
 | 
					extern void initAtom(Atom*);
 | 
				
			||||||
@@ -73,9 +79,19 @@ extern int readAtom_pdb(Atom*, Parameter*);
 | 
				
			|||||||
extern int readAtom_gro(Atom*, Parameter*);
 | 
					extern int readAtom_gro(Atom*, Parameter*);
 | 
				
			||||||
extern int readAtom_dmp(Atom*, Parameter*);
 | 
					extern int readAtom_dmp(Atom*, Parameter*);
 | 
				
			||||||
extern int readAtom_in(Atom*, Parameter*);
 | 
					extern int readAtom_in(Atom*, Parameter*);
 | 
				
			||||||
extern void writeAtom(Atom*, Parameter*);
 | 
					 | 
				
			||||||
extern void growAtom(Atom*);
 | 
					extern void growAtom(Atom*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int  packGhost(Atom*, int, MD_FLOAT*, int*);
 | 
				
			||||||
 | 
					int  unpackGhost(Atom*, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					int  packExchange(Atom*, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					int  unpackExchange(Atom*, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					void packForward(Atom*, int, int*, MD_FLOAT*, int*); 
 | 
				
			||||||
 | 
					void unpackForward(Atom*, int, int, MD_FLOAT*);
 | 
				
			||||||
 | 
					void packReverse(Atom* , int , int , MD_FLOAT*);
 | 
				
			||||||
 | 
					void unpackReverse(Atom*, int, int*, MD_FLOAT*);
 | 
				
			||||||
 | 
					void pbc(Atom*);
 | 
				
			||||||
 | 
					void copy(Atom*, int, int);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef AOS
 | 
					#ifdef AOS
 | 
				
			||||||
#   define POS_DATA_LAYOUT     "AoS"
 | 
					#   define POS_DATA_LAYOUT     "AoS"
 | 
				
			||||||
#   define atom_x(i)           atom->x[(i) * 3 + 0]
 | 
					#   define atom_x(i)           atom->x[(i) * 3 + 0]
 | 
				
			||||||
@@ -100,4 +116,8 @@ extern void growAtom(Atom*);
 | 
				
			|||||||
#   define atom_fz(i)          atom->fz[i]
 | 
					#   define atom_fz(i)          atom->fz[i]
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#   define buf_x(i)            buf[3*(i)] 
 | 
				
			||||||
 | 
					#   define buf_y(i)            buf[3*(i)+1]
 | 
				
			||||||
 | 
					#   define buf_z(i)            buf[3*(i)+2]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -20,9 +20,14 @@ typedef struct {
 | 
				
			|||||||
    int ncalls;
 | 
					    int ncalls;
 | 
				
			||||||
    int maxneighs;
 | 
					    int maxneighs;
 | 
				
			||||||
    int half_neigh;
 | 
					    int half_neigh;
 | 
				
			||||||
 | 
					    int half_stencil;
 | 
				
			||||||
    int *neighbors;
 | 
					    int *neighbors;
 | 
				
			||||||
    int *numneigh;
 | 
					    int *numneigh;
 | 
				
			||||||
 | 
					    //MPI
 | 
				
			||||||
 | 
					    int Nshell;         //# of atoms in listShell
 | 
				
			||||||
 | 
					    int *numNeighShell; //# of neighs for each atom in listShell
 | 
				
			||||||
 | 
					    int *neighshell;    //list of neighs for each atom in listShell
 | 
				
			||||||
 | 
					    int *listshell;     //Atoms to compute the force
 | 
				
			||||||
    // Device data
 | 
					    // Device data
 | 
				
			||||||
    DeviceNeighbor d_neighbor;
 | 
					    DeviceNeighbor d_neighbor;
 | 
				
			||||||
} Neighbor;
 | 
					} Neighbor;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,8 +5,11 @@
 | 
				
			|||||||
 * license that can be found in the LICENSE file.
 | 
					 * license that can be found in the LICENSE file.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <comm.h>
 | 
				
			||||||
 | 
					#include <parameter.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef __VTK_H_
 | 
					#ifndef __VTK_H_
 | 
				
			||||||
#define __VTK_H_
 | 
					#define __VTK_H_
 | 
				
			||||||
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
					extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
				
			||||||
 | 
					extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										304
									
								
								lammps/main.c
									
									
									
									
									
								
							
							
						
						
									
										304
									
								
								lammps/main.c
									
									
									
									
									
								
							@@ -11,10 +11,7 @@
 | 
				
			|||||||
#include <limits.h>
 | 
					#include <limits.h>
 | 
				
			||||||
#include <math.h>
 | 
					#include <math.h>
 | 
				
			||||||
#include <float.h>
 | 
					#include <float.h>
 | 
				
			||||||
#include <omp.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <likwid-marker.h>
 | 
					#include <likwid-marker.h>
 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <allocate.h>
 | 
					#include <allocate.h>
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
#include <device.h>
 | 
					#include <device.h>
 | 
				
			||||||
@@ -24,13 +21,19 @@
 | 
				
			|||||||
#include <timing.h>
 | 
					#include <timing.h>
 | 
				
			||||||
#include <neighbor.h>
 | 
					#include <neighbor.h>
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <pbc.h>
 | 
					 | 
				
			||||||
#include <stats.h>
 | 
					#include <stats.h>
 | 
				
			||||||
#include <timers.h>
 | 
					#include <timers.h>
 | 
				
			||||||
#include <util.h>
 | 
					#include <util.h>
 | 
				
			||||||
#include <vtk.h>
 | 
					#include <vtk.h>
 | 
				
			||||||
 | 
					#include <comm.h>
 | 
				
			||||||
 | 
					#include <grid.h>
 | 
				
			||||||
 | 
					#include <shell_methods.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
					#define HLINE "----------------------------------------------------------------------------\n"
 | 
				
			||||||
 | 
					#ifdef CUDA_TARGET
 | 
				
			||||||
 | 
					extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
 | 
					extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
 | 
				
			||||||
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
 | 
					extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
 | 
				
			||||||
@@ -38,69 +41,6 @@ extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 | 
				
			|||||||
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
 | 
					extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
 | 
				
			||||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 | 
					extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CUDA_TARGET
 | 
					 | 
				
			||||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
					 | 
				
			||||||
    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
					 | 
				
			||||||
    double S, E;
 | 
					 | 
				
			||||||
    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
 | 
					 | 
				
			||||||
    param->xprd = param->nx * param->lattice;
 | 
					 | 
				
			||||||
    param->yprd = param->ny * param->lattice;
 | 
					 | 
				
			||||||
    param->zprd = param->nz * param->lattice;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    S = getTimeStamp();
 | 
					 | 
				
			||||||
    initAtom(atom);
 | 
					 | 
				
			||||||
    initPbc(atom);
 | 
					 | 
				
			||||||
    initStats(stats);
 | 
					 | 
				
			||||||
    initNeighbor(neighbor, param);
 | 
					 | 
				
			||||||
    if(param->input_file == NULL) {
 | 
					 | 
				
			||||||
        createAtom(atom, param);
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        readAtom(atom, param);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    setupNeighbor(param);
 | 
					 | 
				
			||||||
    setupThermo(param, atom->Natoms);
 | 
					 | 
				
			||||||
    if(param->input_file == NULL) { adjustThermo(param, atom); }
 | 
					 | 
				
			||||||
    #ifdef SORT_ATOMS
 | 
					 | 
				
			||||||
    atom->Nghost = 0;
 | 
					 | 
				
			||||||
    sortAtom(atom);
 | 
					 | 
				
			||||||
    #endif
 | 
					 | 
				
			||||||
    setupPbc(atom, param);
 | 
					 | 
				
			||||||
    initDevice(atom, neighbor);
 | 
					 | 
				
			||||||
    updatePbc(atom, param, true);
 | 
					 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					 | 
				
			||||||
    E = getTimeStamp();
 | 
					 | 
				
			||||||
    return E-S;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
					 | 
				
			||||||
    double S, E;
 | 
					 | 
				
			||||||
    S = getTimeStamp();
 | 
					 | 
				
			||||||
    LIKWID_MARKER_START("reneighbour");
 | 
					 | 
				
			||||||
    updateAtomsPbc(atom, param);
 | 
					 | 
				
			||||||
    #ifdef SORT_ATOMS
 | 
					 | 
				
			||||||
    atom->Nghost = 0;
 | 
					 | 
				
			||||||
    sortAtom(atom);
 | 
					 | 
				
			||||||
    #endif
 | 
					 | 
				
			||||||
    setupPbc(atom, param);
 | 
					 | 
				
			||||||
    updatePbc(atom, param, true);
 | 
					 | 
				
			||||||
    buildNeighbor(atom, neighbor);
 | 
					 | 
				
			||||||
    LIKWID_MARKER_STOP("reneighbour");
 | 
					 | 
				
			||||||
    E = getTimeStamp();
 | 
					 | 
				
			||||||
    return E-S;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void printAtomState(Atom *atom) {
 | 
					 | 
				
			||||||
    printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
 | 
					 | 
				
			||||||
    // int nall = atom->Nlocal + atom->Nghost;
 | 
					 | 
				
			||||||
    // for (int i=0; i<nall; i++) {
 | 
					 | 
				
			||||||
    //     printf("%d  %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]);
 | 
					 | 
				
			||||||
    // }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
					double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
				
			||||||
    if(param->force_field == FF_EAM) {
 | 
					    if(param->force_field == FF_EAM) {
 | 
				
			||||||
        return computeForceEam(eam, param, atom, neighbor, stats);
 | 
					        return computeForceEam(eam, param, atom, neighbor, stats);
 | 
				
			||||||
@@ -113,7 +53,7 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(param->half_neigh) {
 | 
					    if(param->half_neigh || param->method) {
 | 
				
			||||||
        return computeForceLJHalfNeigh(param, atom, neighbor, stats);
 | 
					        return computeForceLJHalfNeigh(param, atom, neighbor, stats);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -124,6 +64,102 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
				
			|||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time){
 | 
				
			||||||
 | 
					    double S, E;
 | 
				
			||||||
 | 
					    int dims = 3;   //TODO: Adjust to do in 3d and 2d
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    if(param->balance == RCB) {
 | 
				
			||||||
 | 
					        rcbBalance(grid, atom, param, meanBisect,dims,0);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid);
 | 
				
			||||||
 | 
					    }else if(param->balance == meanTimeRCB){
 | 
				
			||||||
 | 
					        rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid);
 | 
				
			||||||
 | 
					    }else if(param->balance == Staggered) {
 | 
				
			||||||
 | 
					        staggeredBalance(grid, atom, param, time);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid);
 | 
				
			||||||
 | 
					        exchangeComm(comm,atom);
 | 
				
			||||||
 | 
					    }else { } //Do nothing
 | 
				
			||||||
 | 
					    //printGrid(grid);
 | 
				
			||||||
 | 
					  E = getTimeStamp();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  return E-S;
 | 
				
			||||||
 | 
					} 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
 | 
				
			||||||
 | 
					{      
 | 
				
			||||||
 | 
					    double E,S,time;
 | 
				
			||||||
 | 
					    int me;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(world,&me);
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    if(param->balance == meanTimeRCB || param->balance == RCB){
 | 
				
			||||||
 | 
					        rcbBalance(grid, atom, param, meanBisect,3,0);
 | 
				
			||||||
 | 
					        neighComm(comm, param, grid); 
 | 
				
			||||||
 | 
					    }      
 | 
				
			||||||
 | 
					    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world); 
 | 
				
			||||||
 | 
					    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
 | 
				
			||||||
 | 
					    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
				
			||||||
 | 
					    double S, E;
 | 
				
			||||||
 | 
					    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
 | 
				
			||||||
 | 
					    param->xprd = param->nx * param->lattice;
 | 
				
			||||||
 | 
					    param->yprd = param->ny * param->lattice;
 | 
				
			||||||
 | 
					    param->zprd = param->nz * param->lattice;
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    initAtom(atom);
 | 
				
			||||||
 | 
					    initStats(stats);
 | 
				
			||||||
 | 
					    initNeighbor(neighbor, param);
 | 
				
			||||||
 | 
					    if(param->input_file == NULL) {
 | 
				
			||||||
 | 
					        createAtom(atom, param);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        readAtom(atom, param);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    setupGrid(grid,atom,param);
 | 
				
			||||||
 | 
					    setupNeighbor(param);
 | 
				
			||||||
 | 
					    setupComm(comm, param, grid);
 | 
				
			||||||
 | 
					    if(param->balance){  
 | 
				
			||||||
 | 
					        initialBalance(param, eam, atom, neighbor, stats, comm, grid);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    setupThermo(param, atom->Natoms);
 | 
				
			||||||
 | 
					    if(param->input_file == NULL) { adjustThermo(param, atom); }
 | 
				
			||||||
 | 
					    #ifdef SORT_ATOMS
 | 
				
			||||||
 | 
					    atom->Nghost = 0;
 | 
				
			||||||
 | 
					    sortAtom(atom);
 | 
				
			||||||
 | 
					    #endif
 | 
				
			||||||
 | 
					    initDevice(atom, neighbor);
 | 
				
			||||||
 | 
					    ghostNeighbor(comm, atom, param); 
 | 
				
			||||||
 | 
					    buildNeighbor(atom, neighbor);
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
 | 
					    double S, E;
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					    LIKWID_MARKER_START("reneighbour");
 | 
				
			||||||
 | 
					    #ifdef SORT_ATOMS
 | 
				
			||||||
 | 
					    atom->Nghost = 0;
 | 
				
			||||||
 | 
					    sortAtom(atom);
 | 
				
			||||||
 | 
					    #endif
 | 
				
			||||||
 | 
					    ghostNeighbor(comm, atom, param);
 | 
				
			||||||
 | 
					    buildNeighbor(atom, neighbor);
 | 
				
			||||||
 | 
					    LIKWID_MARKER_STOP("reneighbour");
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					double updateAtoms(Comm* comm, Atom* atom){
 | 
				
			||||||
 | 
					    double S,E;
 | 
				
			||||||
 | 
					    S = getTimeStamp();
 | 
				
			||||||
 | 
					        exchangeComm(comm, atom);
 | 
				
			||||||
 | 
					    E = getTimeStamp();
 | 
				
			||||||
 | 
					    return E-S;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void writeInput(Parameter *param, Atom *atom) {
 | 
					void writeInput(Parameter *param, Atom *atom) {
 | 
				
			||||||
    FILE *fpin = fopen("input.in", "w");
 | 
					    FILE *fpin = fopen("input.in", "w");
 | 
				
			||||||
    fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
 | 
					    fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
 | 
				
			||||||
@@ -142,7 +178,8 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
    Neighbor neighbor;
 | 
					    Neighbor neighbor;
 | 
				
			||||||
    Stats stats;
 | 
					    Stats stats;
 | 
				
			||||||
    Parameter param;
 | 
					    Parameter param;
 | 
				
			||||||
 | 
					    Comm comm; 
 | 
				
			||||||
 | 
					    Grid grid;
 | 
				
			||||||
    LIKWID_MARKER_INIT;
 | 
					    LIKWID_MARKER_INIT;
 | 
				
			||||||
#pragma omp parallel
 | 
					#pragma omp parallel
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
@@ -150,10 +187,10 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
        //LIKWID_MARKER_REGISTER("reneighbour");
 | 
					        //LIKWID_MARKER_REGISTER("reneighbour");
 | 
				
			||||||
        //LIKWID_MARKER_REGISTER("pbc");
 | 
					        //LIKWID_MARKER_REGISTER("pbc");
 | 
				
			||||||
    } 
 | 
					    } 
 | 
				
			||||||
 | 
					    initComm(&argc, &argv, &comm);
 | 
				
			||||||
    initParameter(¶m);
 | 
					    initParameter(¶m);
 | 
				
			||||||
    for(int i = 0; i < argc; i++) {
 | 
					    for(int i = 0; i < argc; i++) {
 | 
				
			||||||
        if((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
 | 
					        if((strcmp(argv[i], "-p") == 0)) {
 | 
				
			||||||
            readParameter(¶m, argv[++i]);
 | 
					            readParameter(¶m, argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -192,6 +229,24 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
            param.half_neigh = atoi(argv[++i]);
 | 
					            param.half_neigh = atoi(argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        } 
 | 
					        } 
 | 
				
			||||||
 | 
					        if((strcmp(argv[i], "-method") == 0)) {
 | 
				
			||||||
 | 
					            param.method = atoi(argv[++i]);
 | 
				
			||||||
 | 
					            if (param.method>3 || param.method< 0){
 | 
				
			||||||
 | 
					                if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
 | 
				
			||||||
 | 
					                endComm(&comm);   
 | 
				
			||||||
 | 
					                exit(0);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        } 
 | 
				
			||||||
 | 
					        if((strcmp(argv[i], "-bal") == 0)) {
 | 
				
			||||||
 | 
					            param.balance = atoi(argv[++i]);
 | 
				
			||||||
 | 
					            if (param.balance>3 || param.balance< 0){
 | 
				
			||||||
 | 
					                if(comm.myproc == 0) fprintf(stderr, "Load Balance does not exist!\n");
 | 
				
			||||||
 | 
					                endComm(&comm);   
 | 
				
			||||||
 | 
					                exit(0);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
 | 
					        if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
 | 
				
			||||||
            param.cutforce = atof(argv[++i]);
 | 
					            param.cutforce = atof(argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
@@ -208,69 +263,69 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
            param.vtk_file = strdup(argv[++i]);
 | 
					            param.vtk_file = strdup(argv[++i]);
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        if((strcmp(argv[i], "-w") == 0)) {
 | 
					 | 
				
			||||||
            param.write_atom_file = strdup(argv[++i]);
 | 
					 | 
				
			||||||
            continue;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
 | 
					        if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
 | 
				
			||||||
 | 
					            if(comm.myproc ==0 ){
 | 
				
			||||||
                printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
					                printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
				
			||||||
                printf(HLINE);
 | 
					                printf(HLINE);
 | 
				
			||||||
            printf("-p / --params <string>:     file to read parameters from (can be specified more than once)\n");
 | 
					                printf("-p <string>:          file to read parameters from (can be specified more than once)\n");
 | 
				
			||||||
                printf("-f <string>:          force field (lj, eam or dem), default lj\n");
 | 
					                printf("-f <string>:          force field (lj, eam or dem), default lj\n");
 | 
				
			||||||
                printf("-i <string>:          input file with atom positions (dump)\n");
 | 
					                printf("-i <string>:          input file with atom positions (dump)\n");
 | 
				
			||||||
                printf("-e <string>:          input file for EAM\n");
 | 
					                printf("-e <string>:          input file for EAM\n");
 | 
				
			||||||
                printf("-n / --nsteps <int>:  set number of timesteps for simulation\n");
 | 
					                printf("-n / --nsteps <int>:  set number of timesteps for simulation\n");
 | 
				
			||||||
                printf("-nx/-ny/-nz <int>:    set linear dimension of systembox in x/y/z direction\n");
 | 
					                printf("-nx/-ny/-nz <int>:    set linear dimension of systembox in x/y/z direction\n");
 | 
				
			||||||
            printf("-half <int>:                use half (1) or full (0) neighbor lists\n");
 | 
					 | 
				
			||||||
                printf("-r / --radius <real>: set cutoff radius\n");
 | 
					                printf("-r / --radius <real>: set cutoff radius\n");
 | 
				
			||||||
                printf("-s / --skin <real>:   set skin (verlet buffer)\n");
 | 
					                printf("-s / --skin <real>:   set skin (verlet buffer)\n");
 | 
				
			||||||
            printf("-w <file>:                  write input atoms to file\n");
 | 
					 | 
				
			||||||
                printf("--freq <real>:        processor frequency (GHz)\n");
 | 
					                printf("--freq <real>:        processor frequency (GHz)\n");
 | 
				
			||||||
                printf("--vtk <string>:       VTK file for visualization\n");
 | 
					                printf("--vtk <string>:       VTK file for visualization\n");
 | 
				
			||||||
                printf(HLINE);
 | 
					                printf(HLINE);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
                exit(EXIT_SUCCESS);
 | 
					                exit(EXIT_SUCCESS);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    param.cutneigh = param.cutforce + param.skin;
 | 
					    if(param.balance>0 && param.method == 1){
 | 
				
			||||||
    setup(¶m, &eam, &atom, &neighbor, &stats);
 | 
					        if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
 | 
				
			||||||
    printParameter(¶m);
 | 
					        endComm(&comm);   
 | 
				
			||||||
    printf(HLINE);
 | 
					        exit(0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    printf("step\ttemp\t\tpressure\n");
 | 
					    param.cutneigh = param.cutforce + param.skin;
 | 
				
			||||||
 | 
					    timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
 | 
				
			||||||
 | 
					    if(comm.myproc == 0)printParameter(¶m);
 | 
				
			||||||
 | 
					    if(comm.myproc == 0)printf(HLINE);
 | 
				
			||||||
 | 
					    if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n"); 
 | 
				
			||||||
    computeThermo(0, ¶m, &atom);
 | 
					    computeThermo(0, ¶m, &atom);
 | 
				
			||||||
    #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
					    #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
				
			||||||
    traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
					    traceAddresses(¶m, &atom, &neighbor, n + 1);// TODO: trace adress
 | 
				
			||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(param.write_atom_file != NULL) {
 | 
					 | 
				
			||||||
        writeAtom(&atom, ¶m);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    //writeInput(¶m, &atom);
 | 
					    //writeInput(¶m, &atom);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    timer[FORCE]    = computeForce(&eam, ¶m, &atom, &neighbor, &stats); 
 | 
					    timer[FORCE]    = computeForce(&eam, ¶m, &atom, &neighbor, &stats); 
 | 
				
			||||||
    timer[NEIGH]    = 0.0;
 | 
					    timer[NEIGH]    = 0.0;
 | 
				
			||||||
 | 
					    timer[FORWARD]  = 0.0;
 | 
				
			||||||
 | 
					    timer[UPDATE]   = 0.0;
 | 
				
			||||||
 | 
					    timer[BALANCE]  = 0.0;  
 | 
				
			||||||
 | 
					    timer[REVERSE]  = reverse(&comm, &atom, ¶m);
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
    timer[TOTAL]    = getTimeStamp();
 | 
					    timer[TOTAL]    = getTimeStamp();
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(param.vtk_file != NULL) {
 | 
					    if(param.vtk_file != NULL) {
 | 
				
			||||||
        write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
 | 
					        printvtk(param.vtk_file, &comm, &atom, ¶m, 0);
 | 
				
			||||||
    } 
 | 
					    } 
 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int n = 0; n < param.ntimes; n++) {
 | 
					    for(int n = 0; n < param.ntimes; n++) {
 | 
				
			||||||
        bool reneigh = (n + 1) % param.reneigh_every == 0;
 | 
					        bool reneigh = (n + 1) % param.reneigh_every == 0;
 | 
				
			||||||
        initialIntegrate(reneigh, ¶m, &atom);
 | 
					        initialIntegrate(reneigh, ¶m, &atom);
 | 
				
			||||||
        if((n + 1) % param.reneigh_every) {
 | 
					        if(reneigh) { 
 | 
				
			||||||
            updatePbc(&atom, ¶m, false);
 | 
					            timer[UPDATE] +=updateAtoms(&comm,&atom);         
 | 
				
			||||||
 | 
					            if(param.balance && !((n+1)%param.balance_every))
 | 
				
			||||||
 | 
					                timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);
 | 
				
			||||||
 | 
					            timer[NEIGH] += reneighbour(&comm, ¶m, &atom, &neighbor);
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
 | 
					            timer[FORWARD] += forward(&comm, &atom, ¶m);
 | 
				
			||||||
        } 
 | 
					        } 
 | 
				
			||||||
 | 
					 | 
				
			||||||
        #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
					        #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
				
			||||||
        traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
					        traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
				
			||||||
        #endif
 | 
					        #endif
 | 
				
			||||||
 | 
					 | 
				
			||||||
        timer[FORCE] += computeForce(&eam, ¶m, &atom, &neighbor, &stats);
 | 
					        timer[FORCE] += computeForce(&eam, ¶m, &atom, &neighbor, &stats);
 | 
				
			||||||
 | 
					        timer[REVERSE] += reverse(&comm, &atom, ¶m);
 | 
				
			||||||
        finalIntegrate(reneigh, ¶m, &atom);
 | 
					        finalIntegrate(reneigh, ¶m, &atom);
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
					        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
				
			||||||
@@ -281,47 +336,42 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if(param.vtk_file != NULL) {
 | 
					        if(param.vtk_file != NULL) {
 | 
				
			||||||
            write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
 | 
					            printvtk(param.vtk_file, &comm, &atom ,¶m, n+1);
 | 
				
			||||||
        } 
 | 
					        } 
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Barrier(world);
 | 
				
			||||||
    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
					    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
				
			||||||
    computeThermo(-1, ¶m, &atom);
 | 
					    computeThermo(-1, ¶m, &atom);
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					    double mint[NUMTIMER];
 | 
				
			||||||
 | 
					    double maxt[NUMTIMER];
 | 
				
			||||||
 | 
					    double sumt[NUMTIMER];
 | 
				
			||||||
 | 
					    timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
 | 
				
			||||||
 | 
					    MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
 | 
				
			||||||
 | 
					    MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
 | 
				
			||||||
 | 
					    MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
 | 
				
			||||||
 | 
					    int Nghost;
 | 
				
			||||||
 | 
					    MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(comm.myproc == 0){
 | 
				
			||||||
 | 
					        int n = comm.numproc;
 | 
				
			||||||
        printf(HLINE);
 | 
					        printf(HLINE);
 | 
				
			||||||
    printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
 | 
					        printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
 | 
				
			||||||
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
 | 
					        printf("TOTAL %.2fs\n\n",timer[TOTAL]);
 | 
				
			||||||
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
 | 
					        printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
 | 
				
			||||||
 | 
					        printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
 | 
				
			||||||
 | 
					        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
 | 
				
			||||||
 | 
					        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
 | 
				
			||||||
 | 
					        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
 | 
				
			||||||
        printf(HLINE);
 | 
					        printf(HLINE);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    int nthreads = 0;
 | 
					 | 
				
			||||||
    int chunkSize = 0;
 | 
					 | 
				
			||||||
    omp_sched_t schedKind;
 | 
					 | 
				
			||||||
    char schedType[10];
 | 
					 | 
				
			||||||
#pragma omp parallel
 | 
					 | 
				
			||||||
#pragma omp master
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    	omp_get_schedule(&schedKind, &chunkSize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    	switch (schedKind)
 | 
					 | 
				
			||||||
    	{
 | 
					 | 
				
			||||||
        	case omp_sched_static:  strcpy(schedType, "static"); break;
 | 
					 | 
				
			||||||
        	case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
 | 
					 | 
				
			||||||
        	case omp_sched_guided:  strcpy(schedType, "guided"); break;
 | 
					 | 
				
			||||||
        	case omp_sched_auto:    strcpy(schedType, "auto"); break;
 | 
					 | 
				
			||||||
    	}
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	nthreads = omp_get_max_threads();
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    printf("Num threads: %d\n", nthreads);
 | 
					 | 
				
			||||||
    printf("Schedule: (%s,%d)\n", schedType, chunkSize);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
        printf("Performance: %.2f million atom updates per second\n",
 | 
					        printf("Performance: %.2f million atom updates per second\n",
 | 
				
			||||||
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
					            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
#ifdef COMPUTE_STATS
 | 
					#ifdef COMPUTE_STATS
 | 
				
			||||||
    displayStatistics(&atom, ¶m, &stats, timer);
 | 
					    displayStatistics(&atom, ¶m, &stats, timer);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					    endComm(&comm);
 | 
				
			||||||
    LIKWID_MARKER_CLOSE;
 | 
					    LIKWID_MARKER_CLOSE;
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -11,15 +11,18 @@
 | 
				
			|||||||
#include <neighbor.h>
 | 
					#include <neighbor.h>
 | 
				
			||||||
#include <parameter.h>
 | 
					#include <parameter.h>
 | 
				
			||||||
#include <atom.h>
 | 
					#include <atom.h>
 | 
				
			||||||
 | 
					#include <util.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					#include <sort.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SMALL 1.0e-6
 | 
					#define SMALL 1.0e-6
 | 
				
			||||||
#define FACTOR 0.999
 | 
					#define FACTOR 0.999
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MD_FLOAT xprd, yprd, zprd;
 | 
					MD_FLOAT xprd, yprd, zprd;
 | 
				
			||||||
MD_FLOAT bininvx, bininvy, bininvz;
 | 
					MD_FLOAT bininvx, bininvy, bininvz;
 | 
				
			||||||
int mbinxlo, mbinylo, mbinzlo;
 | 
					int pad_x, pad_y, pad_z;
 | 
				
			||||||
int nbinx, nbiny, nbinz;
 | 
					int nbinx, nbiny, nbinz;
 | 
				
			||||||
int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
					int mbinx, mbiny, mbinz; // m bins in x, y, z
 | 
				
			||||||
int *bincount;
 | 
					int *bincount;
 | 
				
			||||||
int *bins;
 | 
					int *bins;
 | 
				
			||||||
int mbins;              //total number of bins
 | 
					int mbins;              //total number of bins
 | 
				
			||||||
@@ -30,8 +33,18 @@ int nmax;
 | 
				
			|||||||
int nstencil;           // # of bins in stencil
 | 
					int nstencil;           // # of bins in stencil
 | 
				
			||||||
int* stencil;           // stencil list of bin offsets
 | 
					int* stencil;           // stencil list of bin offsets
 | 
				
			||||||
MD_FLOAT binsizex, binsizey, binsizez;
 | 
					MD_FLOAT binsizex, binsizey, binsizez;
 | 
				
			||||||
 | 
					int me;                 //rank
 | 
				
			||||||
 | 
					int method;             // method
 | 
				
			||||||
 | 
					int half_stencil;       //If half stencil exist 
 | 
				
			||||||
 | 
					int shellMethod;        //If shell method exist   
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
					static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
				
			||||||
static MD_FLOAT bindist(int, int, int);
 | 
					static MD_FLOAT bindist(int, int, int);
 | 
				
			||||||
 | 
					static int ghostZone(Atom*, int);
 | 
				
			||||||
 | 
					static int eightZone(Atom*, int);
 | 
				
			||||||
 | 
					static int halfZone(Atom*, int);
 | 
				
			||||||
 | 
					static void neighborGhost(Atom*, Neighbor*);
 | 
				
			||||||
 | 
					static inline int interaction(Atom* atom, int i, int j);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* exported subroutines */
 | 
					/* exported subroutines */
 | 
				
			||||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
					void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
				
			||||||
@@ -51,7 +64,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
				
			|||||||
    neighbor->maxneighs = 100;
 | 
					    neighbor->maxneighs = 100;
 | 
				
			||||||
    neighbor->numneigh = NULL;
 | 
					    neighbor->numneigh = NULL;
 | 
				
			||||||
    neighbor->neighbors = NULL;
 | 
					    neighbor->neighbors = NULL;
 | 
				
			||||||
 | 
					    //========== MPI =============
 | 
				
			||||||
 | 
					    shellMethod = 0;
 | 
				
			||||||
 | 
					    half_stencil = 0;
 | 
				
			||||||
 | 
					    method = param->method;
 | 
				
			||||||
 | 
					    if(method == halfShell || method == eightShell){ 
 | 
				
			||||||
 | 
					        param->half_neigh = 1;
 | 
				
			||||||
 | 
					        shellMethod = 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(method == halfStencil){
 | 
				
			||||||
 | 
					        param->half_neigh = 0;
 | 
				
			||||||
 | 
					        half_stencil = 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    me = 0;
 | 
				
			||||||
 | 
					    MPI_Comm_rank(MPI_COMM_WORLD,&me);
 | 
				
			||||||
    neighbor->half_neigh = param->half_neigh;
 | 
					    neighbor->half_neigh = param->half_neigh;
 | 
				
			||||||
 | 
					    neighbor->Nshell = 0;  
 | 
				
			||||||
 | 
					    neighbor->numNeighShell = NULL;
 | 
				
			||||||
 | 
					    neighbor->neighshell = NULL;
 | 
				
			||||||
 | 
					    neighbor->listshell = NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void setupNeighbor(Parameter* param) {
 | 
					void setupNeighbor(Parameter* param) {
 | 
				
			||||||
@@ -64,7 +95,6 @@ void setupNeighbor(Parameter* param) {
 | 
				
			|||||||
        yprd = param->yprd;
 | 
					        yprd = param->yprd;
 | 
				
			||||||
        zprd = param->zprd;
 | 
					        zprd = param->zprd;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    // TODO: update lo and hi for standard case and use them here instead
 | 
					    // TODO: update lo and hi for standard case and use them here instead
 | 
				
			||||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
 | 
					    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
 | 
				
			||||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
					    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
				
			||||||
@@ -93,54 +123,48 @@ void setupNeighbor(Parameter* param) {
 | 
				
			|||||||
        bininvy = 1.0 / binsizey;
 | 
					        bininvy = 1.0 / binsizey;
 | 
				
			||||||
        bininvz = 1.0 / binsizez;
 | 
					        bininvz = 1.0 / binsizez;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    pad_x = (int)(cutneigh*bininvx);
 | 
				
			||||||
    coord = xlo - cutneigh - SMALL * xprd;
 | 
					    while(pad_x * binsizex < FACTOR * cutneigh) pad_x++;
 | 
				
			||||||
    mbinxlo = (int) (coord * bininvx);
 | 
					    pad_y = (int)(cutneigh*bininvy);
 | 
				
			||||||
    if (coord < 0.0) { mbinxlo = mbinxlo - 1; }
 | 
					    while(pad_y * binsizey < FACTOR * cutneigh) pad_y++;
 | 
				
			||||||
    coord = xhi + cutneigh + SMALL * xprd;
 | 
					    pad_z = (int)(cutneigh*bininvz);
 | 
				
			||||||
    mbinxhi = (int) (coord * bininvx);
 | 
					    while(pad_z * binsizez < FACTOR * cutneigh) pad_z++;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    coord = ylo - cutneigh - SMALL * yprd;
 | 
					 | 
				
			||||||
    mbinylo = (int) (coord * bininvy);
 | 
					 | 
				
			||||||
    if (coord < 0.0) { mbinylo = mbinylo - 1; }
 | 
					 | 
				
			||||||
    coord = yhi + cutneigh + SMALL * yprd;
 | 
					 | 
				
			||||||
    mbinyhi = (int) (coord * bininvy);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    coord = zlo - cutneigh - SMALL * zprd;
 | 
					 | 
				
			||||||
    mbinzlo = (int) (coord * bininvz);
 | 
					 | 
				
			||||||
    if (coord < 0.0) { mbinzlo = mbinzlo - 1; }
 | 
					 | 
				
			||||||
    coord = zhi + cutneigh + SMALL * zprd;
 | 
					 | 
				
			||||||
    mbinzhi = (int) (coord * bininvz);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbinxlo = mbinxlo - 1;
 | 
					 | 
				
			||||||
    mbinxhi = mbinxhi + 1;
 | 
					 | 
				
			||||||
    mbinx = mbinxhi - mbinxlo + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbinylo = mbinylo - 1;
 | 
					 | 
				
			||||||
    mbinyhi = mbinyhi + 1;
 | 
					 | 
				
			||||||
    mbiny = mbinyhi - mbinylo + 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbinzlo = mbinzlo - 1;
 | 
					 | 
				
			||||||
    mbinzhi = mbinzhi + 1;
 | 
					 | 
				
			||||||
    mbinz = mbinzhi - mbinzlo + 1;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    nextx = (int) (cutneigh * bininvx);
 | 
					    nextx = (int) (cutneigh * bininvx);
 | 
				
			||||||
    if(nextx * binsizex < FACTOR * cutneigh) nextx++;
 | 
					    if(nextx * binsizex < FACTOR * cutneigh){
 | 
				
			||||||
 | 
					        nextx++;
 | 
				
			||||||
 | 
					        pad_x++;
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
    nexty = (int) (cutneigh * bininvy);
 | 
					    nexty = (int) (cutneigh * bininvy);
 | 
				
			||||||
    if(nexty * binsizey < FACTOR * cutneigh) nexty++;
 | 
					    if(nexty * binsizey < FACTOR * cutneigh){
 | 
				
			||||||
 | 
					        nexty++;
 | 
				
			||||||
 | 
					        pad_y++;
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
    nextz = (int) (cutneigh * bininvz);
 | 
					    nextz = (int) (cutneigh * bininvz);
 | 
				
			||||||
    if(nextz * binsizez < FACTOR * cutneigh) nextz++;
 | 
					    if(nextz * binsizez < FACTOR * cutneigh){
 | 
				
			||||||
 | 
					        nextz++;
 | 
				
			||||||
 | 
					        pad_z++;
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mbinx = nbinx+4*pad_x;
 | 
				
			||||||
 | 
					    mbiny = nbiny+4*pad_y;
 | 
				
			||||||
 | 
					    mbinz = nbinz+4*pad_z;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (stencil) { free(stencil); }
 | 
					    if (stencil) { free(stencil); }
 | 
				
			||||||
    stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
 | 
					    stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
 | 
				
			||||||
    nstencil = 0;
 | 
					    nstencil = 0;
 | 
				
			||||||
    int kstart = -nextz;
 | 
					 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
 | 
					    int kstart = -nextz;
 | 
				
			||||||
 | 
					    int jstart = -nexty; 
 | 
				
			||||||
 | 
					    int istart = -nextx;
 | 
				
			||||||
 | 
					    int ibin = 0;
 | 
				
			||||||
    for(int k = kstart; k <= nextz; k++) {
 | 
					    for(int k = kstart; k <= nextz; k++) {
 | 
				
			||||||
        for(int j = -nexty; j <= nexty; j++) {
 | 
					        for(int j = jstart; j <= nexty; j++) {
 | 
				
			||||||
            for(int i = -nextx; i <= nextx; i++) {
 | 
					            for(int i = istart; i <= nextx; i++) {
 | 
				
			||||||
                if(bindist(i, j, k) < cutneighsq) {     
 | 
					                if(bindist(i, j, k) < cutneighsq) {     
 | 
				
			||||||
                    stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
 | 
					                    int jbin = k * mbiny * mbinx + j * mbinx + i;
 | 
				
			||||||
 | 
					                    if(ibin>jbin && half_stencil) continue;                  
 | 
				
			||||||
 | 
					                    stencil[nstencil++] = jbin;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -155,7 +179,6 @@ void setupNeighbor(Parameter* param) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
					void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
    int nall = atom->Nlocal + atom->Nghost;  
 | 
					    int nall = atom->Nlocal + atom->Nghost;  
 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* extend atom arrays if necessary */
 | 
					    /* extend atom arrays if necessary */
 | 
				
			||||||
    if(nall > nmax) {
 | 
					    if(nall > nmax) {
 | 
				
			||||||
        nmax = nall;
 | 
					        nmax = nall;
 | 
				
			||||||
@@ -164,16 +187,13 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
        neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
					        neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
				
			||||||
        neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
 | 
					        neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* bin local & ghost atoms */
 | 
					    /* bin local & ghost atoms */
 | 
				
			||||||
    binatoms(atom);
 | 
					    binatoms(atom);
 | 
				
			||||||
    int resize = 1;
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* loop over each atom, storing neighbors */
 | 
					    /* loop over each atom, storing neighbors */
 | 
				
			||||||
    while(resize) {
 | 
					    while(resize) {
 | 
				
			||||||
        int new_maxneighs = neighbor->maxneighs;
 | 
					        int new_maxneighs = neighbor->maxneighs;
 | 
				
			||||||
        resize = 0;
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					 | 
				
			||||||
        for(int i = 0; i < atom->Nlocal; i++) {
 | 
					        for(int i = 0; i < atom->Nlocal; i++) {
 | 
				
			||||||
            int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
 | 
					            int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
 | 
				
			||||||
            int n = 0;
 | 
					            int n = 0;
 | 
				
			||||||
@@ -184,21 +204,22 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
            #ifdef EXPLICIT_TYPES
 | 
					            #ifdef EXPLICIT_TYPES
 | 
				
			||||||
            int type_i = atom->type[i];
 | 
					            int type_i = atom->type[i];
 | 
				
			||||||
            #endif
 | 
					            #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for(int k = 0; k < nstencil; k++) {
 | 
					            for(int k = 0; k < nstencil; k++) {
 | 
				
			||||||
                int jbin = ibin + stencil[k];
 | 
					                int jbin = ibin + stencil[k];
 | 
				
			||||||
                int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
					                int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
				
			||||||
 | 
					 | 
				
			||||||
                for(int m = 0; m < bincount[jbin]; m++) {    
 | 
					                for(int m = 0; m < bincount[jbin]; m++) {    
 | 
				
			||||||
                    int j = loc_bin[m];
 | 
					                    int j = loc_bin[m];
 | 
				
			||||||
                    if((j == i) || (neighbor->half_neigh && (j < i))) {
 | 
					                    
 | 
				
			||||||
 | 
					                    if((j==i) || (neighbor->half_neigh && (j<i))) 
 | 
				
			||||||
 | 
					                        continue;              
 | 
				
			||||||
 | 
					                    if(half_stencil && ibin==jbin  && !interaction(atom,i,j))
 | 
				
			||||||
                        continue;          
 | 
					                        continue;          
 | 
				
			||||||
                    }
 | 
					 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                    MD_FLOAT delx = xtmp - atom_x(j);
 | 
					                    MD_FLOAT delx = xtmp - atom_x(j);
 | 
				
			||||||
                    MD_FLOAT dely = ytmp - atom_y(j);  
 | 
					                    MD_FLOAT dely = ytmp - atom_y(j);  
 | 
				
			||||||
                    MD_FLOAT delz = ztmp - atom_z(j);
 | 
					                    MD_FLOAT delz = ztmp - atom_z(j);
 | 
				
			||||||
                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
					                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					 | 
				
			||||||
                    #ifdef EXPLICIT_TYPES
 | 
					                    #ifdef EXPLICIT_TYPES
 | 
				
			||||||
                    int type_j = atom->type[j];
 | 
					                    int type_j = atom->type[j];
 | 
				
			||||||
                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
					                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
				
			||||||
@@ -210,8 +231,8 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					 | 
				
			||||||
            neighbor->numneigh[i] = n;
 | 
					            neighbor->numneigh[i] = n;
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
            if(n >= neighbor->maxneighs) {
 | 
					            if(n >= neighbor->maxneighs) {
 | 
				
			||||||
                resize = 1;
 | 
					                resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -220,14 +241,15 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if(resize) {
 | 
					        if(resize) {
 | 
				
			||||||
            printf("RESIZE %d\n", neighbor->maxneighs);
 | 
					            printf("RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
 | 
				
			||||||
            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
					            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
				
			||||||
            free(neighbor->neighbors);
 | 
					            free(neighbor->neighbors);
 | 
				
			||||||
            neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
 | 
					            neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(method == eightShell) neighborGhost(atom, neighbor);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* internal subroutines */
 | 
					/* internal subroutines */
 | 
				
			||||||
@@ -257,38 +279,22 @@ MD_FLOAT bindist(int i, int j, int k) {
 | 
				
			|||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        delz = (k + 1) * binsizez;
 | 
					        delz = (k + 1) * binsizez;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    return (delx * delx + dely * dely + delz * delz);
 | 
					    return (delx * delx + dely * dely + delz * delz);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
 | 
					int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
 | 
				
			||||||
   int ix, iy, iz;
 | 
					   int ix, iy, iz;
 | 
				
			||||||
 | 
					   MD_FLOAT eps = 1e-9; 
 | 
				
			||||||
 | 
					   MD_FLOAT xlo=0.0; MD_FLOAT ylo=0.0; MD_FLOAT zlo=0.0;
 | 
				
			||||||
 | 
					   xlo = fabs(xlo - pad_x*binsizex)+eps;
 | 
				
			||||||
 | 
					   ylo = fabs(ylo - pad_y*binsizey)+eps;
 | 
				
			||||||
 | 
					   zlo = fabs(zlo - pad_z*binsizez)+eps;
 | 
				
			||||||
 | 
					   ix = (int) ((xin + xlo)*bininvx);
 | 
				
			||||||
 | 
					   iy = (int) ((yin + ylo)*bininvy);
 | 
				
			||||||
 | 
					   iz = (int) ((zin + zlo)*bininvz);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    if(xin >= xprd) {
 | 
					    return (iz * mbiny * mbinx + iy * mbinx + ix);
 | 
				
			||||||
        ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
					    //return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
 | 
				
			||||||
    } else if(xin >= 0.0) {
 | 
					 | 
				
			||||||
        ix = (int)(xin * bininvx) - mbinxlo;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(yin >= yprd) {
 | 
					 | 
				
			||||||
        iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
					 | 
				
			||||||
    } else if(yin >= 0.0) {
 | 
					 | 
				
			||||||
        iy = (int)(yin * bininvy) - mbinylo;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if(zin >= zprd) {
 | 
					 | 
				
			||||||
        iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
 | 
					 | 
				
			||||||
    } else if(zin >= 0.0) {
 | 
					 | 
				
			||||||
        iz = (int)(zin * bininvz) - mbinzlo;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
        iz = (int)(zin * bininvz) - mbinzlo - 1;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void binatoms(Atom *atom) {    
 | 
					void binatoms(Atom *atom) {    
 | 
				
			||||||
@@ -304,7 +310,7 @@ void binatoms(Atom *atom) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        for(int i = 0; i < nall; i++) {
 | 
					        for(int i = 0; i < nall; i++) {
 | 
				
			||||||
            int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
 | 
					            int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
 | 
				
			||||||
 | 
					            if(shellMethod && !ghostZone(atom, i)) continue; 
 | 
				
			||||||
            if(bincount[ibin] < atoms_per_bin) {
 | 
					            if(bincount[ibin] < atoms_per_bin) {
 | 
				
			||||||
                int ac = bincount[ibin]++;
 | 
					                int ac = bincount[ibin]++;
 | 
				
			||||||
                bins[ibin * atoms_per_bin + ac] = i;
 | 
					                bins[ibin * atoms_per_bin + ac] = i;
 | 
				
			||||||
@@ -325,11 +331,9 @@ void sortAtom(Atom* atom) {
 | 
				
			|||||||
    binatoms(atom);
 | 
					    binatoms(atom);
 | 
				
			||||||
    int Nmax = atom->Nmax;
 | 
					    int Nmax = atom->Nmax;
 | 
				
			||||||
    int* binpos = bincount;
 | 
					    int* binpos = bincount;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    for(int i = 1; i < mbins; i++) {
 | 
					    for(int i = 1; i < mbins; i++) {
 | 
				
			||||||
        binpos[i] += binpos[i - 1];
 | 
					        binpos[i] += binpos[i - 1];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    #ifdef AOS
 | 
					    #ifdef AOS
 | 
				
			||||||
    MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
					    MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
				
			||||||
    MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
					    MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
				
			||||||
@@ -367,7 +371,6 @@ void sortAtom(Atom* atom) {
 | 
				
			|||||||
            #endif
 | 
					            #endif
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    free(atom->x);
 | 
					    free(atom->x);
 | 
				
			||||||
    free(atom->vx);
 | 
					    free(atom->vx);
 | 
				
			||||||
    atom->x = new_x;
 | 
					    atom->x = new_x;
 | 
				
			||||||
@@ -383,3 +386,158 @@ void sortAtom(Atom* atom) {
 | 
				
			|||||||
    atom->vz = new_vz;
 | 
					    atom->vz = new_vz;
 | 
				
			||||||
    #endif
 | 
					    #endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* internal subroutines 
 | 
				
			||||||
 | 
					Added with MPI*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int ghostZone(Atom* atom, int i){
 | 
				
			||||||
 | 
					    if(i<atom->Nlocal)  return 1;
 | 
				
			||||||
 | 
					    else if(method == halfShell)  return halfZone(atom,i);
 | 
				
			||||||
 | 
					    else if(method == eightShell) return eightZone(atom,i); 
 | 
				
			||||||
 | 
					    else return 0;  
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int eightZone(Atom* atom, int i)
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    //Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
 | 
				
			||||||
 | 
					    int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
 | 
				
			||||||
 | 
					    MD_FLOAT *hi = atom->mybox.hi;
 | 
				
			||||||
 | 
					    int zone = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(BigOrEqual(atom_x(i),hi[_x])) {
 | 
				
			||||||
 | 
					        zone += 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(BigOrEqual(atom_y(i),hi[_y])) {
 | 
				
			||||||
 | 
					        zone += 2;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(BigOrEqual(atom_z(i),hi[_z])) {
 | 
				
			||||||
 | 
					        zone += 4;
 | 
				
			||||||
 | 
					    }   
 | 
				
			||||||
 | 
					    return zoneMapping[zone];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int halfZone(Atom* atom, int i)
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    MD_FLOAT *hi = atom->mybox.hi;
 | 
				
			||||||
 | 
					    MD_FLOAT *lo = atom->mybox.lo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(atom_x(i)<lo[_x] && atom_y(i)<hi[_y] && atom_z(i)<hi[_z]){
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    } else if(atom_y(i)<lo[_y] && atom_z(i)<hi[_z]){
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    } else if(atom_z(i)<lo[_z]){
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void neighborGhost(Atom *atom, Neighbor *neighbor) {
 | 
				
			||||||
 | 
					    int Nshell=0;
 | 
				
			||||||
 | 
					    int Nlocal = atom->Nlocal;
 | 
				
			||||||
 | 
					    int Nghost = atom->Nghost;
 | 
				
			||||||
 | 
					    if(neighbor->listshell) free(neighbor->listshell);
 | 
				
			||||||
 | 
					    neighbor->listshell = (int*) malloc(Nghost * sizeof(int));
 | 
				
			||||||
 | 
					    int* listzone  = (int*) malloc(8 * Nghost * sizeof(int));
 | 
				
			||||||
 | 
					    int countAtoms[8] = {0,0,0,0,0,0,0,0};
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					    //Selecting ghost atoms for interaction
 | 
				
			||||||
 | 
					   for(int i = Nlocal; i < Nlocal+Nghost; i++) {
 | 
				
			||||||
 | 
					        int izone = ghostZone(atom,i);
 | 
				
			||||||
 | 
					        int *list = &listzone[Nghost*izone];
 | 
				
			||||||
 | 
					        int n  = countAtoms[izone];
 | 
				
			||||||
 | 
					        list[n] = i;
 | 
				
			||||||
 | 
					        countAtoms[izone]++;     
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int zone = 1; zone<=3; zone++){
 | 
				
			||||||
 | 
					        int *list = &listzone[Nghost*zone];
 | 
				
			||||||
 | 
					        for(int n=0; n<countAtoms[zone]; n++)
 | 
				
			||||||
 | 
					            neighbor->listshell[Nshell++] = list[n]; 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    neighbor->Nshell = Nshell;
 | 
				
			||||||
 | 
					    if(neighbor->numNeighShell) free(neighbor->numNeighShell);
 | 
				
			||||||
 | 
					    if(neighbor->neighshell) free(neighbor->neighshell);
 | 
				
			||||||
 | 
					    neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
 | 
					    neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
 | 
				
			||||||
 | 
					    int resize = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while(resize)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        resize = 0;
 | 
				
			||||||
 | 
					        for(int i = 0; i < Nshell; i++) {   
 | 
				
			||||||
 | 
					            int *neighshell = &(neighbor->neighshell[i*neighbor->maxneighs]); 
 | 
				
			||||||
 | 
					            int n = 0;  
 | 
				
			||||||
 | 
					            int iatom = neighbor->listshell[i];
 | 
				
			||||||
 | 
					            int izone = ghostZone(atom, iatom);
 | 
				
			||||||
 | 
					            MD_FLOAT xtmp = atom_x(iatom);
 | 
				
			||||||
 | 
					            MD_FLOAT ytmp = atom_y(iatom);
 | 
				
			||||||
 | 
					            MD_FLOAT ztmp = atom_z(iatom);
 | 
				
			||||||
 | 
					            int ibin = coord2bin(xtmp, ytmp, ztmp);
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            #ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					            int type_i = atom->type[iatom];
 | 
				
			||||||
 | 
					            #endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for(int k = 0; k < nstencil; k++) {
 | 
				
			||||||
 | 
					                int jbin = ibin + stencil[k];
 | 
				
			||||||
 | 
					                int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
				
			||||||
 | 
					                for(int m = 0; m < bincount[jbin]; m++) {    
 | 
				
			||||||
 | 
					                    int jatom = loc_bin[m];
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                    int jzone = ghostZone(atom,jatom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    if(jzone <=izone) continue;
 | 
				
			||||||
 | 
					                    if(izone == 1 && (jzone==5||jzone==6||jzone==7)) continue;
 | 
				
			||||||
 | 
					                    if(izone == 2 && (jzone==4||jzone==6||jzone==7)) continue;
 | 
				
			||||||
 | 
					                    if(izone == 3 && (jzone==4||jzone==5||jzone==7)) continue;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					                    MD_FLOAT delx = xtmp - atom_x(jatom);
 | 
				
			||||||
 | 
					                    MD_FLOAT dely = ytmp - atom_y(jatom);  
 | 
				
			||||||
 | 
					                    MD_FLOAT delz = ztmp - atom_z(jatom);
 | 
				
			||||||
 | 
					                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    #ifdef EXPLICIT_TYPES
 | 
				
			||||||
 | 
					                    int type_j = atom->type[jatom];
 | 
				
			||||||
 | 
					                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
				
			||||||
 | 
					                    #else
 | 
				
			||||||
 | 
					                    const MD_FLOAT cutoff = cutneighsq;
 | 
				
			||||||
 | 
					                    #endif
 | 
				
			||||||
 | 
					                    if(rsq <= cutoff) {
 | 
				
			||||||
 | 
					                        neighshell[n++] = jatom;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }         
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            neighbor->numNeighShell[i] = n; 
 | 
				
			||||||
 | 
					            if(n >= neighbor->maxneighs){
 | 
				
			||||||
 | 
					                resize = 1;
 | 
				
			||||||
 | 
					                neighbor->maxneighs = n * 1.2;
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            }  
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					        if(resize) {
 | 
				
			||||||
 | 
					            free(neighbor->neighshell);
 | 
				
			||||||
 | 
					            neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }  
 | 
				
			||||||
 | 
					    free(listzone); 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int interaction(Atom* atom, int i, int j) {
 | 
				
			||||||
 | 
					       
 | 
				
			||||||
 | 
					    if(i<j && j<atom->Nlocal) {
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    } else if( atom_z(j)>atom_z(i) && j>=atom->Nlocal) {
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    } else if(Equal(atom_z(j),atom_z(i)) && atom_y(j)<atom_y(i) && j>=atom->Nlocal){
 | 
				
			||||||
 | 
					        return 1;  
 | 
				
			||||||
 | 
					    } else if(Equal(atom_z(j),atom_z(i)) && Equal(atom_y(j),atom_y(i)) && atom_x(j)<atom_x(i) && j>=atom->Nlocal){
 | 
				
			||||||
 | 
					        return 1;
 | 
				
			||||||
 | 
					    }  else {
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}                          
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
							
								
								
									
										173
									
								
								lammps/vtk.c
									
									
									
									
									
								
							
							
						
						
									
										173
									
								
								lammps/vtk.c
									
									
									
									
									
								
							@@ -6,8 +6,12 @@
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
#include <stdio.h>
 | 
					#include <stdio.h>
 | 
				
			||||||
#include <stdlib.h>
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <vtk.h>
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <atom.h>
 | 
					static MPI_File _fh; 
 | 
				
			||||||
 | 
					static inline void flushBuffer(char*); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
					int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
				
			||||||
    char timestep_filename[128];
 | 
					    char timestep_filename[128];
 | 
				
			||||||
@@ -18,12 +22,12 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
				
			|||||||
        fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
					        fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
				
			||||||
        return -1;
 | 
					        return -1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
    fprintf(fp, "# vtk DataFile Version 2.0\n");
 | 
					    fprintf(fp, "# vtk DataFile Version 2.0\n");
 | 
				
			||||||
    fprintf(fp, "Particle data\n");
 | 
					    fprintf(fp, "Particle data\n");
 | 
				
			||||||
    fprintf(fp, "ASCII\n");
 | 
					    fprintf(fp, "ASCII\n");
 | 
				
			||||||
    fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
 | 
					    fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
 | 
				
			||||||
    fprintf(fp, "POINTS %d double\n", atom->Nlocal);
 | 
					    fprintf(fp, "POINTS %d double\n", atom->Nlocal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i = 0; i < atom->Nlocal; ++i) {
 | 
					    for(int i = 0; i < atom->Nlocal; ++i) {
 | 
				
			||||||
        fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
 | 
					        fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -48,3 +52,168 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
				
			|||||||
    fclose(fp);
 | 
					    fclose(fp);
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    char msg[256];
 | 
				
			||||||
 | 
					    char timestep_filename[128];
 | 
				
			||||||
 | 
					    snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
 | 
				
			||||||
 | 
					    MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
 | 
				
			||||||
 | 
					    if(_fh == MPI_FILE_NULL) {
 | 
				
			||||||
 | 
					        if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					    if (comm->myproc==0){
 | 
				
			||||||
 | 
					        sprintf(msg, "# vtk DataFile Version 2.0\n");
 | 
				
			||||||
 | 
					        sprintf(msg, "%sParticle data\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sASCII\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);  
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int vtkVector(Comm* comm, Atom* atom, Parameter* param)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					    if (_fh == MPI_FILE_NULL) {
 | 
				
			||||||
 | 
					        if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    int sizeline= 25;   //#initial guess of characters in "%.4f %.4f %.4f\n" 
 | 
				
			||||||
 | 
					    int extrabuff = 100;
 | 
				
			||||||
 | 
					    int sizebuff = sizeline*atom->Nlocal+extrabuff; 
 | 
				
			||||||
 | 
					    int mysize = 0;
 | 
				
			||||||
 | 
					    char* msg = (char*) malloc(sizebuff);
 | 
				
			||||||
 | 
					    sprintf(msg, "");
 | 
				
			||||||
 | 
					    for(int i = 0; i < atom->Nlocal; i++){
 | 
				
			||||||
 | 
					        if(mysize+extrabuff >= sizebuff){
 | 
				
			||||||
 | 
					            sizebuff*= 1.5;
 | 
				
			||||||
 | 
					            msg = (char*) realloc(msg, sizebuff); 
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        //TODO: do not forget to add param->xlo, param->ylo, param->zlo   
 | 
				
			||||||
 | 
					        sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
 | 
				
			||||||
 | 
					        mysize = strlen(msg);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    int gatherSize[comm->numproc];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					    int offset=0;
 | 
				
			||||||
 | 
					    int globalSize = 0;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int i = 0; i < comm->myproc; i++)
 | 
				
			||||||
 | 
					        offset+= gatherSize[i];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int i = 0; i < comm->numproc; i++)
 | 
				
			||||||
 | 
					        globalSize+= gatherSize[i];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    MPI_Offset displ;   
 | 
				
			||||||
 | 
					    MPI_Datatype FileType;       
 | 
				
			||||||
 | 
					    int GlobalSize[] = {globalSize}; 
 | 
				
			||||||
 | 
					    int LocalSize[]  = {mysize};
 | 
				
			||||||
 | 
					    int Start[] = {offset};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(LocalSize[0]>0){
 | 
				
			||||||
 | 
					        MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);    
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    MPI_Type_commit(&FileType);
 | 
				
			||||||
 | 
					    MPI_File_get_size(_fh, &displ);
 | 
				
			||||||
 | 
					    MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
 | 
				
			||||||
 | 
					    MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD); 
 | 
				
			||||||
 | 
					    MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);    
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					    if (comm->myproc==0){
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        sprintf(msg, "\n\n");
 | 
				
			||||||
 | 
					        sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Natoms; i++) 
 | 
				
			||||||
 | 
					            sprintf(msg, "%s1 %d\n", msg, i);
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        sprintf(msg, "\n\n"); 
 | 
				
			||||||
 | 
					        sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Natoms; i++) 
 | 
				
			||||||
 | 
					            sprintf(msg, "%s1\n",msg);
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        sprintf(msg, "\n\n"); 
 | 
				
			||||||
 | 
					        sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sSCALARS mass double\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
 | 
				
			||||||
 | 
					        for(int i = 0; i < atom->Natoms; i++) 
 | 
				
			||||||
 | 
					            sprintf(msg, "%s1.0\n",msg);
 | 
				
			||||||
 | 
					        sprintf(msg, "%s\n\n",msg);
 | 
				
			||||||
 | 
					        flushBuffer(msg);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void vtkClose()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    MPI_File_close(&_fh);
 | 
				
			||||||
 | 
					    _fh=MPI_FILE_NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int printGhost(const char* filename, Atom* atom, int timestep, int me) {
 | 
				
			||||||
 | 
					    char timestep_filename[128];
 | 
				
			||||||
 | 
					    snprintf(timestep_filename, sizeof timestep_filename, "%s_%d_ghost%i.vtk", filename, timestep,me);
 | 
				
			||||||
 | 
					    FILE* fp = fopen(timestep_filename, "wb");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(fp == NULL) {
 | 
				
			||||||
 | 
					        fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(fp, "# vtk DataFile Version 2.0\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "Particle data\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "ASCII\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "POINTS %d double\n", atom->Nghost);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
 | 
				
			||||||
 | 
					        fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(fp, "\n\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
 | 
				
			||||||
 | 
					    for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
 | 
				
			||||||
 | 
					        fprintf(fp, "1 %d\n", i);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(fp, "\n\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
 | 
				
			||||||
 | 
					    for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
 | 
				
			||||||
 | 
					        fprintf(fp, "1\n");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(fp, "\n\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "POINT_DATA %d\n", atom->Nghost);
 | 
				
			||||||
 | 
					    fprintf(fp, "SCALARS mass double\n");
 | 
				
			||||||
 | 
					    fprintf(fp, "LOOKUP_TABLE default\n");
 | 
				
			||||||
 | 
					    for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; i++) {
 | 
				
			||||||
 | 
					        fprintf(fp, "1.0\n");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(fp, "\n\n");
 | 
				
			||||||
 | 
					    fclose(fp);
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if(comm->numproc == 1)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        write_atoms_to_vtk_file(filename, atom, timestep);
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    vtkOpen(filename, comm, atom, timestep);
 | 
				
			||||||
 | 
					    vtkVector(comm, atom, param);
 | 
				
			||||||
 | 
					    vtkClose(); 
 | 
				
			||||||
 | 
					    //printGhost(filename, atom, timestep, comm->myproc);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void flushBuffer(char* msg){
 | 
				
			||||||
 | 
					    MPI_Offset displ; 
 | 
				
			||||||
 | 
					    MPI_File_get_size(_fh, &displ);
 | 
				
			||||||
 | 
					    MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user