Add first draft version of GROMACS method separating i-clusters and j-clusters
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
cecb31d6a9
commit
c7360305c8
1
Makefile
1
Makefile
@ -10,6 +10,7 @@ Q ?= @
|
|||||||
include $(MAKE_DIR)/config.mk
|
include $(MAKE_DIR)/config.mk
|
||||||
include $(MAKE_DIR)/include_$(TAG).mk
|
include $(MAKE_DIR)/include_$(TAG).mk
|
||||||
include $(MAKE_DIR)/include_LIKWID.mk
|
include $(MAKE_DIR)/include_LIKWID.mk
|
||||||
|
include $(MAKE_DIR)/include_ISA.mk
|
||||||
include $(MAKE_DIR)/include_GROMACS.mk
|
include $(MAKE_DIR)/include_GROMACS.mk
|
||||||
INCLUDES += -I./$(SRC_DIR)/includes
|
INCLUDES += -I./$(SRC_DIR)/includes
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
# Compiler tag (GCC/CLANG/ICC)
|
# Compiler tag (GCC/CLANG/ICC)
|
||||||
TAG ?= ICC
|
TAG ?= ICC
|
||||||
|
# Instruction set (SSE/AVX/AVX2/AVX512)
|
||||||
|
ISA ?= AVX512
|
||||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
||||||
OPT_SCHEME ?= gromacs
|
OPT_SCHEME ?= gromacs
|
||||||
# Enable likwid (true or false)
|
# Enable likwid (true or false)
|
||||||
@ -23,10 +25,6 @@ EXPLICIT_TYPES ?= false
|
|||||||
MEM_TRACER ?= false
|
MEM_TRACER ?= false
|
||||||
# Trace indexes and distances for gather-md (true or false)
|
# Trace indexes and distances for gather-md (true or false)
|
||||||
INDEX_TRACER ?= false
|
INDEX_TRACER ?= false
|
||||||
# Vector width (elements) for index and distance tracer
|
|
||||||
VECTOR_WIDTH ?= 8
|
|
||||||
# When vector width is 4 but AVX2 is not supported (AVX only), set this to true
|
|
||||||
NO_AVX2 ?= false
|
|
||||||
# Compute statistics
|
# Compute statistics
|
||||||
COMPUTE_STATS ?= true
|
COMPUTE_STATS ?= true
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
int cj = neighs[k];
|
int cj = neighs[k];
|
||||||
int any = 0;
|
int any = 0;
|
||||||
MD_FLOAT *cjptr = cluster_pos_ptr(cj);
|
MD_FLOAT *cjptr = cluster_pos_ptr(cj);
|
||||||
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
for(int cii = 0; cii < CLUSTER_M; cii++) {
|
||||||
MD_FLOAT xtmp = cluster_x(ciptr, cii);
|
MD_FLOAT xtmp = cluster_x(ciptr, cii);
|
||||||
MD_FLOAT ytmp = cluster_y(ciptr, cii);
|
MD_FLOAT ytmp = cluster_y(ciptr, cii);
|
||||||
MD_FLOAT ztmp = cluster_z(ciptr, cii);
|
MD_FLOAT ztmp = cluster_z(ciptr, cii);
|
||||||
@ -71,7 +71,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
for(int cjj = 0; cjj < CLUSTER_DIM_M; cjj++) {
|
for(int cjj = 0; cjj < CLUSTER_M; cjj++) {
|
||||||
if(ci != cj || cii != cjj) {
|
if(ci != cj || cii != cjj) {
|
||||||
MD_FLOAT delx = xtmp - cluster_x(cjptr, cjj);
|
MD_FLOAT delx = xtmp - cluster_x(cjptr, cjj);
|
||||||
MD_FLOAT dely = ytmp - cluster_y(cjptr, cjj);
|
MD_FLOAT dely = ytmp - cluster_y(cjptr, cjj);
|
||||||
@ -106,7 +106,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
|
|
||||||
addStat(stats->calculated_forces, 1);
|
addStat(stats->calculated_forces, 1);
|
||||||
addStat(stats->num_neighs, numneighs);
|
addStat(stats->num_neighs, numneighs);
|
||||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_DIM_M / CLUSTER_DIM_N));
|
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
@ -128,7 +128,7 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
MD_SIMD_FLOAT epsilon_vec = simd_broadcast(epsilon);
|
MD_SIMD_FLOAT epsilon_vec = simd_broadcast(epsilon);
|
||||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||||
const int unroll_j = CLUSTER_DIM_N / CLUSTER_DIM_M;
|
const int unroll_j = CLUSTER_N / CLUSTER_M;
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
@ -262,7 +262,7 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
|
|
||||||
addStat(stats->calculated_forces, 1);
|
addStat(stats->calculated_forces, 1);
|
||||||
addStat(stats->num_neighs, numneighs);
|
addStat(stats->num_neighs, numneighs);
|
||||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_DIM_M / CLUSTER_DIM_N));
|
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||||
}
|
}
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
LIKWID_MARKER_STOP("force");
|
||||||
|
@ -27,13 +27,60 @@
|
|||||||
|
|
||||||
#define DELTA 20000
|
#define DELTA 20000
|
||||||
|
|
||||||
#define CLUSTER_DIM_M 4
|
#define CLUSTER_M 4
|
||||||
#define CLUSTER_DIM_N VECTOR_WIDTH
|
|
||||||
|
// Nbnxn layouts (as of GROMACS):
|
||||||
|
// Simd4xN: M=4, N=VECTOR_WIDTH
|
||||||
|
// Simd2xNN: M=4, N=(VECTOR_WIDTH/2)
|
||||||
|
|
||||||
|
// Simd2XNN (here used for single-precision)
|
||||||
|
#if VECTOR_WIDTH > CLUSTER_M * 2
|
||||||
|
# define CLUSTER_N (VECTOR_WIDTH / 2)
|
||||||
|
// Simd4xN
|
||||||
|
#else
|
||||||
|
# define CLUSTER_N VECTOR_WIDTH
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CLUSTER_M == CLUSTER_N
|
||||||
|
# define CJ0_FROM_CI(a) (a)
|
||||||
|
# define CJ1_FROM_CI(a) (a)
|
||||||
|
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
|
||||||
|
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
|
||||||
|
#elif CLUSTER_M == CLUSTER_N * 2 // M > N
|
||||||
|
# define CJ0_FROM_CI(a) ((a) << 1)
|
||||||
|
# define CJ1_FROM_CI(a) (((a) << 1) | 0x1)
|
||||||
|
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_M * (b))
|
||||||
|
# define CJ_BASE_INDEX(a,b) (((a) >> 1) * CLUSTER_M * (b) + ((a) & 0x1) * (CLUSTER_M >> 1))
|
||||||
|
#elif CLUSTER_M == CLUSTER_N / 2 // M < N
|
||||||
|
# define CJ0_FROM_CI(a) ((a) >> 1)
|
||||||
|
# define CJ1_FROM_CI(a) ((a) >> 1)
|
||||||
|
# define CI_BASE_INDEX(a,b) (((a) >> 1) * CLUSTER_N * (b) + ((a) & 0x1) * (CLUSTER_N >> 1))
|
||||||
|
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
|
||||||
|
#else
|
||||||
|
# error "Invalid cluster configuration!"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CLUSTER_N != 2 && CLUSTER_N != 4 && CLUSTER_N != 8
|
||||||
|
# error "Cluster N dimension can be only 2, 4 and 8"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CI_SCALAR_BASE_INDEX(a) (CI_BASE_INDEX(a, 1))
|
||||||
|
#define CI_VECTOR_BASE_INDEX(a) (CI_BASE_INDEX(a, 3))
|
||||||
|
#define CJ_SCALAR_BASE_INDEX(a) (CJ_BASE_INDEX(a, 1))
|
||||||
|
#define CJ_VECTOR_BASE_INDEX(a) (CJ_BASE_INDEX(a, 3))
|
||||||
|
|
||||||
|
#if CLUSTER_M >= CLUSTER_N
|
||||||
|
# define CL_X_OFFSET (0 * CLUSTER_M)
|
||||||
|
# define CL_Y_OFFSET (1 * CLUSTER_M)
|
||||||
|
# define CL_Z_OFFSET (2 * CLUSTER_M)
|
||||||
|
#else
|
||||||
|
# define CL_X_OFFSET (0 * CLUSTER_N)
|
||||||
|
# define CL_Y_OFFSET (1 * CLUSTER_N)
|
||||||
|
# define CL_Z_OFFSET (2 * CLUSTER_N)
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int bin;
|
|
||||||
int natoms;
|
int natoms;
|
||||||
int type[CLUSTER_DIM_M];
|
|
||||||
MD_FLOAT bbminx, bbmaxx;
|
MD_FLOAT bbminx, bbmaxx;
|
||||||
MD_FLOAT bbminy, bbmaxy;
|
MD_FLOAT bbminy, bbmaxy;
|
||||||
MD_FLOAT bbminz, bbmaxz;
|
MD_FLOAT bbminz, bbmaxz;
|
||||||
@ -44,9 +91,6 @@ typedef struct {
|
|||||||
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
|
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
|
||||||
MD_FLOAT *x, *y, *z;
|
MD_FLOAT *x, *y, *z;
|
||||||
MD_FLOAT *vx, *vy, *vz;
|
MD_FLOAT *vx, *vy, *vz;
|
||||||
MD_FLOAT *cl_x;
|
|
||||||
MD_FLOAT *cl_v;
|
|
||||||
MD_FLOAT *cl_f;
|
|
||||||
int *border_map;
|
int *border_map;
|
||||||
int *type;
|
int *type;
|
||||||
int ntypes;
|
int ntypes;
|
||||||
@ -54,8 +98,13 @@ typedef struct {
|
|||||||
MD_FLOAT *sigma6;
|
MD_FLOAT *sigma6;
|
||||||
MD_FLOAT *cutforcesq;
|
MD_FLOAT *cutforcesq;
|
||||||
MD_FLOAT *cutneighsq;
|
MD_FLOAT *cutneighsq;
|
||||||
Cluster *clusters;
|
|
||||||
int *PBCx, *PBCy, *PBCz;
|
int *PBCx, *PBCy, *PBCz;
|
||||||
|
// Data in cluster format
|
||||||
|
MD_FLOAT *cl_x;
|
||||||
|
MD_FLOAT *cl_v;
|
||||||
|
MD_FLOAT *cl_f;
|
||||||
|
int *cl_type;
|
||||||
|
Cluster *iclusters, *jclusters;
|
||||||
} Atom;
|
} Atom;
|
||||||
|
|
||||||
extern void initAtom(Atom*);
|
extern void initAtom(Atom*);
|
||||||
@ -67,10 +116,6 @@ extern int readAtom_dmp(Atom*, Parameter*);
|
|||||||
extern void growAtom(Atom*);
|
extern void growAtom(Atom*);
|
||||||
extern void growClusters(Atom*);
|
extern void growClusters(Atom*);
|
||||||
|
|
||||||
#define cluster_pos_ptr(ci) &(atom->cl_x[(ci) * CLUSTER_DIM_M * 3])
|
|
||||||
#define cluster_velocity_ptr(ci) &(atom->cl_v[(ci) * CLUSTER_DIM_M * 3])
|
|
||||||
#define cluster_force_ptr(ci) &(atom->cl_f[(ci) * CLUSTER_DIM_M * 3])
|
|
||||||
|
|
||||||
#ifdef AOS
|
#ifdef AOS
|
||||||
#define POS_DATA_LAYOUT "AoS"
|
#define POS_DATA_LAYOUT "AoS"
|
||||||
#define atom_x(i) atom->x[(i) * 3 + 0]
|
#define atom_x(i) atom->x[(i) * 3 + 0]
|
||||||
@ -83,14 +128,4 @@ extern void growClusters(Atom*);
|
|||||||
#define atom_z(i) atom->z[i]
|
#define atom_z(i) atom->z[i]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CLUSTER_AOS
|
|
||||||
#define cluster_x(cptr, i) cptr[(i) * 3 + 0]
|
|
||||||
#define cluster_y(cptr, i) cptr[(i) * 3 + 1]
|
|
||||||
#define cluster_z(cptr, i) cptr[(i) * 3 + 2]
|
|
||||||
#else
|
|
||||||
#define cluster_x(cptr, i) cptr[0 * CLUSTER_DIM_M + (i)]
|
|
||||||
#define cluster_y(cptr, i) cptr[1 * CLUSTER_DIM_M + (i)]
|
|
||||||
#define cluster_z(cptr, i) cptr[2 * CLUSTER_DIM_M + (i)]
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -54,8 +54,8 @@ static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) {
|
|||||||
x = _mm512_mask_i32gather_pd(simd_zero(), simd_mask_from_u32(0x0f), vindex, c0, sizeof(double));
|
x = _mm512_mask_i32gather_pd(simd_zero(), simd_mask_from_u32(0x0f), vindex, c0, sizeof(double));
|
||||||
x = _mm512_mask_i32gather_pd(x, simd_mask_from_u32(0xf0), vindex, c1, sizeof(double));
|
x = _mm512_mask_i32gather_pd(x, simd_mask_from_u32(0xf0), vindex, c1, sizeof(double));
|
||||||
#else
|
#else
|
||||||
x = _mm512_load_pd(&c0[d * CLUSTER_DIM_M]);
|
x = _mm512_load_pd(&c0[d * CLUSTER_M]);
|
||||||
x = _mm512_insertf64x4(x, _mm256_load_pd(&c1[d * CLUSTER_DIM_M]), 1);
|
x = _mm512_insertf64x4(x, _mm256_load_pd(&c1[d * CLUSTER_M]), 1);
|
||||||
#endif
|
#endif
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
@ -134,7 +134,7 @@ static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) {
|
|||||||
__m128i vindex = _mm128_add_epi32(aos_gather_vindex, _mm128_set1_epi32(d));
|
__m128i vindex = _mm128_add_epi32(aos_gather_vindex, _mm128_set1_epi32(d));
|
||||||
x = _mm256_i32gather_pd(c0, vindex, sizeof(double));
|
x = _mm256_i32gather_pd(c0, vindex, sizeof(double));
|
||||||
#else
|
#else
|
||||||
x = _mm256_load_pd(&c0[d * CLUSTER_DIM_M]);
|
x = _mm256_load_pd(&c0[d * CLUSTER_M]);
|
||||||
#endif
|
#endif
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
@ -217,8 +217,6 @@ int main(int argc, const char *argv[]) {
|
|||||||
|
|
||||||
if(!csv) {
|
if(!csv) {
|
||||||
printf("Number of timesteps: %d\n", param.ntimes);
|
printf("Number of timesteps: %d\n", param.ntimes);
|
||||||
printf("Number of times to compute the atoms loop: %d\n", ATOMS_LOOP_RUNS);
|
|
||||||
printf("Number of times to compute the neighbors loop: %d\n", NEIGHBORS_LOOP_RUNS);
|
|
||||||
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
||||||
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
||||||
printf("Total number of atoms: %d\n", atom->Nlocal);
|
printf("Total number of atoms: %d\n", atom->Nlocal);
|
||||||
@ -257,9 +255,8 @@ int main(int argc, const char *argv[]) {
|
|||||||
E = getTimeStamp();
|
E = getTimeStamp();
|
||||||
double T_accum = E-S;
|
double T_accum = E-S;
|
||||||
double freq_hz = param.proc_freq * 1.e9;
|
double freq_hz = param.proc_freq * 1.e9;
|
||||||
const double repeats = ATOMS_LOOP_RUNS * NEIGHBORS_LOOP_RUNS;
|
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes);
|
||||||
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes * repeats);
|
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes) * freq_hz;
|
||||||
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes * repeats) * freq_hz;
|
|
||||||
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
||||||
|
|
||||||
if(!csv) {
|
if(!csv) {
|
||||||
|
@ -39,8 +39,9 @@ static int nbinx, nbiny;
|
|||||||
static int mbinx, mbiny; // n bins in x, y
|
static int mbinx, mbiny; // n bins in x, y
|
||||||
static int *bincount;
|
static int *bincount;
|
||||||
static int *bins;
|
static int *bins;
|
||||||
static int *cluster_bincount;
|
static int *bin_nclusters;
|
||||||
static int *cluster_bins;
|
static int *bin_clusters;
|
||||||
|
static int *icluster_bin;
|
||||||
static int mbins; //total number of bins
|
static int mbins; //total number of bins
|
||||||
static int atoms_per_bin; // max atoms per bin
|
static int atoms_per_bin; // max atoms per bin
|
||||||
static int clusters_per_bin; // max clusters per bin
|
static int clusters_per_bin; // max clusters per bin
|
||||||
@ -63,12 +64,12 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
|||||||
cutneigh = param->cutneigh;
|
cutneigh = param->cutneigh;
|
||||||
nmax = 0;
|
nmax = 0;
|
||||||
atoms_per_bin = 8;
|
atoms_per_bin = 8;
|
||||||
clusters_per_bin = (atoms_per_bin / CLUSTER_DIM_M) + 4;
|
clusters_per_bin = (atoms_per_bin / CLUSTER_M) + 4;
|
||||||
stencil = NULL;
|
stencil = NULL;
|
||||||
bins = NULL;
|
bins = NULL;
|
||||||
bincount = NULL;
|
bincount = NULL;
|
||||||
cluster_bins = NULL;
|
bin_clusters = NULL;
|
||||||
cluster_bincount = NULL;
|
bin_nclusters = NULL;
|
||||||
neighbor->maxneighs = 100;
|
neighbor->maxneighs = 100;
|
||||||
neighbor->numneigh = NULL;
|
neighbor->numneigh = NULL;
|
||||||
neighbor->neighbors = NULL;
|
neighbor->neighbors = NULL;
|
||||||
@ -91,7 +92,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
|
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
|
||||||
|
|
||||||
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
||||||
MD_FLOAT atoms_in_cell = MAX(CLUSTER_DIM_M, CLUSTER_DIM_N);
|
MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
|
||||||
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
|
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
|
||||||
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
|
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
|
||||||
nbinx = MAX(1, (int)ceil((xhi - xlo) / targetsizex));
|
nbinx = MAX(1, (int)ceil((xhi - xlo) / targetsizex));
|
||||||
@ -103,20 +104,16 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
cutneighsq = cutneigh * cutneigh;
|
cutneighsq = cutneigh * cutneigh;
|
||||||
|
|
||||||
coord = xlo - cutneigh - SMALL * xprd;
|
coord = xlo - cutneigh - SMALL * xprd;
|
||||||
mbinxlo = (int) (coord * bininvx);
|
mbinxlo = (int)(coord * bininvx);
|
||||||
if (coord < 0.0) {
|
if(coord < 0.0) { mbinxlo = mbinxlo - 1; }
|
||||||
mbinxlo = mbinxlo - 1;
|
|
||||||
}
|
|
||||||
coord = xhi + cutneigh + SMALL * xprd;
|
coord = xhi + cutneigh + SMALL * xprd;
|
||||||
mbinxhi = (int) (coord * bininvx);
|
mbinxhi = (int)(coord * bininvx);
|
||||||
|
|
||||||
coord = ylo - cutneigh - SMALL * yprd;
|
coord = ylo - cutneigh - SMALL * yprd;
|
||||||
mbinylo = (int) (coord * bininvy);
|
mbinylo = (int)(coord * bininvy);
|
||||||
if (coord < 0.0) {
|
if(coord < 0.0) { mbinylo = mbinylo - 1; }
|
||||||
mbinylo = mbinylo - 1;
|
|
||||||
}
|
|
||||||
coord = yhi + cutneigh + SMALL * yprd;
|
coord = yhi + cutneigh + SMALL * yprd;
|
||||||
mbinyhi = (int) (coord * bininvy);
|
mbinyhi = (int)(coord * bininvy);
|
||||||
|
|
||||||
mbinxlo = mbinxlo - 1;
|
mbinxlo = mbinxlo - 1;
|
||||||
mbinxhi = mbinxhi + 1;
|
mbinxhi = mbinxhi + 1;
|
||||||
@ -126,16 +123,12 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
mbinyhi = mbinyhi + 1;
|
mbinyhi = mbinyhi + 1;
|
||||||
mbiny = mbinyhi - mbinylo + 1;
|
mbiny = mbinyhi - mbinylo + 1;
|
||||||
|
|
||||||
nextx = (int) (cutneigh * bininvx);
|
nextx = (int)(cutneigh * bininvx);
|
||||||
|
nexty = (int)(cutneigh * bininvy);
|
||||||
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
|
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
|
||||||
|
|
||||||
nexty = (int) (cutneigh * bininvy);
|
|
||||||
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
|
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
|
||||||
|
|
||||||
if (stencil) {
|
if (stencil) { free(stencil); }
|
||||||
free(stencil);
|
|
||||||
}
|
|
||||||
|
|
||||||
stencil = (int *) malloc((2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
|
stencil = (int *) malloc((2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
|
||||||
nstencil = 0;
|
nstencil = 0;
|
||||||
|
|
||||||
@ -147,19 +140,15 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(bincount) { free(bincount); }
|
||||||
|
if(bins) { free(bins); }
|
||||||
|
if(bin_nclusters) { free(bin_nclusters); }
|
||||||
|
if(bin_clusters) { free(bin_clusters); }
|
||||||
mbins = mbinx * mbiny;
|
mbins = mbinx * mbiny;
|
||||||
|
|
||||||
if (bincount) { free(bincount); }
|
|
||||||
bincount = (int*) malloc(mbins * sizeof(int));
|
bincount = (int*) malloc(mbins * sizeof(int));
|
||||||
|
|
||||||
if (bins) { free(bins); }
|
|
||||||
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
|
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
|
||||||
|
bin_nclusters = (int*) malloc(mbins * sizeof(int));
|
||||||
if (cluster_bincount) { free(cluster_bincount); }
|
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
||||||
cluster_bincount = (int*) malloc(mbins * sizeof(int));
|
|
||||||
|
|
||||||
if (cluster_bins) { free(cluster_bins); }
|
|
||||||
cluster_bins = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
DEBUG_MESSAGE("lo, hi = (%e, %e, %e), (%e, %e, %e)\n", xlo, ylo, zlo, xhi, yhi, zhi);
|
DEBUG_MESSAGE("lo, hi = (%e, %e, %e), (%e, %e, %e)\n", xlo, ylo, zlo, xhi, yhi, zhi);
|
||||||
@ -171,20 +160,20 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
||||||
MD_FLOAT dl = atom->clusters[ci].bbminx - atom->clusters[cj].bbmaxx;
|
MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
|
||||||
MD_FLOAT dh = atom->clusters[cj].bbminx - atom->clusters[ci].bbmaxx;
|
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
|
||||||
MD_FLOAT dm = MAX(dl, dh);
|
MD_FLOAT dm = MAX(dl, dh);
|
||||||
MD_FLOAT dm0 = MAX(dm, 0.0);
|
MD_FLOAT dm0 = MAX(dm, 0.0);
|
||||||
MD_FLOAT d2 = dm0 * dm0;
|
MD_FLOAT d2 = dm0 * dm0;
|
||||||
|
|
||||||
dl = atom->clusters[ci].bbminy - atom->clusters[cj].bbmaxy;
|
dl = atom->iclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
|
||||||
dh = atom->clusters[cj].bbminy - atom->clusters[ci].bbmaxy;
|
dh = atom->jclusters[cj].bbminy - atom->iclusters[ci].bbmaxy;
|
||||||
dm = MAX(dl, dh);
|
dm = MAX(dl, dh);
|
||||||
dm0 = MAX(dm, 0.0);
|
dm0 = MAX(dm, 0.0);
|
||||||
d2 += dm0 * dm0;
|
d2 += dm0 * dm0;
|
||||||
|
|
||||||
dl = atom->clusters[ci].bbminz - atom->clusters[cj].bbmaxz;
|
dl = atom->iclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
|
||||||
dh = atom->clusters[cj].bbminz - atom->clusters[ci].bbmaxz;
|
dh = atom->jclusters[cj].bbminz - atom->iclusters[ci].bbmaxz;
|
||||||
dm = MAX(dl, dh);
|
dm = MAX(dl, dh);
|
||||||
dm0 = MAX(dm, 0.0);
|
dm0 = MAX(dm, 0.0);
|
||||||
d2 += dm0 * dm0;
|
d2 += dm0 * dm0;
|
||||||
@ -192,14 +181,16 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) {
|
int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) {
|
||||||
MD_FLOAT *ciptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *cjptr = cluster_pos_ptr(cj);
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||||
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
for(int cjj = 0; cjj < atom->clusters[cj].natoms; cjj++) {
|
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||||
MD_FLOAT delx = cluster_x(ciptr, cii) - cluster_x(cjptr, cjj);
|
MD_FLOAT delx = ci_x[CL_X_OFFSET + cii] - cj_x[CL_X_OFFSET + cjj];
|
||||||
MD_FLOAT dely = cluster_y(ciptr, cii) - cluster_y(cjptr, cjj);
|
MD_FLOAT dely = ci_x[CL_Y_OFFSET + cii] - cj_x[CL_Y_OFFSET + cjj];
|
||||||
MD_FLOAT delz = cluster_z(ciptr, cii) - cluster_z(cjptr, cjj);
|
MD_FLOAT delz = ci_x[CL_Z_OFFSET + cii] - cj_x[CL_Z_OFFSET + cjj];
|
||||||
if(delx * delx + dely * dely + delz * delz < rsq) {
|
if(delx * delx + dely * dely + delz * delz < rsq) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -234,22 +225,22 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
resize = 0;
|
resize = 0;
|
||||||
|
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
int* neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int ibin = atom->clusters[ci].bin;
|
int ibin = icluster_bin[ci];
|
||||||
MD_FLOAT ibb_xmin = atom->clusters[ci].bbminx;
|
MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx;
|
||||||
MD_FLOAT ibb_xmax = atom->clusters[ci].bbmaxx;
|
MD_FLOAT ibb_xmax = atom->iclusters[ci].bbmaxx;
|
||||||
MD_FLOAT ibb_ymin = atom->clusters[ci].bbminy;
|
MD_FLOAT ibb_ymin = atom->iclusters[ci].bbminy;
|
||||||
MD_FLOAT ibb_ymax = atom->clusters[ci].bbmaxy;
|
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
|
||||||
MD_FLOAT ibb_zmin = atom->clusters[ci].bbminz;
|
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
|
||||||
MD_FLOAT ibb_zmax = atom->clusters[ci].bbmaxz;
|
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
|
||||||
|
|
||||||
for(int k = 0; k < nstencil; k++) {
|
for(int k = 0; k < nstencil; k++) {
|
||||||
int jbin = ibin + stencil[k];
|
int jbin = ibin + stencil[k];
|
||||||
int *loc_bin = &cluster_bins[jbin * clusters_per_bin];
|
int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
|
||||||
int cj, m = -1;
|
int cj, m = -1;
|
||||||
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
|
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
|
||||||
const int c = cluster_bincount[jbin];
|
const int c = bin_nclusters[jbin];
|
||||||
|
|
||||||
if(c > 0) {
|
if(c > 0) {
|
||||||
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
|
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
|
||||||
@ -257,8 +248,8 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
do {
|
do {
|
||||||
m++;
|
m++;
|
||||||
cj = loc_bin[m];
|
cj = loc_bin[m];
|
||||||
jbb_zmin = atom->clusters[cj].bbminz;
|
jbb_zmin = atom->jclusters[cj].bbminz;
|
||||||
jbb_zmax = atom->clusters[cj].bbmaxz;
|
jbb_zmax = atom->jclusters[cj].bbmaxz;
|
||||||
dl = ibb_zmin - jbb_zmax;
|
dl = ibb_zmin - jbb_zmax;
|
||||||
dh = jbb_zmin - ibb_zmax;
|
dh = jbb_zmin - ibb_zmax;
|
||||||
dm = MAX(dl, dh);
|
dm = MAX(dl, dh);
|
||||||
@ -266,10 +257,10 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
d_bb_sq = dm0 * dm0;
|
d_bb_sq = dm0 * dm0;
|
||||||
} while(m + 1 < c && d_bb_sq > cutneighsq);
|
} while(m + 1 < c && d_bb_sq > cutneighsq);
|
||||||
|
|
||||||
jbb_xmin = atom->clusters[cj].bbminx;
|
jbb_xmin = atom->jclusters[cj].bbminx;
|
||||||
jbb_xmax = atom->clusters[cj].bbmaxx;
|
jbb_xmax = atom->jclusters[cj].bbmaxx;
|
||||||
jbb_ymin = atom->clusters[cj].bbminy;
|
jbb_ymin = atom->jclusters[cj].bbminy;
|
||||||
jbb_ymax = atom->clusters[cj].bbmaxy;
|
jbb_ymax = atom->jclusters[cj].bbmaxy;
|
||||||
|
|
||||||
while(m < c) {
|
while(m < c) {
|
||||||
dl = ibb_zmin - jbb_zmax;
|
dl = ibb_zmin - jbb_zmax;
|
||||||
@ -303,20 +294,21 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
m++;
|
m++;
|
||||||
if(m < c) {
|
if(m < c) {
|
||||||
cj = loc_bin[m];
|
cj = loc_bin[m];
|
||||||
jbb_xmin = atom->clusters[cj].bbminx;
|
jbb_xmin = atom->jclusters[cj].bbminx;
|
||||||
jbb_xmax = atom->clusters[cj].bbmaxx;
|
jbb_xmax = atom->jclusters[cj].bbmaxx;
|
||||||
jbb_ymin = atom->clusters[cj].bbminy;
|
jbb_ymin = atom->jclusters[cj].bbminy;
|
||||||
jbb_ymax = atom->clusters[cj].bbmaxy;
|
jbb_ymax = atom->jclusters[cj].bbmaxy;
|
||||||
jbb_zmin = atom->clusters[cj].bbminz;
|
jbb_zmin = atom->jclusters[cj].bbminz;
|
||||||
jbb_zmax = atom->clusters[cj].bbmaxz;
|
jbb_zmax = atom->jclusters[cj].bbmaxz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
|
// Fill neighbor list with dummy values to fit vector width
|
||||||
while(n % (CLUSTER_DIM_N / CLUSTER_DIM_M)) {
|
if(CLUSTER_N < VECTOR_WIDTH) {
|
||||||
neighptr[n++] = nall - 1; // Last cluster is always a dummy cluster
|
while(n % (VECTOR_WIDTH / CLUSTER_N)) {
|
||||||
|
neighptr[n++] = CJ0_FROM_CI(nall - 1); // Last cluster is always a dummy cluster
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -353,7 +345,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
atom->clusters[ci].bbminz,
|
atom->clusters[ci].bbminz,
|
||||||
atom->clusters[ci].bbmaxz);
|
atom->clusters[ci].bbmaxz);
|
||||||
|
|
||||||
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
for(int cii = 0; cii < CLUSTER_M; cii++) {
|
||||||
DEBUG_MESSAGE("%f, %f, %f\n", cluster_x(ciptr, cii), cluster_y(ciptr, cii), cluster_z(ciptr, cii));
|
DEBUG_MESSAGE("%f, %f, %f\n", cluster_x(ciptr, cii), cluster_y(ciptr, cii), cluster_z(ciptr, cii));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -371,7 +363,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
|||||||
atom->clusters[cj].bbminz,
|
atom->clusters[cj].bbminz,
|
||||||
atom->clusters[cj].bbmaxz);
|
atom->clusters[cj].bbmaxz);
|
||||||
|
|
||||||
for(int cjj = 0; cjj < CLUSTER_DIM_M; cjj++) {
|
for(int cjj = 0; cjj < CLUSTER_M; cjj++) {
|
||||||
DEBUG_MESSAGE(" %f, %f, %f\n", cluster_x(cjptr, cjj), cluster_y(cjptr, cjj), cluster_z(cjptr, cjj));
|
DEBUG_MESSAGE(" %f, %f, %f\n", cluster_x(cjptr, cjj), cluster_y(cjptr, cjj), cluster_z(cjptr, cjj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -393,8 +385,8 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
|||||||
int k = 0;
|
int k = 0;
|
||||||
|
|
||||||
// Remove dummy clusters if necessary
|
// Remove dummy clusters if necessary
|
||||||
if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
|
if(CLUSTER_N < VECTOR_WIDTH) {
|
||||||
while(neighs[numneighs - 1] == nall - 1) {
|
while(neighs[numneighs - 1] == CJ0_FROM_CI(nall - 1)) {
|
||||||
numneighs--;
|
numneighs--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -410,9 +402,9 @@ void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Readd dummy clusters if necessary
|
// Readd dummy clusters if necessary
|
||||||
if(CLUSTER_DIM_N > CLUSTER_DIM_M) {
|
if(CLUSTER_N < VECTOR_WIDTH) {
|
||||||
while(numneighs % (CLUSTER_DIM_N / CLUSTER_DIM_M)) {
|
while(numneighs % (VECTOR_WIDTH / CLUSTER_N)) {
|
||||||
neighs[numneighs++] = nall - 1; // Last cluster is always a dummy cluster
|
neighs[numneighs++] = CJ0_FROM_CI(nall - 1); // Last cluster is always a dummy cluster
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -558,34 +550,37 @@ void buildClusters(Atom *atom) {
|
|||||||
for(int bin = 0; bin < mbins; bin++) {
|
for(int bin = 0; bin < mbins; bin++) {
|
||||||
int c = bincount[bin];
|
int c = bincount[bin];
|
||||||
int ac = 0;
|
int ac = 0;
|
||||||
const int nclusters = ((c + CLUSTER_DIM_M - 1) / CLUSTER_DIM_M);
|
int nclusters = ((c + CLUSTER_M - 1) / CLUSTER_M);
|
||||||
|
if(CLUSTER_N > CLUSTER_M && nclusters % 2) { nclusters++; }
|
||||||
for(int cl = 0; cl < nclusters; cl++) {
|
for(int cl = 0; cl < nclusters; cl++) {
|
||||||
const int ci = atom->Nclusters_local;
|
const int ci = atom->Nclusters_local;
|
||||||
|
|
||||||
if(ci >= atom->Nclusters_max) {
|
if(ci >= atom->Nclusters_max) {
|
||||||
growClusters(atom);
|
growClusters(atom);
|
||||||
}
|
}
|
||||||
|
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_sca_base = CI_SCALAR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *cvptr = cluster_velocity_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||||
|
int *ci_type = &atom->cl_type[ci_sca_base];
|
||||||
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
||||||
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||||
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||||
atom->clusters[ci].natoms = 0;
|
|
||||||
|
|
||||||
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
atom->iclusters[ci].natoms = 0;
|
||||||
|
for(int cii = 0; cii < CLUSTER_M; cii++) {
|
||||||
if(ac < c) {
|
if(ac < c) {
|
||||||
int i = bins[bin * atoms_per_bin + ac];
|
int i = bins[bin * atoms_per_bin + ac];
|
||||||
MD_FLOAT xtmp = atom_x(i);
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
MD_FLOAT ztmp = atom_z(i);
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
|
|
||||||
cluster_x(cptr, cii) = xtmp;
|
ci_x[CL_X_OFFSET + cii] = xtmp;
|
||||||
cluster_y(cptr, cii) = ytmp;
|
ci_x[CL_Y_OFFSET + cii] = ytmp;
|
||||||
cluster_z(cptr, cii) = ztmp;
|
ci_x[CL_Z_OFFSET + cii] = ztmp;
|
||||||
cluster_x(cvptr, cii) = atom->vx[i];
|
ci_v[CL_X_OFFSET + cii] = atom->vx[i];
|
||||||
cluster_y(cvptr, cii) = atom->vy[i];
|
ci_v[CL_Y_OFFSET + cii] = atom->vy[i];
|
||||||
cluster_z(cvptr, cii) = atom->vz[i];
|
ci_v[CL_Z_OFFSET + cii] = atom->vz[i];
|
||||||
|
|
||||||
// TODO: To create the bounding boxes faster, we can use SIMD operations
|
// TODO: To create the bounding boxes faster, we can use SIMD operations
|
||||||
if(bbminx > xtmp) { bbminx = xtmp; }
|
if(bbminx > xtmp) { bbminx = xtmp; }
|
||||||
@ -595,24 +590,24 @@ void buildClusters(Atom *atom) {
|
|||||||
if(bbminz > ztmp) { bbminz = ztmp; }
|
if(bbminz > ztmp) { bbminz = ztmp; }
|
||||||
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
|
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
|
||||||
|
|
||||||
atom->clusters[ci].type[cii] = atom->type[i];
|
atom->cl_type[cii] = atom->type[i];
|
||||||
atom->clusters[ci].natoms++;
|
atom->iclusters[ci].natoms++;
|
||||||
} else {
|
} else {
|
||||||
cluster_x(cptr, cii) = INFINITY;
|
ci_x[CL_X_OFFSET + cii] = INFINITY;
|
||||||
cluster_y(cptr, cii) = INFINITY;
|
ci_x[CL_Y_OFFSET + cii] = INFINITY;
|
||||||
cluster_z(cptr, cii) = INFINITY;
|
ci_x[CL_Z_OFFSET + cii] = INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
ac++;
|
ac++;
|
||||||
}
|
}
|
||||||
|
|
||||||
atom->clusters[ci].bin = bin;
|
icluster_bin[ci] = bin;
|
||||||
atom->clusters[ci].bbminx = bbminx;
|
atom->iclusters[ci].bbminx = bbminx;
|
||||||
atom->clusters[ci].bbmaxx = bbmaxx;
|
atom->iclusters[ci].bbmaxx = bbmaxx;
|
||||||
atom->clusters[ci].bbminy = bbminy;
|
atom->iclusters[ci].bbminy = bbminy;
|
||||||
atom->clusters[ci].bbmaxy = bbmaxy;
|
atom->iclusters[ci].bbmaxy = bbmaxy;
|
||||||
atom->clusters[ci].bbminz = bbminz;
|
atom->iclusters[ci].bbminz = bbminz;
|
||||||
atom->clusters[ci].bbmaxz = bbmaxz;
|
atom->iclusters[ci].bbmaxz = bbmaxz;
|
||||||
atom->Nclusters_local++;
|
atom->Nclusters_local++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -620,6 +615,93 @@ void buildClusters(Atom *atom) {
|
|||||||
DEBUG_MESSAGE("buildClusters end\n");
|
DEBUG_MESSAGE("buildClusters end\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void defineJClusters(Atom *atom) {
|
||||||
|
DEBUG_MESSAGE("defineJClusters start\n");
|
||||||
|
|
||||||
|
for(int ci = 0; ci < atom->Nclusters_local + atom->Nclusters_ghost; ci++) {
|
||||||
|
int cj0 = CJ0_FROM_CI(ci);
|
||||||
|
|
||||||
|
if(CLUSTER_M == CLUSTER_N) {
|
||||||
|
atom->jclusters[cj0].bbminx = atom->iclusters[ci].bbminx;
|
||||||
|
atom->jclusters[cj0].bbmaxx = atom->iclusters[ci].bbmaxx;
|
||||||
|
atom->jclusters[cj0].bbminy = atom->iclusters[ci].bbminy;
|
||||||
|
atom->jclusters[cj0].bbmaxy = atom->iclusters[ci].bbmaxy;
|
||||||
|
atom->jclusters[cj0].bbminz = atom->iclusters[ci].bbminz;
|
||||||
|
atom->jclusters[cj0].bbmaxz = atom->iclusters[ci].bbmaxz;
|
||||||
|
atom->jclusters[cj0].natoms = atom->iclusters[ci].natoms;
|
||||||
|
|
||||||
|
} else if(CLUSTER_M > CLUSTER_N) {
|
||||||
|
int cj1 = CJ1_FROM_CI(ci);
|
||||||
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
||||||
|
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||||
|
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||||
|
|
||||||
|
for(int cii = 0; cii < MAX(atom->iclusters[ci].natoms, CLUSTER_N); cii++) {
|
||||||
|
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
|
||||||
|
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
|
||||||
|
MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii];
|
||||||
|
|
||||||
|
// TODO: To create the bounding boxes faster, we can use SIMD operations
|
||||||
|
if(bbminx > xtmp) { bbminx = xtmp; }
|
||||||
|
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
|
||||||
|
if(bbminy > ytmp) { bbminy = ytmp; }
|
||||||
|
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
|
||||||
|
if(bbminz > ztmp) { bbminz = ztmp; }
|
||||||
|
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
|
||||||
|
}
|
||||||
|
|
||||||
|
atom->jclusters[cj0].bbminx = bbminx;
|
||||||
|
atom->jclusters[cj0].bbmaxx = bbmaxx;
|
||||||
|
atom->jclusters[cj0].bbminy = bbminy;
|
||||||
|
atom->jclusters[cj0].bbmaxy = bbmaxy;
|
||||||
|
atom->jclusters[cj0].bbminz = bbminz;
|
||||||
|
atom->jclusters[cj0].bbmaxz = bbmaxz;
|
||||||
|
atom->jclusters[cj0].natoms = MAX(atom->iclusters[ci].natoms, CLUSTER_N);
|
||||||
|
|
||||||
|
bbminx = INFINITY, bbmaxx = -INFINITY;
|
||||||
|
bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||||
|
bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||||
|
|
||||||
|
for(int cii = CLUSTER_N; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
|
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
|
||||||
|
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
|
||||||
|
MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii];
|
||||||
|
|
||||||
|
// TODO: To create the bounding boxes faster, we can use SIMD operations
|
||||||
|
if(bbminx > xtmp) { bbminx = xtmp; }
|
||||||
|
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
|
||||||
|
if(bbminy > ytmp) { bbminy = ytmp; }
|
||||||
|
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
|
||||||
|
if(bbminz > ztmp) { bbminz = ztmp; }
|
||||||
|
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
|
||||||
|
}
|
||||||
|
|
||||||
|
atom->jclusters[cj1].bbminx = bbminx;
|
||||||
|
atom->jclusters[cj1].bbmaxx = bbmaxx;
|
||||||
|
atom->jclusters[cj1].bbminy = bbminy;
|
||||||
|
atom->jclusters[cj1].bbmaxy = bbmaxy;
|
||||||
|
atom->jclusters[cj1].bbminz = bbminz;
|
||||||
|
atom->jclusters[cj1].bbmaxz = bbmaxz;
|
||||||
|
atom->jclusters[cj1].natoms = MIN(0, atom->iclusters[ci].natoms - CLUSTER_N);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if(ci % 2 == 0) {
|
||||||
|
atom->jclusters[cj0].bbminx = MIN(atom->iclusters[ci].bbminx, atom->iclusters[ci + 1].bbminx);
|
||||||
|
atom->jclusters[cj0].bbmaxx = MAX(atom->iclusters[ci].bbmaxx, atom->iclusters[ci + 1].bbmaxx);
|
||||||
|
atom->jclusters[cj0].bbminy = MIN(atom->iclusters[ci].bbminy, atom->iclusters[ci + 1].bbminy);
|
||||||
|
atom->jclusters[cj0].bbmaxy = MAX(atom->iclusters[ci].bbmaxy, atom->iclusters[ci + 1].bbmaxy);
|
||||||
|
atom->jclusters[cj0].bbminz = MIN(atom->iclusters[ci].bbminz, atom->iclusters[ci + 1].bbminz);
|
||||||
|
atom->jclusters[cj0].bbmaxz = MAX(atom->iclusters[ci].bbmaxz, atom->iclusters[ci + 1].bbmaxz);
|
||||||
|
atom->jclusters[cj0].natoms = atom->iclusters[ci].natoms + atom->iclusters[ci + 1].natoms;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_MESSAGE("defineJClusters end\n");
|
||||||
|
}
|
||||||
|
|
||||||
void binClusters(Atom *atom) {
|
void binClusters(Atom *atom) {
|
||||||
DEBUG_MESSAGE("binClusters start\n");
|
DEBUG_MESSAGE("binClusters start\n");
|
||||||
|
|
||||||
@ -629,7 +711,7 @@ void binClusters(Atom *atom) {
|
|||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
||||||
DEBUG_MESSAGE("Cluster %d:\n", ci);
|
DEBUG_MESSAGE("Cluster %d:\n", ci);
|
||||||
DEBUG_MESSAGE("bin=%d, Natoms=%d, bbox={%f,%f},{%f,%f},{%f,%f}\n",
|
DEBUG_MESSAGE("bin=%d, Natoms=%d, bbox={%f,%f},{%f,%f},{%f,%f}\n",
|
||||||
atom->clusters[ci].bin,
|
icluster_bin[ci],
|
||||||
atom->clusters[ci].natoms,
|
atom->clusters[ci].natoms,
|
||||||
atom->clusters[ci].bbminx,
|
atom->clusters[ci].bbminx,
|
||||||
atom->clusters[ci].bbmaxx,
|
atom->clusters[ci].bbmaxx,
|
||||||
@ -638,104 +720,133 @@ void binClusters(Atom *atom) {
|
|||||||
atom->clusters[ci].bbminz,
|
atom->clusters[ci].bbminz,
|
||||||
atom->clusters[ci].bbmaxz);
|
atom->clusters[ci].bbmaxz);
|
||||||
|
|
||||||
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
for(int cii = 0; cii < CLUSTER_M; cii++) {
|
||||||
DEBUG_MESSAGE("%f, %f, %f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
|
DEBUG_MESSAGE("%f, %f, %f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const int nlocal = atom->Nclusters_local;
|
const int nlocal = atom->Nclusters_local;
|
||||||
int resize = 1;
|
const int ncji = MAX(1, CLUSTER_M / CLUSTER_N);
|
||||||
|
if(ncji > 2) {
|
||||||
|
fprintf(stderr, "binClusters(): ncji cannot be higher than 2!");
|
||||||
|
}
|
||||||
|
|
||||||
|
defineJClusters(atom);
|
||||||
|
|
||||||
|
int resize = 1;
|
||||||
while(resize > 0) {
|
while(resize > 0) {
|
||||||
resize = 0;
|
resize = 0;
|
||||||
|
|
||||||
for(int bin = 0; bin < mbins; bin++) {
|
for(int bin = 0; bin < mbins; bin++) {
|
||||||
cluster_bincount[bin] = 0;
|
bin_nclusters[bin] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int ci = 0; ci < nlocal && !resize; ci++) {
|
for(int ci = 0; ci < nlocal && !resize; ci++) {
|
||||||
int bin = atom->clusters[ci].bin;
|
// Assure we add this j-cluster only once in the bin
|
||||||
int c = cluster_bincount[bin];
|
if(!(CLUSTER_M < CLUSTER_N && ci % 2)) {
|
||||||
if(c < clusters_per_bin) {
|
int bin = icluster_bin[ci];
|
||||||
cluster_bins[bin * clusters_per_bin + c] = ci;
|
int c = bin_nclusters[bin];
|
||||||
cluster_bincount[bin]++;
|
if(c + 1 < clusters_per_bin) {
|
||||||
} else {
|
bin_clusters[bin * clusters_per_bin + c] = CJ0_FROM_CI(ci);
|
||||||
resize = 1;
|
bin_nclusters[bin]++;
|
||||||
|
|
||||||
|
if(CLUSTER_M > CLUSTER_N) {
|
||||||
|
int cj1 = CJ1_FROM_CI(ci);
|
||||||
|
if(atoms->jclusters[cj1].natoms > 0) {
|
||||||
|
bin_clusters[bin * clusters_per_bin + c + 1] = cj1;
|
||||||
|
bin_nclusters[bin]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
resize = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
|
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
|
||||||
const int ci = nlocal + cg;
|
// Assure we add this j-cluster only once in the bin
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
if(!(CLUSTER_M < CLUSTER_N && cg % 2)) {
|
||||||
MD_FLOAT ci_minz = atom->clusters[ci].bbminz;
|
const int ci = nlocal + cg;
|
||||||
MD_FLOAT xtmp, ytmp;
|
int ix = -1, iy = -1;
|
||||||
int ix = -1, iy = -1;
|
MD_FLOAT xtmp, ytmp;
|
||||||
|
|
||||||
xtmp = cluster_x(cptr, 0);
|
for(int cji = 0; cji < ncji && !resize; cji++) {
|
||||||
ytmp = cluster_y(cptr, 0);
|
int cj = (cji == 0) ? CJ0_FROM_CI(ci) : CJ1_FROM_CI(ci);
|
||||||
coord2bin2D(xtmp, ytmp, &ix, &iy);
|
const int n = atom->jclusters[cj].natoms;
|
||||||
ix = MAX(MIN(ix, mbinx - 1), 0);
|
|
||||||
iy = MAX(MIN(iy, mbiny - 1), 0);
|
|
||||||
for(int cii = 1; cii < atom->clusters[ci].natoms; cii++) {
|
|
||||||
int nix, niy;
|
|
||||||
xtmp = cluster_x(cptr, cii);
|
|
||||||
ytmp = cluster_y(cptr, cii);
|
|
||||||
coord2bin2D(xtmp, ytmp, &nix, &niy);
|
|
||||||
nix = MAX(MIN(nix, mbinx - 1), 0);
|
|
||||||
niy = MAX(MIN(niy, mbiny - 1), 0);
|
|
||||||
|
|
||||||
// Always put the cluster on the bin of its innermost atom so
|
if(n > 0) {
|
||||||
// the cluster should be closer to local clusters
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||||
if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; }
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
if(atom->PBCx[cg] < 0 && ix < nix) { ix = nix; }
|
MD_FLOAT cj_minz = atom->jclusters[cj].bbminz;
|
||||||
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
|
|
||||||
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
|
|
||||||
}
|
|
||||||
|
|
||||||
int bin = iy * mbinx + ix + 1;
|
xtmp = cj_x[CL_X_OFFSET + 0]
|
||||||
int c = cluster_bincount[bin];
|
ytmp = cj_x[CL_Y_OFFSET + 0]
|
||||||
if(c < clusters_per_bin) {
|
coord2bin2D(xtmp, ytmp, &ix, &iy);
|
||||||
// Insert the current ghost cluster in the bin keeping clusters
|
ix = MAX(MIN(ix, mbinx - 1), 0);
|
||||||
// sorted by z coordinate
|
iy = MAX(MIN(iy, mbiny - 1), 0);
|
||||||
int inserted = 0;
|
for(int cjj = 1; cjj < n; cjj++) {
|
||||||
for(int i = 0; i < c; i++) {
|
int nix, niy;
|
||||||
int last_cl = cluster_bins[bin * clusters_per_bin + i];
|
xtmp = cj_x[CL_X_OFFSET + cjj]
|
||||||
if(atom->clusters[last_cl].bbminz > ci_minz) {
|
ytmp = cj_x[CL_Y_OFFSET + cjj]
|
||||||
cluster_bins[bin * clusters_per_bin + i] = ci;
|
coord2bin2D(xtmp, ytmp, &nix, &niy);
|
||||||
|
nix = MAX(MIN(nix, mbinx - 1), 0);
|
||||||
|
niy = MAX(MIN(niy, mbiny - 1), 0);
|
||||||
|
|
||||||
for(int j = i + 1; j <= c; j++) {
|
// Always put the cluster on the bin of its innermost atom so
|
||||||
int tmp = cluster_bins[bin * clusters_per_bin + j];
|
// the cluster should be closer to local clusters
|
||||||
cluster_bins[bin * clusters_per_bin + j] = last_cl;
|
if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; }
|
||||||
last_cl = tmp;
|
if(atom->PBCx[cg] < 0 && ix < nix) { ix = nix; }
|
||||||
|
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
|
||||||
|
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
|
||||||
}
|
}
|
||||||
|
|
||||||
inserted = 1;
|
int bin = iy * mbinx + ix + 1;
|
||||||
break;
|
int c = bin_nclusters[bin];
|
||||||
|
if(c < clusters_per_bin) {
|
||||||
|
// Insert the current ghost cluster in the bin keeping clusters
|
||||||
|
// sorted by z coordinate
|
||||||
|
int inserted = 0;
|
||||||
|
for(int i = 0; i < c; i++) {
|
||||||
|
int last_cl = bin_clusters[bin * clusters_per_bin + i];
|
||||||
|
if(atom->jclusters[last_cl].bbminz > cj_minz) {
|
||||||
|
bin_clusters[bin * clusters_per_bin + i] = cj;
|
||||||
|
|
||||||
|
for(int j = i + 1; j <= c; j++) {
|
||||||
|
int tmp = bin_clusters[bin * clusters_per_bin + j];
|
||||||
|
bin_clusters[bin * clusters_per_bin + j] = last_cl;
|
||||||
|
last_cl = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
inserted = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!inserted) {
|
||||||
|
bin_clusters[bin * clusters_per_bin + c] = cj;
|
||||||
|
}
|
||||||
|
|
||||||
|
icluster_bin[ci] = bin;
|
||||||
|
bin_nclusters[bin]++;
|
||||||
|
} else {
|
||||||
|
resize = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!inserted) {
|
|
||||||
cluster_bins[bin * clusters_per_bin + c] = ci;
|
|
||||||
}
|
|
||||||
|
|
||||||
atom->clusters[ci].bin = bin;
|
|
||||||
cluster_bincount[bin]++;
|
|
||||||
} else {
|
|
||||||
resize = 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(resize) {
|
if(resize) {
|
||||||
free(cluster_bins);
|
free(bin_clusters);
|
||||||
clusters_per_bin *= 2;
|
clusters_per_bin *= 2;
|
||||||
cluster_bins = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
DEBUG_MESSAGE("cluster_bincount\n");
|
DEBUG_MESSAGE("bin_nclusters\n");
|
||||||
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", cluster_bincount[i]); }
|
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
|
||||||
DEBUG_MESSAGE("\n");
|
DEBUG_MESSAGE("\n");
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -747,16 +858,17 @@ void updateSingleAtoms(Atom *atom) {
|
|||||||
int Natom = 0;
|
int Natom = 0;
|
||||||
|
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *cvptr = cluster_velocity_ptr(ci);
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
||||||
atom_x(Natom) = cluster_x(cptr, cii);
|
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
|
||||||
atom_y(Natom) = cluster_y(cptr, cii);
|
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
|
||||||
atom_z(Natom) = cluster_z(cptr, cii);
|
atom_z(Natom) = ci_x[CL_Z_OFFSET + cii];
|
||||||
atom->vx[Natom] = cluster_x(cvptr, cii);
|
atom->vx[Natom] = ci_v[CL_X_OFFSET + cii];
|
||||||
atom->vy[Natom] = cluster_y(cvptr, cii);
|
atom->vy[Natom] = ci_v[CL_Y_OFFSET + cii];
|
||||||
atom->vz[Natom] = cluster_z(cvptr, cii);
|
atom->vz[Natom] = ci_v[CL_Z_OFFSET + cii];
|
||||||
Natom++;
|
Natom++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,20 +53,22 @@ void updatePbc(Atom *atom, Parameter *param, int firstUpdate) {
|
|||||||
|
|
||||||
for(int cg = 0; cg < atom->Nclusters_ghost; cg++) {
|
for(int cg = 0; cg < atom->Nclusters_ghost; cg++) {
|
||||||
const int ci = nlocal + cg;
|
const int ci = nlocal + cg;
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *bmap_cptr = cluster_pos_ptr(border_map[cg]);
|
int bmap_vec_base = CI_VECTOR_BASE_INDEX(border_map[cg]);
|
||||||
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *bmap_x = &atom->cl_x[bmap_vec_base];
|
||||||
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
||||||
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||||
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
||||||
MD_FLOAT xtmp = cluster_x(bmap_cptr, cii) + atom->PBCx[cg] * xprd;
|
MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cii] + atom->PBCx[cg] * xprd;
|
||||||
MD_FLOAT ytmp = cluster_y(bmap_cptr, cii) + atom->PBCy[cg] * yprd;
|
MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cii] + atom->PBCy[cg] * yprd;
|
||||||
MD_FLOAT ztmp = cluster_z(bmap_cptr, cii) + atom->PBCz[cg] * zprd;
|
MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cii] + atom->PBCz[cg] * zprd;
|
||||||
|
|
||||||
cluster_x(cptr, cii) = xtmp;
|
ci_x[CL_X_OFFSET + cii] = xtmp;
|
||||||
cluster_y(cptr, cii) = ytmp;
|
ci_x[CL_Y_OFFSET + cii] = ytmp;
|
||||||
cluster_z(cptr, cii) = ztmp;
|
ci_x[CL_Z_OFFSET + cii] = ztmp;
|
||||||
|
|
||||||
if(firstUpdate) {
|
if(firstUpdate) {
|
||||||
// TODO: To create the bounding boxes faster, we can use SIMD operations
|
// TODO: To create the bounding boxes faster, we can use SIMD operations
|
||||||
@ -80,18 +82,18 @@ void updatePbc(Atom *atom, Parameter *param, int firstUpdate) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(firstUpdate) {
|
if(firstUpdate) {
|
||||||
for(int cii = atom->clusters[ci].natoms; cii < CLUSTER_DIM_M; cii++) {
|
for(int cii = atom->clusters[ci].natoms; cii < CLUSTER_M; cii++) {
|
||||||
cluster_x(cptr, cii) = INFINITY;
|
ci_x[CL_X_OFFSET + cii] = INFINITY;
|
||||||
cluster_y(cptr, cii) = INFINITY;
|
ci_x[CL_Y_OFFSET + cii] = INFINITY;
|
||||||
cluster_z(cptr, cii) = INFINITY;
|
ci_x[CL_Z_OFFSET + cii] = INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
atom->clusters[ci].bbminx = bbminx;
|
atom->iclusters[ci].bbminx = bbminx;
|
||||||
atom->clusters[ci].bbmaxx = bbmaxx;
|
atom->iclusters[ci].bbmaxx = bbmaxx;
|
||||||
atom->clusters[ci].bbminy = bbminy;
|
atom->iclusters[ci].bbminy = bbminy;
|
||||||
atom->clusters[ci].bbmaxy = bbmaxy;
|
atom->iclusters[ci].bbmaxy = bbmaxy;
|
||||||
atom->clusters[ci].bbminz = bbminz;
|
atom->iclusters[ci].bbminz = bbminz;
|
||||||
atom->clusters[ci].bbmaxz = bbmaxz;
|
atom->iclusters[ci].bbmaxz = bbmaxz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -131,14 +133,17 @@ void updateAtomsPbc(Atom *atom, Parameter *param) {
|
|||||||
#define ADDGHOST(dx,dy,dz); \
|
#define ADDGHOST(dx,dy,dz); \
|
||||||
Nghost++; \
|
Nghost++; \
|
||||||
const int g_atom_idx = atom->Nclusters_local + Nghost; \
|
const int g_atom_idx = atom->Nclusters_local + Nghost; \
|
||||||
|
const int natoms_ci = atom->iclusters[ci].natoms; \
|
||||||
border_map[Nghost] = ci; \
|
border_map[Nghost] = ci; \
|
||||||
atom->PBCx[Nghost] = dx; \
|
atom->PBCx[Nghost] = dx; \
|
||||||
atom->PBCy[Nghost] = dy; \
|
atom->PBCy[Nghost] = dy; \
|
||||||
atom->PBCz[Nghost] = dz; \
|
atom->PBCz[Nghost] = dz; \
|
||||||
atom->clusters[g_atom_idx].natoms = atom->clusters[ci].natoms; \
|
atom->iclusters[g_atom_idx].natoms = natoms_ci; \
|
||||||
Nghost_atoms += atom->clusters[g_atom_idx].natoms; \
|
Nghost_atoms += natoms_ci; \
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) { \
|
int ci_sca_base = CI_SCALAR_BASE_INDEX(ci); \
|
||||||
atom->clusters[g_atom_idx].type[cii] = atom->clusters[ci].type[cii]; \
|
int cg_sca_base = CI_SCALAR_BASE_INDEX(cg); \
|
||||||
|
for(int cii = 0; cii < natoms_ci; cii++) { \
|
||||||
|
atom->cl_type[cg_sca_base + cii] = atom->cl_type[ci_sca_base + cii]; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* internal subroutines */
|
/* internal subroutines */
|
||||||
@ -171,12 +176,12 @@ void setupPbc(Atom *atom, Parameter *param) {
|
|||||||
border_map = atom->border_map;
|
border_map = atom->border_map;
|
||||||
}
|
}
|
||||||
|
|
||||||
MD_FLOAT bbminx = atom->clusters[ci].bbminx;
|
MD_FLOAT bbminx = atom->iclusters[ci].bbminx;
|
||||||
MD_FLOAT bbmaxx = atom->clusters[ci].bbmaxx;
|
MD_FLOAT bbmaxx = atom->iclusters[ci].bbmaxx;
|
||||||
MD_FLOAT bbminy = atom->clusters[ci].bbminy;
|
MD_FLOAT bbminy = atom->iclusters[ci].bbminy;
|
||||||
MD_FLOAT bbmaxy = atom->clusters[ci].bbmaxy;
|
MD_FLOAT bbmaxy = atom->iclusters[ci].bbmaxy;
|
||||||
MD_FLOAT bbminz = atom->clusters[ci].bbminz;
|
MD_FLOAT bbminz = atom->iclusters[ci].bbminz;
|
||||||
MD_FLOAT bbmaxz = atom->clusters[ci].bbmaxz;
|
MD_FLOAT bbmaxz = atom->iclusters[ci].bbmaxz;
|
||||||
|
|
||||||
/* Setup ghost atoms */
|
/* Setup ghost atoms */
|
||||||
/* 6 planes */
|
/* 6 planes */
|
||||||
@ -210,16 +215,17 @@ void setupPbc(Atom *atom, Parameter *param) {
|
|||||||
if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
|
if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
|
||||||
}
|
}
|
||||||
|
|
||||||
if(atom->Nclusters_local + Nghost + 1 >= atom->Nclusters_max) {
|
if(atom->Nclusters_local + Nghost + 2 >= atom->Nclusters_max) {
|
||||||
growClusters(atom);
|
growClusters(atom);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add dummy cluster at the end
|
// Add dummy cluster at the end
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(atom->Nclusters_local + Nghost + 1);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(atom->Nclusters_local + Nghost + 1);
|
||||||
for(int cii = 0; cii < CLUSTER_DIM_M; cii++) {
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
cluster_x(cptr, cii) = INFINITY;
|
for(int cii = 0; cii < MAX(CLUSTER_M, CLUSTER_N); cii++) {
|
||||||
cluster_y(cptr, cii) = INFINITY;
|
ci_x[CL_X_OFFSET + cii] = INFINITY;
|
||||||
cluster_z(cptr, cii) = INFINITY;
|
ci_x[CL_Y_OFFSET + cii] = INFINITY;
|
||||||
|
ci_x[CL_Z_OFFSET + cii] = INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
// increase by one to make it the ghost atom count
|
// increase by one to make it the ghost atom count
|
||||||
|
15
include_ISA.mk
Normal file
15
include_ISA.mk
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
ifeq ($(strip $(ISA)), SSE)
|
||||||
|
VECTOR_WIDTH=2
|
||||||
|
else ifeq ($(strip $(ISA)), AVX)
|
||||||
|
# Vector width is 4 but AVX2 instruction set is not supported
|
||||||
|
NO_AVX2=true
|
||||||
|
VECTOR_WIDTH=4
|
||||||
|
else ifeq ($(strip $(ISA)), AVX2)
|
||||||
|
VECTOR_WIDTH=4
|
||||||
|
else ifeq ($(strip $(ISA)), AVX512)
|
||||||
|
VECTOR_WIDTH=8
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(DATA_TYPE)), SP)
|
||||||
|
VECTOR_WIDTH=$((VECTOR_WIDTH * 2))
|
||||||
|
endif
|
Loading…
Reference in New Issue
Block a user