Make code compilable
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
c7360305c8
commit
2b441e691e
@ -51,7 +51,8 @@ void initAtom(Atom *atom) {
|
|||||||
atom->sigma6 = NULL;
|
atom->sigma6 = NULL;
|
||||||
atom->cutforcesq = NULL;
|
atom->cutforcesq = NULL;
|
||||||
atom->cutneighsq = NULL;
|
atom->cutneighsq = NULL;
|
||||||
atom->clusters = NULL;
|
atom->iclusters = NULL;
|
||||||
|
atom->jclusters = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAtom(Atom *atom, Parameter *param) {
|
void createAtom(Atom *atom, Parameter *param) {
|
||||||
@ -426,9 +427,11 @@ void growAtom(Atom *atom) {
|
|||||||
|
|
||||||
void growClusters(Atom *atom) {
|
void growClusters(Atom *atom) {
|
||||||
int nold = atom->Nclusters_max;
|
int nold = atom->Nclusters_max;
|
||||||
|
int jfac = MIN(1, CLUSTER_M / CLUSTER_N); // If M>N, we need to allocate more j-clusters
|
||||||
atom->Nclusters_max += DELTA;
|
atom->Nclusters_max += DELTA;
|
||||||
atom->clusters = (Cluster*) reallocate(atom->clusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster));
|
atom->iclusters = (Cluster*) reallocate(atom->iclusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster));
|
||||||
atom->cl_x = (MD_FLOAT*) reallocate(atom->cl_x, ALIGNMENT, atom->Nclusters_max * CLUSTER_DIM_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_DIM_M * 3 * sizeof(MD_FLOAT));
|
atom->jclusters = (Cluster*) reallocate(atom->jclusters, ALIGNMENT, atom->Nclusters_max * jfac * sizeof(Cluster), nold * jfac * sizeof(Cluster));
|
||||||
atom->cl_f = (MD_FLOAT*) reallocate(atom->cl_f, ALIGNMENT, atom->Nclusters_max * CLUSTER_DIM_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_DIM_M * 3 * sizeof(MD_FLOAT));
|
atom->cl_x = (MD_FLOAT*) reallocate(atom->cl_x, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_DIM_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_DIM_M * 3 * sizeof(MD_FLOAT));
|
atom->cl_f = (MD_FLOAT*) reallocate(atom->cl_f, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
|
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||||
}
|
}
|
||||||
|
@ -41,11 +41,12 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
MD_FLOAT epsilon = param->epsilon;
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
|
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
MD_FLOAT *fptr = cluster_force_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||||
cluster_x(fptr, cii) = 0.0;
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
cluster_y(fptr, cii) = 0.0;
|
ci_f[CL_X_OFFSET + cii] = 0.0;
|
||||||
cluster_z(fptr, cii) = 0.0;
|
ci_f[CL_Y_OFFSET + cii] = 0.0;
|
||||||
|
ci_f[CL_Z_OFFSET + cii] = 0.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -54,28 +55,31 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
MD_FLOAT *ciptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *cifptr = cluster_force_ptr(ci);
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||||
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[ci];
|
int numneighs = neighbor->numneigh[ci];
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k++) {
|
for(int k = 0; k < numneighs; k++) {
|
||||||
int cj = neighs[k];
|
int cj = neighs[k];
|
||||||
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||||
int any = 0;
|
int any = 0;
|
||||||
MD_FLOAT *cjptr = cluster_pos_ptr(cj);
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
|
|
||||||
for(int cii = 0; cii < CLUSTER_M; cii++) {
|
for(int cii = 0; cii < CLUSTER_M; cii++) {
|
||||||
MD_FLOAT xtmp = cluster_x(ciptr, cii);
|
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
|
||||||
MD_FLOAT ytmp = cluster_y(ciptr, cii);
|
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
|
||||||
MD_FLOAT ztmp = cluster_z(ciptr, cii);
|
MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii];
|
||||||
MD_FLOAT fix = 0;
|
MD_FLOAT fix = 0;
|
||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
|
|
||||||
for(int cjj = 0; cjj < CLUSTER_M; cjj++) {
|
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
|
||||||
if(ci != cj || cii != cjj) {
|
if(ci != cj || cii != cjj) {
|
||||||
MD_FLOAT delx = xtmp - cluster_x(cjptr, cjj);
|
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
|
||||||
MD_FLOAT dely = ytmp - cluster_y(cjptr, cjj);
|
MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
|
||||||
MD_FLOAT delz = ztmp - cluster_z(cjptr, cjj);
|
MD_FLOAT delz = ztmp - cj_x[CL_Z_OFFSET + cjj];
|
||||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||||
if(rsq < cutforcesq) {
|
if(rsq < cutforcesq) {
|
||||||
MD_FLOAT sr2 = 1.0 / rsq;
|
MD_FLOAT sr2 = 1.0 / rsq;
|
||||||
@ -98,9 +102,9 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
addStat(stats->clusters_outside_cutoff, 1);
|
addStat(stats->clusters_outside_cutoff, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
cluster_x(cifptr, cii) += fix;
|
ci_f[CL_X_OFFSET + cii] += fix;
|
||||||
cluster_y(cifptr, cii) += fiy;
|
ci_f[CL_Y_OFFSET + cii] += fiy;
|
||||||
cluster_z(cifptr, cii) += fiz;
|
ci_f[CL_Z_OFFSET + cii] += fiz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,8 +119,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||||
double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
|
||||||
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
|
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
|
||||||
int Nlocal = atom->Nlocal;
|
int Nlocal = atom->Nlocal;
|
||||||
int* neighs;
|
int* neighs;
|
||||||
@ -128,30 +131,31 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
MD_SIMD_FLOAT epsilon_vec = simd_broadcast(epsilon);
|
MD_SIMD_FLOAT epsilon_vec = simd_broadcast(epsilon);
|
||||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||||
const int unroll_j = CLUSTER_N / CLUSTER_M;
|
const int unroll_j = VECTOR_WIDTH / CLUSTER_N;
|
||||||
|
|
||||||
double S = getTimeStamp();
|
double S = getTimeStamp();
|
||||||
LIKWID_MARKER_START("force");
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
MD_FLOAT *ciptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *cifptr = cluster_force_ptr(ci);
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||||
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
||||||
int numneighs = neighbor->numneigh[ci];
|
int numneighs = neighbor->numneigh[ci];
|
||||||
|
|
||||||
MD_SIMD_FLOAT xi0_tmp = simd_broadcast(cluster_x(ciptr, 0));
|
MD_SIMD_FLOAT xi0_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 0]);
|
||||||
MD_SIMD_FLOAT yi0_tmp = simd_broadcast(cluster_y(ciptr, 0));
|
MD_SIMD_FLOAT xi1_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 1]);
|
||||||
MD_SIMD_FLOAT zi0_tmp = simd_broadcast(cluster_z(ciptr, 0));
|
MD_SIMD_FLOAT xi2_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 2]);
|
||||||
MD_SIMD_FLOAT xi1_tmp = simd_broadcast(cluster_x(ciptr, 1));
|
MD_SIMD_FLOAT xi3_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 3]);
|
||||||
MD_SIMD_FLOAT yi1_tmp = simd_broadcast(cluster_y(ciptr, 1));
|
MD_SIMD_FLOAT yi0_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 0]);
|
||||||
MD_SIMD_FLOAT zi1_tmp = simd_broadcast(cluster_z(ciptr, 1));
|
MD_SIMD_FLOAT yi1_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 1]);
|
||||||
MD_SIMD_FLOAT xi2_tmp = simd_broadcast(cluster_x(ciptr, 2));
|
MD_SIMD_FLOAT yi2_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 2]);
|
||||||
MD_SIMD_FLOAT yi2_tmp = simd_broadcast(cluster_y(ciptr, 2));
|
MD_SIMD_FLOAT yi3_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 3]);
|
||||||
MD_SIMD_FLOAT zi2_tmp = simd_broadcast(cluster_z(ciptr, 2));
|
MD_SIMD_FLOAT zi0_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 0]);
|
||||||
MD_SIMD_FLOAT xi3_tmp = simd_broadcast(cluster_x(ciptr, 3));
|
MD_SIMD_FLOAT zi1_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 1]);
|
||||||
MD_SIMD_FLOAT yi3_tmp = simd_broadcast(cluster_y(ciptr, 3));
|
MD_SIMD_FLOAT zi2_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 2]);
|
||||||
MD_SIMD_FLOAT zi3_tmp = simd_broadcast(cluster_z(ciptr, 3));
|
MD_SIMD_FLOAT zi3_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 3]);
|
||||||
MD_SIMD_FLOAT fix0 = simd_zero();
|
MD_SIMD_FLOAT fix0 = simd_zero();
|
||||||
MD_SIMD_FLOAT fiy0 = simd_zero();
|
MD_SIMD_FLOAT fiy0 = simd_zero();
|
||||||
MD_SIMD_FLOAT fiz0 = simd_zero();
|
MD_SIMD_FLOAT fiz0 = simd_zero();
|
||||||
@ -166,10 +170,15 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
MD_SIMD_FLOAT fiz3 = simd_zero();
|
MD_SIMD_FLOAT fiz3 = simd_zero();
|
||||||
|
|
||||||
for(int k = 0; k < numneighs; k += unroll_j) {
|
for(int k = 0; k < numneighs; k += unroll_j) {
|
||||||
int cj0 = neighs[k + 0];
|
int cj = neighs[k + 0];
|
||||||
unsigned int cond0 = (unsigned int)(ci == cj0);
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||||
MD_FLOAT *cj0_ptr = cluster_pos_ptr(cj0);
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
|
unsigned int cond0 = (unsigned int)(ci == cj);
|
||||||
|
unsigned int cond1 = cond0;
|
||||||
|
MD_SIMD_FLOAT xj_tmp, yj_tmp, zj_tmp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
MD_FLOAT *cj0_ptr = cluster_pos_ptr(cj0);
|
||||||
#if VECTOR_WIDTH == 8
|
#if VECTOR_WIDTH == 8
|
||||||
int cj1 = neighs[k + 1];
|
int cj1 = neighs[k + 1];
|
||||||
unsigned int cond1 = (unsigned int)(ci == cj1);
|
unsigned int cond1 = (unsigned int)(ci == cj1);
|
||||||
@ -182,6 +191,7 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
MD_SIMD_FLOAT yj_tmp = simd_load(cj0_ptr, 1);
|
MD_SIMD_FLOAT yj_tmp = simd_load(cj0_ptr, 1);
|
||||||
MD_SIMD_FLOAT zj_tmp = simd_load(cj0_ptr, 2);
|
MD_SIMD_FLOAT zj_tmp = simd_load(cj0_ptr, 2);
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
MD_SIMD_FLOAT delx0 = simd_sub(xi0_tmp, xj_tmp);
|
MD_SIMD_FLOAT delx0 = simd_sub(xi0_tmp, xj_tmp);
|
||||||
MD_SIMD_FLOAT dely0 = simd_sub(yi0_tmp, yj_tmp);
|
MD_SIMD_FLOAT dely0 = simd_sub(yi0_tmp, yj_tmp);
|
||||||
@ -247,18 +257,166 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
|||||||
fiz3 = simd_masked_add(fiz3, simd_mul(delz3, force3), cutoff_mask3);
|
fiz3 = simd_masked_add(fiz3, simd_mul(delz3, force3), cutoff_mask3);
|
||||||
}
|
}
|
||||||
|
|
||||||
cluster_x(cifptr, 0) = simd_horizontal_sum(fix0);
|
ci_f[CL_X_OFFSET + 0] = simd_horizontal_sum(fix0);
|
||||||
cluster_y(cifptr, 0) = simd_horizontal_sum(fiy0);
|
ci_f[CL_X_OFFSET + 1] = simd_horizontal_sum(fix1);
|
||||||
cluster_z(cifptr, 0) = simd_horizontal_sum(fiz0);
|
ci_f[CL_X_OFFSET + 2] = simd_horizontal_sum(fix2);
|
||||||
cluster_x(cifptr, 1) = simd_horizontal_sum(fix1);
|
ci_f[CL_X_OFFSET + 3] = simd_horizontal_sum(fix3);
|
||||||
cluster_y(cifptr, 1) = simd_horizontal_sum(fiy1);
|
ci_f[CL_Y_OFFSET + 0] = simd_horizontal_sum(fiy0);
|
||||||
cluster_z(cifptr, 1) = simd_horizontal_sum(fiz1);
|
ci_f[CL_Y_OFFSET + 1] = simd_horizontal_sum(fiy1);
|
||||||
cluster_x(cifptr, 2) = simd_horizontal_sum(fix2);
|
ci_f[CL_Y_OFFSET + 2] = simd_horizontal_sum(fiy2);
|
||||||
cluster_y(cifptr, 2) = simd_horizontal_sum(fiy2);
|
ci_f[CL_Y_OFFSET + 3] = simd_horizontal_sum(fiy3);
|
||||||
cluster_z(cifptr, 2) = simd_horizontal_sum(fiz2);
|
ci_f[CL_Z_OFFSET + 0] = simd_horizontal_sum(fiz0);
|
||||||
cluster_x(cifptr, 3) = simd_horizontal_sum(fix3);
|
ci_f[CL_Z_OFFSET + 1] = simd_horizontal_sum(fiz1);
|
||||||
cluster_y(cifptr, 3) = simd_horizontal_sum(fiy3);
|
ci_f[CL_Z_OFFSET + 2] = simd_horizontal_sum(fiz2);
|
||||||
cluster_z(cifptr, 3) = simd_horizontal_sum(fiz3);
|
ci_f[CL_Z_OFFSET + 3] = simd_horizontal_sum(fiz3);
|
||||||
|
|
||||||
|
addStat(stats->calculated_forces, 1);
|
||||||
|
addStat(stats->num_neighs, numneighs);
|
||||||
|
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||||
|
}
|
||||||
|
|
||||||
|
LIKWID_MARKER_STOP("force");
|
||||||
|
double E = getTimeStamp();
|
||||||
|
DEBUG_MESSAGE("computeForceLJ_4xn end\n");
|
||||||
|
return E-S;
|
||||||
|
}
|
||||||
|
|
||||||
|
double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||||
|
DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
|
||||||
|
int Nlocal = atom->Nlocal;
|
||||||
|
int* neighs;
|
||||||
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
|
MD_FLOAT sigma6 = param->sigma6;
|
||||||
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
|
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
|
||||||
|
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
|
||||||
|
MD_SIMD_FLOAT epsilon_vec = simd_broadcast(epsilon);
|
||||||
|
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||||
|
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||||
|
const int unroll_j = VECTOR_WIDTH / CLUSTER_N;
|
||||||
|
|
||||||
|
double S = getTimeStamp();
|
||||||
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
|
#pragma omp parallel for
|
||||||
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||||
|
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
|
||||||
|
int numneighs = neighbor->numneigh[ci];
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT xi0_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 0]);
|
||||||
|
MD_SIMD_FLOAT xi1_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 1]);
|
||||||
|
MD_SIMD_FLOAT xi2_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 2]);
|
||||||
|
MD_SIMD_FLOAT xi3_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 3]);
|
||||||
|
MD_SIMD_FLOAT yi0_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 0]);
|
||||||
|
MD_SIMD_FLOAT yi1_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 1]);
|
||||||
|
MD_SIMD_FLOAT yi2_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 2]);
|
||||||
|
MD_SIMD_FLOAT yi3_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 3]);
|
||||||
|
MD_SIMD_FLOAT zi0_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 0]);
|
||||||
|
MD_SIMD_FLOAT zi1_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 1]);
|
||||||
|
MD_SIMD_FLOAT zi2_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 2]);
|
||||||
|
MD_SIMD_FLOAT zi3_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 3]);
|
||||||
|
MD_SIMD_FLOAT fix0 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiy0 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiz0 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fix1 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiy1 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiz1 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fix2 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiy2 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiz2 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fix3 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiy3 = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiz3 = simd_zero();
|
||||||
|
|
||||||
|
for(int k = 0; k < numneighs; k += unroll_j) {
|
||||||
|
int cj = neighs[k + 0];
|
||||||
|
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||||
|
unsigned int cond0 = (unsigned int)(ci == cj);
|
||||||
|
unsigned int cond1 = cond0;
|
||||||
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
|
MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
|
||||||
|
MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
|
||||||
|
MD_SIMD_FLOAT zj_tmp = simd_load(&cj_x[CL_Z_OFFSET]);
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT delx0 = simd_sub(xi0_tmp, xj_tmp);
|
||||||
|
MD_SIMD_FLOAT dely0 = simd_sub(yi0_tmp, yj_tmp);
|
||||||
|
MD_SIMD_FLOAT delz0 = simd_sub(zi0_tmp, zj_tmp);
|
||||||
|
MD_SIMD_FLOAT delx1 = simd_sub(xi1_tmp, xj_tmp);
|
||||||
|
MD_SIMD_FLOAT dely1 = simd_sub(yi1_tmp, yj_tmp);
|
||||||
|
MD_SIMD_FLOAT delz1 = simd_sub(zi1_tmp, zj_tmp);
|
||||||
|
MD_SIMD_FLOAT delx2 = simd_sub(xi2_tmp, xj_tmp);
|
||||||
|
MD_SIMD_FLOAT dely2 = simd_sub(yi2_tmp, yj_tmp);
|
||||||
|
MD_SIMD_FLOAT delz2 = simd_sub(zi2_tmp, zj_tmp);
|
||||||
|
MD_SIMD_FLOAT delx3 = simd_sub(xi3_tmp, xj_tmp);
|
||||||
|
MD_SIMD_FLOAT dely3 = simd_sub(yi3_tmp, yj_tmp);
|
||||||
|
MD_SIMD_FLOAT delz3 = simd_sub(zi3_tmp, zj_tmp);
|
||||||
|
|
||||||
|
#if VECTOR_WIDTH == 8
|
||||||
|
MD_SIMD_MASK excl_mask0 = simd_mask_from_u32((unsigned int)(0xff - 0x1 * cond0 - 0x10 * cond1));
|
||||||
|
MD_SIMD_MASK excl_mask1 = simd_mask_from_u32((unsigned int)(0xff - 0x2 * cond0 - 0x20 * cond1));
|
||||||
|
MD_SIMD_MASK excl_mask2 = simd_mask_from_u32((unsigned int)(0xff - 0x4 * cond0 - 0x40 * cond1));
|
||||||
|
MD_SIMD_MASK excl_mask3 = simd_mask_from_u32((unsigned int)(0xff - 0x8 * cond0 - 0x80 * cond1));
|
||||||
|
#else
|
||||||
|
MD_SIMD_MASK excl_mask0 = simd_mask_from_u32((unsigned int)(0xf - 0x1 * cond0));
|
||||||
|
MD_SIMD_MASK excl_mask1 = simd_mask_from_u32((unsigned int)(0xf - 0x2 * cond0));
|
||||||
|
MD_SIMD_MASK excl_mask2 = simd_mask_from_u32((unsigned int)(0xf - 0x4 * cond0));
|
||||||
|
MD_SIMD_MASK excl_mask3 = simd_mask_from_u32((unsigned int)(0xf - 0x8 * cond0));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, simd_mul(delz0, delz0)));
|
||||||
|
MD_SIMD_FLOAT rsq1 = simd_fma(delx1, delx1, simd_fma(dely1, dely1, simd_mul(delz1, delz1)));
|
||||||
|
MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, simd_mul(delz2, delz2)));
|
||||||
|
MD_SIMD_FLOAT rsq3 = simd_fma(delx3, delx3, simd_fma(dely3, dely3, simd_mul(delz3, delz3)));
|
||||||
|
|
||||||
|
MD_SIMD_MASK cutoff_mask0 = simd_mask_and(excl_mask0, simd_mask_cond_lt(rsq0, cutforcesq_vec));
|
||||||
|
MD_SIMD_MASK cutoff_mask1 = simd_mask_and(excl_mask1, simd_mask_cond_lt(rsq1, cutforcesq_vec));
|
||||||
|
MD_SIMD_MASK cutoff_mask2 = simd_mask_and(excl_mask2, simd_mask_cond_lt(rsq2, cutforcesq_vec));
|
||||||
|
MD_SIMD_MASK cutoff_mask3 = simd_mask_and(excl_mask3, simd_mask_cond_lt(rsq3, cutforcesq_vec));
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
|
||||||
|
MD_SIMD_FLOAT sr2_1 = simd_reciprocal(rsq1);
|
||||||
|
MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
|
||||||
|
MD_SIMD_FLOAT sr2_3 = simd_reciprocal(rsq3);
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT sr6_0 = simd_mul(sr2_0, simd_mul(sr2_0, simd_mul(sr2_0, sigma6_vec)));
|
||||||
|
MD_SIMD_FLOAT sr6_1 = simd_mul(sr2_1, simd_mul(sr2_1, simd_mul(sr2_1, sigma6_vec)));
|
||||||
|
MD_SIMD_FLOAT sr6_2 = simd_mul(sr2_2, simd_mul(sr2_2, simd_mul(sr2_2, sigma6_vec)));
|
||||||
|
MD_SIMD_FLOAT sr6_3 = simd_mul(sr2_3, simd_mul(sr2_3, simd_mul(sr2_3, sigma6_vec)));
|
||||||
|
|
||||||
|
MD_SIMD_FLOAT force0 = simd_mul(c48_vec, simd_mul(sr6_0, simd_mul(simd_sub(sr6_0, c05_vec), simd_mul(sr2_0, epsilon_vec))));
|
||||||
|
MD_SIMD_FLOAT force1 = simd_mul(c48_vec, simd_mul(sr6_1, simd_mul(simd_sub(sr6_1, c05_vec), simd_mul(sr2_1, epsilon_vec))));
|
||||||
|
MD_SIMD_FLOAT force2 = simd_mul(c48_vec, simd_mul(sr6_2, simd_mul(simd_sub(sr6_2, c05_vec), simd_mul(sr2_2, epsilon_vec))));
|
||||||
|
MD_SIMD_FLOAT force3 = simd_mul(c48_vec, simd_mul(sr6_3, simd_mul(simd_sub(sr6_3, c05_vec), simd_mul(sr2_3, epsilon_vec))));
|
||||||
|
|
||||||
|
fix0 = simd_masked_add(fix0, simd_mul(delx0, force0), cutoff_mask0);
|
||||||
|
fiy0 = simd_masked_add(fiy0, simd_mul(dely0, force0), cutoff_mask0);
|
||||||
|
fiz0 = simd_masked_add(fiz0, simd_mul(delz0, force0), cutoff_mask0);
|
||||||
|
fix1 = simd_masked_add(fix1, simd_mul(delx1, force1), cutoff_mask1);
|
||||||
|
fiy1 = simd_masked_add(fiy1, simd_mul(dely1, force1), cutoff_mask1);
|
||||||
|
fiz1 = simd_masked_add(fiz1, simd_mul(delz1, force1), cutoff_mask1);
|
||||||
|
fix2 = simd_masked_add(fix2, simd_mul(delx2, force2), cutoff_mask2);
|
||||||
|
fiy2 = simd_masked_add(fiy2, simd_mul(dely2, force2), cutoff_mask2);
|
||||||
|
fiz2 = simd_masked_add(fiz2, simd_mul(delz2, force2), cutoff_mask2);
|
||||||
|
fix3 = simd_masked_add(fix3, simd_mul(delx3, force3), cutoff_mask3);
|
||||||
|
fiy3 = simd_masked_add(fiy3, simd_mul(dely3, force3), cutoff_mask3);
|
||||||
|
fiz3 = simd_masked_add(fiz3, simd_mul(delz3, force3), cutoff_mask3);
|
||||||
|
}
|
||||||
|
|
||||||
|
ci_f[CL_X_OFFSET + 0] = simd_horizontal_sum(fix0);
|
||||||
|
ci_f[CL_X_OFFSET + 1] = simd_horizontal_sum(fix1);
|
||||||
|
ci_f[CL_X_OFFSET + 2] = simd_horizontal_sum(fix2);
|
||||||
|
ci_f[CL_X_OFFSET + 3] = simd_horizontal_sum(fix3);
|
||||||
|
ci_f[CL_Y_OFFSET + 0] = simd_horizontal_sum(fiy0);
|
||||||
|
ci_f[CL_Y_OFFSET + 1] = simd_horizontal_sum(fiy1);
|
||||||
|
ci_f[CL_Y_OFFSET + 2] = simd_horizontal_sum(fiy2);
|
||||||
|
ci_f[CL_Y_OFFSET + 3] = simd_horizontal_sum(fiy3);
|
||||||
|
ci_f[CL_Z_OFFSET + 0] = simd_horizontal_sum(fiz0);
|
||||||
|
ci_f[CL_Z_OFFSET + 1] = simd_horizontal_sum(fiz1);
|
||||||
|
ci_f[CL_Z_OFFSET + 2] = simd_horizontal_sum(fiz2);
|
||||||
|
ci_f[CL_Z_OFFSET + 3] = simd_horizontal_sum(fiz3);
|
||||||
|
|
||||||
addStat(stats->calculated_forces, 1);
|
addStat(stats->calculated_forces, 1);
|
||||||
addStat(stats->num_neighs, numneighs);
|
addStat(stats->num_neighs, numneighs);
|
||||||
|
@ -45,20 +45,7 @@ static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { retur
|
|||||||
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ); }
|
||||||
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); }
|
||||||
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
|
static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); }
|
||||||
|
static inline MD_SIMD_FLOAT simd_load(MD_FLOAT *p) { return _mm512_load_pd(p); }
|
||||||
static MD_SIMD_FLOAT simd_load2(MD_FLOAT *c0, MD_FLOAT *c1, int d) {
|
|
||||||
MD_SIMD_FLOAT x;
|
|
||||||
#ifdef CLUSTER_AOS
|
|
||||||
__m256i aos_gather_vindex = _mm256_set_epi32(9, 6, 3, 0, 9, 6, 3, 0);
|
|
||||||
__m256i vindex = _mm256_add_epi32(aos_gather_vindex, _mm256_set1_epi32(d));
|
|
||||||
x = _mm512_mask_i32gather_pd(simd_zero(), simd_mask_from_u32(0x0f), vindex, c0, sizeof(double));
|
|
||||||
x = _mm512_mask_i32gather_pd(x, simd_mask_from_u32(0xf0), vindex, c1, sizeof(double));
|
|
||||||
#else
|
|
||||||
x = _mm512_load_pd(&c0[d * CLUSTER_M]);
|
|
||||||
x = _mm512_insertf64x4(x, _mm256_load_pd(&c1[d * CLUSTER_M]), 1);
|
|
||||||
#endif
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
||||||
MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee));
|
MD_SIMD_FLOAT x = _mm512_add_pd(a, _mm512_shuffle_f64x2(a, a, 0xee));
|
||||||
@ -82,6 +69,7 @@ static inline MD_SIMD_FLOAT simd_zero() { return _mm256_set1_pd(0.0); }
|
|||||||
static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_add_pd(a, b); }
|
static inline MD_SIMD_FLOAT simd_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_add_pd(a, b); }
|
||||||
static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_sub_pd(a, b); }
|
static inline MD_SIMD_FLOAT simd_sub(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_sub_pd(a, b); }
|
||||||
static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_mul_pd(a, b); }
|
static inline MD_SIMD_FLOAT simd_mul(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_mul_pd(a, b); }
|
||||||
|
static inline MD_SIMD_FLOAT simd_load(MD_FLOAT *p) { return _mm256_load_pd(p); }
|
||||||
|
|
||||||
#ifdef NO_AVX2
|
#ifdef NO_AVX2
|
||||||
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
|
static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvtps_pd(_mm_rcp_ps(_mm256_cvtpd_ps(a))); }
|
||||||
@ -124,20 +112,6 @@ static inline MD_FLOAT simd_horizontal_sum(MD_SIMD_FLOAT a) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static MD_SIMD_FLOAT simd_load(MD_FLOAT *c0, int d) {
|
|
||||||
MD_SIMD_FLOAT x;
|
|
||||||
#ifdef CLUSTER_AOS
|
|
||||||
#ifdef NO_AVX2
|
|
||||||
#error "Not possible to use AoS cluster layout without AVX2 support!"
|
|
||||||
#endif
|
|
||||||
__m128i aos_gather_vindex = _mm128_set_epi32(9, 6, 3, 0);
|
|
||||||
__m128i vindex = _mm128_add_epi32(aos_gather_vindex, _mm128_set1_epi32(d));
|
|
||||||
x = _mm256_i32gather_pd(c0, vindex, sizeof(double));
|
|
||||||
#else
|
|
||||||
x = _mm256_load_pd(&c0[d * CLUSTER_M]);
|
|
||||||
#endif
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -101,17 +101,18 @@ void initialIntegrate(Parameter *param, Atom *atom) {
|
|||||||
DEBUG_MESSAGE("initialIntegrate start\n");
|
DEBUG_MESSAGE("initialIntegrate start\n");
|
||||||
|
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
MD_FLOAT *ciptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *civptr = cluster_velocity_ptr(ci);
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
MD_FLOAT *cifptr = cluster_force_ptr(ci);
|
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
cluster_x(civptr, cii) += param->dtforce * cluster_x(cifptr, cii);
|
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
|
||||||
cluster_y(civptr, cii) += param->dtforce * cluster_y(cifptr, cii);
|
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
|
||||||
cluster_z(civptr, cii) += param->dtforce * cluster_z(cifptr, cii);
|
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||||
cluster_x(ciptr, cii) += param->dt * cluster_x(civptr, cii);
|
ci_x[CL_X_OFFSET + cii] += param->dt * ci_v[CL_X_OFFSET + cii];
|
||||||
cluster_y(ciptr, cii) += param->dt * cluster_y(civptr, cii);
|
ci_x[CL_Y_OFFSET + cii] += param->dt * ci_v[CL_Y_OFFSET + cii];
|
||||||
cluster_z(ciptr, cii) += param->dt * cluster_z(civptr, cii);
|
ci_x[CL_Z_OFFSET + cii] += param->dt * ci_v[CL_Z_OFFSET + cii];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,13 +123,14 @@ void finalIntegrate(Parameter *param, Atom *atom) {
|
|||||||
DEBUG_MESSAGE("finalIntegrate start\n");
|
DEBUG_MESSAGE("finalIntegrate start\n");
|
||||||
|
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||||
MD_FLOAT *civptr = cluster_velocity_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
MD_FLOAT *cifptr = cluster_force_ptr(ci);
|
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||||
|
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
cluster_x(civptr, cii) += param->dtforce * cluster_x(cifptr, cii);
|
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
|
||||||
cluster_y(civptr, cii) += param->dtforce * cluster_y(cifptr, cii);
|
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
|
||||||
cluster_z(civptr, cii) += param->dtforce * cluster_z(cifptr, cii);
|
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -753,7 +753,7 @@ void binClusters(Atom *atom) {
|
|||||||
|
|
||||||
if(CLUSTER_M > CLUSTER_N) {
|
if(CLUSTER_M > CLUSTER_N) {
|
||||||
int cj1 = CJ1_FROM_CI(ci);
|
int cj1 = CJ1_FROM_CI(ci);
|
||||||
if(atoms->jclusters[cj1].natoms > 0) {
|
if(atom->jclusters[cj1].natoms > 0) {
|
||||||
bin_clusters[bin * clusters_per_bin + c + 1] = cj1;
|
bin_clusters[bin * clusters_per_bin + c + 1] = cj1;
|
||||||
bin_nclusters[bin]++;
|
bin_nclusters[bin]++;
|
||||||
}
|
}
|
||||||
@ -780,15 +780,15 @@ void binClusters(Atom *atom) {
|
|||||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||||
MD_FLOAT cj_minz = atom->jclusters[cj].bbminz;
|
MD_FLOAT cj_minz = atom->jclusters[cj].bbminz;
|
||||||
|
|
||||||
xtmp = cj_x[CL_X_OFFSET + 0]
|
xtmp = cj_x[CL_X_OFFSET + 0];
|
||||||
ytmp = cj_x[CL_Y_OFFSET + 0]
|
ytmp = cj_x[CL_Y_OFFSET + 0];
|
||||||
coord2bin2D(xtmp, ytmp, &ix, &iy);
|
coord2bin2D(xtmp, ytmp, &ix, &iy);
|
||||||
ix = MAX(MIN(ix, mbinx - 1), 0);
|
ix = MAX(MIN(ix, mbinx - 1), 0);
|
||||||
iy = MAX(MIN(iy, mbiny - 1), 0);
|
iy = MAX(MIN(iy, mbiny - 1), 0);
|
||||||
for(int cjj = 1; cjj < n; cjj++) {
|
for(int cjj = 1; cjj < n; cjj++) {
|
||||||
int nix, niy;
|
int nix, niy;
|
||||||
xtmp = cj_x[CL_X_OFFSET + cjj]
|
xtmp = cj_x[CL_X_OFFSET + cjj];
|
||||||
ytmp = cj_x[CL_Y_OFFSET + cjj]
|
ytmp = cj_x[CL_Y_OFFSET + cjj];
|
||||||
coord2bin2D(xtmp, ytmp, &nix, &niy);
|
coord2bin2D(xtmp, ytmp, &nix, &niy);
|
||||||
nix = MAX(MIN(nix, mbinx - 1), 0);
|
nix = MAX(MIN(nix, mbinx - 1), 0);
|
||||||
niy = MAX(MIN(niy, mbiny - 1), 0);
|
niy = MAX(MIN(niy, mbiny - 1), 0);
|
||||||
@ -862,7 +862,7 @@ void updateSingleAtoms(Atom *atom) {
|
|||||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
|
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
|
||||||
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
|
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
|
||||||
atom_z(Natom) = ci_x[CL_Z_OFFSET + cii];
|
atom_z(Natom) = ci_x[CL_Z_OFFSET + cii];
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
#include <allocate.h>
|
#include <allocate.h>
|
||||||
#include <neighbor.h>
|
#include <neighbor.h>
|
||||||
|
#include <util.h>
|
||||||
|
|
||||||
#define DELTA 20000
|
#define DELTA 20000
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ void updatePbc(Atom *atom, Parameter *param, int firstUpdate) {
|
|||||||
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||||
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||||
|
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; cii++) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||||
MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cii] + atom->PBCx[cg] * xprd;
|
MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cii] + atom->PBCx[cg] * xprd;
|
||||||
MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cii] + atom->PBCy[cg] * yprd;
|
MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cii] + atom->PBCy[cg] * yprd;
|
||||||
MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cii] + atom->PBCz[cg] * zprd;
|
MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cii] + atom->PBCz[cg] * zprd;
|
||||||
@ -82,7 +83,7 @@ void updatePbc(Atom *atom, Parameter *param, int firstUpdate) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(firstUpdate) {
|
if(firstUpdate) {
|
||||||
for(int cii = atom->clusters[ci].natoms; cii < CLUSTER_M; cii++) {
|
for(int cii = atom->iclusters[ci].natoms; cii < CLUSTER_M; cii++) {
|
||||||
ci_x[CL_X_OFFSET + cii] = INFINITY;
|
ci_x[CL_X_OFFSET + cii] = INFINITY;
|
||||||
ci_x[CL_Y_OFFSET + cii] = INFINITY;
|
ci_x[CL_Y_OFFSET + cii] = INFINITY;
|
||||||
ci_x[CL_Z_OFFSET + cii] = INFINITY;
|
ci_x[CL_Z_OFFSET + cii] = INFINITY;
|
||||||
@ -132,13 +133,13 @@ void updateAtomsPbc(Atom *atom, Parameter *param) {
|
|||||||
* that are then enforced in updatePbc */
|
* that are then enforced in updatePbc */
|
||||||
#define ADDGHOST(dx,dy,dz); \
|
#define ADDGHOST(dx,dy,dz); \
|
||||||
Nghost++; \
|
Nghost++; \
|
||||||
const int g_atom_idx = atom->Nclusters_local + Nghost; \
|
const int cg = atom->Nclusters_local + Nghost; \
|
||||||
const int natoms_ci = atom->iclusters[ci].natoms; \
|
const int natoms_ci = atom->iclusters[ci].natoms; \
|
||||||
border_map[Nghost] = ci; \
|
border_map[Nghost] = ci; \
|
||||||
atom->PBCx[Nghost] = dx; \
|
atom->PBCx[Nghost] = dx; \
|
||||||
atom->PBCy[Nghost] = dy; \
|
atom->PBCy[Nghost] = dy; \
|
||||||
atom->PBCz[Nghost] = dz; \
|
atom->PBCz[Nghost] = dz; \
|
||||||
atom->iclusters[g_atom_idx].natoms = natoms_ci; \
|
atom->iclusters[cg].natoms = natoms_ci; \
|
||||||
Nghost_atoms += natoms_ci; \
|
Nghost_atoms += natoms_ci; \
|
||||||
int ci_sca_base = CI_SCALAR_BASE_INDEX(ci); \
|
int ci_sca_base = CI_SCALAR_BASE_INDEX(ci); \
|
||||||
int cg_sca_base = CI_SCALAR_BASE_INDEX(cg); \
|
int cg_sca_base = CI_SCALAR_BASE_INDEX(cg); \
|
||||||
|
@ -18,11 +18,11 @@ void initStats(Stats *s) {
|
|||||||
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
|
|
||||||
const int MxN = CLUSTER_DIM_M * CLUSTER_DIM_N;
|
const int MxN = CLUSTER_M * CLUSTER_N;
|
||||||
double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local);
|
double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local);
|
||||||
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
|
||||||
(double)(stats->num_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
(double)(stats->num_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
|
||||||
double avg_neigh_atom = (stats->num_neighs * CLUSTER_DIM_N) / (double)(atom->Nlocal * (param->ntimes + 1));
|
double avg_neigh_atom = (stats->num_neighs * CLUSTER_N) / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||||
double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces);
|
double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces);
|
||||||
double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
|
||||||
|
|
||||||
|
@ -27,9 +27,10 @@ int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep
|
|||||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||||
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
|
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
fprintf(fp, "%.4f %.4f %.4f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
|
||||||
|
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fprintf(fp, "\n\n");
|
fprintf(fp, "\n\n");
|
||||||
@ -70,9 +71,10 @@ int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep
|
|||||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||||
fprintf(fp, "POINTS %d double\n", atom->Nghost);
|
fprintf(fp, "POINTS %d double\n", atom->Nghost);
|
||||||
for(int ci = atom->Nclusters_local; ci < atom->Nclusters_local + atom->Nclusters_ghost; ++ci) {
|
for(int ci = atom->Nclusters_local; ci < atom->Nclusters_local + atom->Nclusters_ghost; ++ci) {
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
fprintf(fp, "%.4f %.4f %.4f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
|
||||||
|
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fprintf(fp, "\n\n");
|
fprintf(fp, "\n\n");
|
||||||
@ -116,18 +118,19 @@ int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
|
|||||||
fprintf(fp, "DATASET POLYDATA\n");
|
fprintf(fp, "DATASET POLYDATA\n");
|
||||||
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
||||||
for(int ci = 0; ci < N; ++ci) {
|
for(int ci = 0; ci < N; ++ci) {
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
fprintf(fp, "%.4f %.4f %.4f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
|
||||||
|
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
|
||||||
}
|
}
|
||||||
|
|
||||||
tot_lines += atom->clusters[ci].natoms;
|
tot_lines += atom->iclusters[ci].natoms;
|
||||||
}
|
}
|
||||||
fprintf(fp, "\n\n");
|
fprintf(fp, "\n\n");
|
||||||
fprintf(fp, "LINES %d %d\n", N, N + tot_lines);
|
fprintf(fp, "LINES %d %d\n", N, N + tot_lines);
|
||||||
for(int ci = 0; ci < N; ++ci) {
|
for(int ci = 0; ci < N; ++ci) {
|
||||||
fprintf(fp, "%d ", atom->clusters[ci].natoms);
|
fprintf(fp, "%d ", atom->iclusters[ci].natoms);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
|
||||||
fprintf(fp, "%d ", i++);
|
fprintf(fp, "%d ", i++);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,18 +160,19 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
|
|||||||
fprintf(fp, "DATASET POLYDATA\n");
|
fprintf(fp, "DATASET POLYDATA\n");
|
||||||
fprintf(fp, "POINTS %d double\n", atom->Nghost);
|
fprintf(fp, "POINTS %d double\n", atom->Nghost);
|
||||||
for(int ci = atom->Nclusters_local; ci < N; ++ci) {
|
for(int ci = atom->Nclusters_local; ci < N; ++ci) {
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
fprintf(fp, "%.4f %.4f %.4f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
|
||||||
|
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
|
||||||
}
|
}
|
||||||
|
|
||||||
tot_lines += atom->clusters[ci].natoms;
|
tot_lines += atom->iclusters[ci].natoms;
|
||||||
}
|
}
|
||||||
fprintf(fp, "\n\n");
|
fprintf(fp, "\n\n");
|
||||||
fprintf(fp, "LINES %d %d\n", atom->Nclusters_ghost, atom->Nclusters_ghost + tot_lines);
|
fprintf(fp, "LINES %d %d\n", atom->Nclusters_ghost, atom->Nclusters_ghost + tot_lines);
|
||||||
for(int ci = atom->Nclusters_local; ci < N; ++ci) {
|
for(int ci = atom->Nclusters_local; ci < N; ++ci) {
|
||||||
fprintf(fp, "%d ", atom->clusters[ci].natoms);
|
fprintf(fp, "%d ", atom->iclusters[ci].natoms);
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
|
||||||
fprintf(fp, "%d ", i++);
|
fprintf(fp, "%d ", i++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,11 +52,12 @@ void xtc_init(const char *filename, Atom *atom, int timestep) {
|
|||||||
void xtc_write(Atom *atom, int timestep, int write_pos, int write_vel) {
|
void xtc_write(Atom *atom, int timestep, int write_pos, int write_vel) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
|
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
|
||||||
MD_FLOAT *cptr = cluster_pos_ptr(ci);
|
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||||
|
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||||
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
|
||||||
x_buf[i][XX] = cluster_x(cptr, cii);
|
x_buf[i][XX] = ci_x[CL_X_OFFSET + cii];
|
||||||
x_buf[i][YY] = cluster_y(cptr, cii);
|
x_buf[i][YY] = ci_x[CL_Y_OFFSET + cii];
|
||||||
x_buf[i][ZZ] = cluster_z(cptr, cii);
|
x_buf[i][ZZ] = ci_x[CL_Z_OFFSET + cii];
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user