Final MPI version

This commit is contained in:
JairoBuitrago
2024-04-15 16:53:25 +02:00
parent a6a269703d
commit a13a0f3bae
33 changed files with 3568 additions and 624 deletions

View File

@@ -12,7 +12,8 @@
#include <atom.h>
#include <allocate.h>
#include <util.h>
#include <mpi.h>
void initAtom(Atom *atom) {
atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
@@ -27,6 +28,7 @@ void initAtom(Atom *atom) {
atom->Nclusters = 0;
atom->Nclusters_local = 0;
atom->Nclusters_ghost = 0;
atom->NmaxGhost = 0; //Temporal
atom->Nclusters_max = 0;
atom->type = NULL;
atom->ntypes = 0;
@@ -37,10 +39,19 @@ void initAtom(Atom *atom) {
atom->iclusters = NULL;
atom->jclusters = NULL;
atom->icluster_bin = NULL;
atom->PBCx = NULL;
atom->PBCy = NULL;
atom->PBCz = NULL;
initMasks(atom);
//MPI
Box *mybox = &(atom->mybox);
mybox->xprd = mybox->yprd = mybox->zprd = 0;
mybox->lo[0] = mybox->lo[1] = mybox->lo[2] = 0;
mybox->hi[0] = mybox->hi[1] = mybox->hi[2] = 0;
}
void createAtom(Atom *atom, Parameter *param) {
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
@@ -90,7 +101,7 @@ void createAtom(Atom *atom, Parameter *param) {
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1;
for(m = 0; m < 5; m++) { myrandom(&n); }
vxtmp = myrandom(&n);
@@ -128,22 +139,26 @@ int type_str2int(const char *type) {
}
int readAtom(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int len = strlen(param->input_file);
if(strncmp(&param->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
if(strncmp(&param->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
if(strncmp(&param->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
exit(-1);
return -1;
}
int readAtom_pdb(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int read_atoms = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@@ -153,11 +168,11 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
char *item = strtok(line, " ");
if(strncmp(item, "CRYST1", 6) == 0) {
param->xlo = 0.0;
param->xhi = atof(strtok(NULL, " "));
param->xhi = atof(strtok(NULL, "\t "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
@@ -166,23 +181,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
char *label;
int atom_id, comp_id;
MD_FLOAT occupancy, charge;
atom_id = atoi(strtok(NULL, " ")) - 1;
atom_id = atoi(strtok(NULL, "\t ")) - 1;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
label = strtok(NULL, " ");
comp_id = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
label = strtok(NULL, "\t ");
comp_id = atoi(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom->vx[atom_id] = 0.0;
atom->vy[atom_id] = 0.0;
atom->vz[atom_id] = 0.0;
occupancy = atof(strtok(NULL, " "));
charge = atof(strtok(NULL, " "));
occupancy = atof(strtok(NULL, "\t "));
charge = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
@@ -194,14 +209,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
strncmp(item, "ENDMDL", 6) == 0) {
// Do nothing
} else {
fprintf(stderr, "Invalid item: %s\n", item);
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
}
if(!read_atoms) {
fprintf(stderr, "Input error: No atoms read!\n");
if(me==0) fprintf(stderr, "Input error: No atoms read!\n");
exit(-1);
return -1;
}
@@ -217,12 +232,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_gro(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
char desc[MAXLINE];
@@ -231,7 +249,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
int i = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@@ -241,25 +259,25 @@ int readAtom_gro(Atom* atom, Parameter* param) {
desc[i] = '\0';
readline(line, fp);
atoms_to_read = atoi(strtok(line, " "));
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
if(me==0) fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
while(!feof(fp) && read_atoms < atoms_to_read) {
readline(line, fp);
char *label = strtok(line, " ");
int type = type_str2int(strtok(NULL, " "));
int atom_id = atoi(strtok(NULL, " ")) - 1;
char *label = strtok(line, "\t ");
int type = type_str2int(strtok(NULL, "\t "));
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
atom_id = read_atoms;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type;
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = atof(strtok(NULL, " "));
atom->vy[atom_id] = atof(strtok(NULL, " "));
atom->vz[atom_id] = atof(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
@@ -269,18 +287,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
if(!feof(fp)) {
readline(line, fp);
param->xlo = 0.0;
param->xhi = atof(strtok(line, " "));
param->xhi = atof(strtok(line, "\t "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->yhi = atof(strtok(NULL, "\t "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->zhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
}
if(read_atoms != atoms_to_read) {
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
if(me==0) fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
exit(-1);
return -1;
}
@@ -296,12 +314,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param) {
int me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
@@ -310,7 +330,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
int ts = -1;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
@@ -333,47 +353,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
}
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
readline(line, fp);
param->xlo = atof(strtok(line, " "));
param->xhi = atof(strtok(NULL, " "));
param->xlo = atof(strtok(line, "\t "));
param->xhi = atof(strtok(NULL, "\t "));
param->xprd = param->xhi - param->xlo;
readline(line, fp);
param->ylo = atof(strtok(line, " "));
param->yhi = atof(strtok(NULL, " "));
param->ylo = atof(strtok(line, "\t "));
param->yhi = atof(strtok(NULL, "\t "));
param->yprd = param->yhi - param->ylo;
readline(line, fp);
param->zlo = atof(strtok(line, " "));
param->zhi = atof(strtok(NULL, " "));
param->zlo = atof(strtok(line, "\t "));
param->zhi = atof(strtok(NULL, "\t "));
param->zprd = param->zhi - param->zlo;
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
for(int i = 0; i < natoms; i++) {
readline(line, fp);
atom_id = atoi(strtok(line, " ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = atof(strtok(NULL, " "));
atom->vy[atom_id] = atof(strtok(NULL, " "));
atom->vz[atom_id] = atof(strtok(NULL, " "));
atom_id = atoi(strtok(line, "\t ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
atom_x(atom_id) = atof(strtok(NULL, "\t "));
atom_y(atom_id) = atof(strtok(NULL, "\t "));
atom_z(atom_id) = atof(strtok(NULL, "\t "));
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
read_atoms++;
}
} else {
fprintf(stderr, "Invalid item: %s\n", item);
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
} else {
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
if(me==0) fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
exit(-1);
return -1;
}
}
if(ts < 0 || !natoms || !read_atoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
if(me==0) fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
@@ -389,7 +409,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
fclose(fp);
return natoms;
}
@@ -530,3 +550,249 @@ void growClusters(Atom *atom) {
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
}
/* MPI added*/
void growPbc(Atom* atom) {
int nold = atom->NmaxGhost;
atom->NmaxGhost += DELTA;
if (atom->PBCx || atom->PBCy || atom->PBCz){
atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
} else {
atom->PBCx = (int*) malloc(atom->NmaxGhost * sizeof(int));
atom->PBCy = (int*) malloc(atom->NmaxGhost * sizeof(int));
atom->PBCz = (int*) malloc(atom->NmaxGhost * sizeof(int));
}
}
void packForward(Atom* atom, int nc, int* list, MD_FLOAT* buf, int* pbc)
{
for(int i = 0; i < nc; i++) {
int cj = list[i];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
buf[3*(displ+cjj)+0] = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
buf[3*(displ+cjj)+1] = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
buf[3*(displ+cjj)+2] = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
buf[3*(displ+cjj)+0] = -1; //x
buf[3*(displ+cjj)+1] = -1; //y
buf[3*(displ+cjj)+2] = -1; //z
}
}
}
void unpackForward(Atom* atom, int nc, int c0, MD_FLOAT* buf)
{
for(int i = 0; i < nc; i++) {
int cj = c0+i;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
if(cj_x[CL_X_OFFSET + cjj]<INFINITY) cj_x[CL_X_OFFSET + cjj] = buf[3*(displ+cjj)+0];
if(cj_x[CL_Y_OFFSET + cjj]<INFINITY) cj_x[CL_Y_OFFSET + cjj] = buf[3*(displ+cjj)+1];
if(cj_x[CL_Z_OFFSET + cjj]<INFINITY) cj_x[CL_Z_OFFSET + cjj] = buf[3*(displ+cjj)+2];
}
}
}
int packGhost(Atom* atom, int cj, MD_FLOAT* buf, int* pbc)
{
//#of elements per cluster natoms,x0,y0,z0,type_0, . . ,xn,yn,zn,type_n,bbminx,bbmaxxy,bbminy,bbmaxy,bbminz,bbmaxz
//count = 4*N_CLUSTER+7, if N_CLUSTER =4 => count = 23 value/cluster + trackpbc[x] + trackpbc[y] + trackpbc[z]
int m = 0;
if(atom->jclusters[cj].natoms > 0) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
buf[m++] = atom->jclusters[cj].natoms;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
MD_FLOAT xtmp = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
MD_FLOAT ytmp = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
MD_FLOAT ztmp = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
buf[m++] = xtmp;
buf[m++] = ytmp;
buf[m++] = ztmp;
buf[m++]= atom->cl_type[cj_sca_base + cjj];
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
buf[m++] = -1; //x
buf[m++] = -1; //y
buf[m++] = -1; //z
buf[m++] = -1; //type
}
buf[m++] = bbminx;
buf[m++] = bbmaxx;
buf[m++] = bbminy;
buf[m++] = bbmaxy;
buf[m++] = bbminz;
buf[m++] = bbmaxz;
//TODO: check atom->ncj
int ghostId = cj-atom->ncj;
//check for ghost particles
buf[m++] = (cj-atom->ncj>=0) ? pbc[_x]+atom->PBCx[ghostId]:pbc[_x];
buf[m++] = (cj-atom->ncj>=0) ? pbc[_y]+atom->PBCy[ghostId]:pbc[_y];
buf[m++] = (cj-atom->ncj>=0) ? pbc[_z]+atom->PBCz[ghostId]:pbc[_z];
}
return m;
}
int unpackGhost(Atom* atom, int cj, MD_FLOAT* buf)
{
int m = 0;
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
if(cj*jfac>=atom->Nclusters_max) growClusters(atom);
if(atom->Nclusters_ghost>=atom->NmaxGhost) growPbc(atom);
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
atom->jclusters[cj].natoms = buf[m++];
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
cj_x[CL_X_OFFSET + cjj] = buf[m++];
cj_x[CL_Y_OFFSET + cjj] = buf[m++];
cj_x[CL_Z_OFFSET + cjj] = buf[m++];
atom->cl_type[cj_sca_base + cjj] = buf[m++];
atom->Nghost++;
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
cj_x[CL_X_OFFSET + cjj] = INFINITY;
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
atom->cl_type[cj_sca_base + cjj] = -1;
m+=4;
}
atom->jclusters[cj].bbminx = buf[m++];
atom->jclusters[cj].bbmaxx = buf[m++];
atom->jclusters[cj].bbminy = buf[m++];
atom->jclusters[cj].bbmaxy = buf[m++];
atom->jclusters[cj].bbminz = buf[m++];
atom->jclusters[cj].bbmaxz = buf[m++];
atom->PBCx[atom->Nclusters_ghost] = buf[m++];
atom->PBCy[atom->Nclusters_ghost] = buf[m++];
atom->PBCz[atom->Nclusters_ghost] = buf[m++];
atom->Nclusters_ghost++;
}
void packReverse(Atom* atom, int nc, int c0, MD_FLOAT* buf)
{
for(int i = 0; i < nc; i++) {
int cj = c0+i;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
buf[3*(displ+cjj)+0] = cj_f[CL_X_OFFSET + cjj];
buf[3*(displ+cjj)+1] = cj_f[CL_Y_OFFSET + cjj];
buf[3*(displ+cjj)+2] = cj_f[CL_Z_OFFSET + cjj];
}
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
buf[3*(displ+cjj)+0] = -1; //x
buf[3*(displ+cjj)+1] = -1; //y
buf[3*(displ+cjj)+2] = -1; //z
}
}
}
void unpackReverse(Atom* atom, int nc, int* list, MD_FLOAT* buf)
{
for(int i = 0; i < nc; i++) {
int cj = list[i];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
int displ = i*CLUSTER_N;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] += buf[3*(displ+cjj)+0];
cj_f[CL_Y_OFFSET + cjj] += buf[3*(displ+cjj)+1];
cj_f[CL_Z_OFFSET + cjj] += buf[3*(displ+cjj)+2];
}
}
}
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
{
int m = 0;
buf[m++] = atom_x(i);
buf[m++] = atom_y(i);
buf[m++] = atom_z(i);
buf[m++] = atom_vx(i);
buf[m++] = atom_vy(i);
buf[m++] = atom_vz(i);
buf[m++] = atom->type[i];
return m;
}
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
{
while(i >= atom->Nmax) growAtom(atom);
int m = 0;
atom_x(i) = buf[m++];
atom_y(i) = buf[m++];
atom_z(i) = buf[m++];
atom_vx(i) = buf[m++];
atom_vy(i) = buf[m++];
atom_vz(i) = buf[m++];
atom->type[i] = buf[m++];
return m;
}
void pbc(Atom* atom)
{
for(int i = 0; i < atom->Nlocal; i++) {
MD_FLOAT xprd = atom->mybox.xprd;
MD_FLOAT yprd = atom->mybox.yprd;
MD_FLOAT zprd = atom->mybox.zprd;
if(atom_x(i) < 0.0) atom_x(i) += xprd;
if(atom_y(i) < 0.0) atom_y(i) += yprd;
if(atom_z(i) < 0.0) atom_z(i) +=zprd;
if(atom_x(i) >= xprd) atom_x(i) -=xprd;
if(atom_y(i) >= yprd) atom_y(i) -=yprd;
if(atom_z(i) >= zprd) atom_z(i) -=zprd;
}
}
void copy(Atom* atom, int i, int j)
{
atom_x(i) = atom_x(j);
atom_y(i) = atom_y(j);
atom_z(i) = atom_z(j);
atom_vx(i) = atom_vx(j);
atom_vy(i) = atom_vy(j);
atom_vz(i) = atom_vz(j);
atom->type[i] = atom->type[j];
}

View File

@@ -14,8 +14,9 @@
#include <stats.h>
#include <util.h>
#include <simd.h>
#include <math.h>
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
/*
static inline void gmx_load_simd_2xnn_interactions(
int excl,
@@ -49,7 +50,6 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
@@ -60,13 +60,23 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
}
}
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] = 0.0;
cj_f[CL_Y_OFFSET + cjj] = 0.0;
cj_f[CL_Z_OFFSET + cjj] = 0.0;
}
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
int ci_cj1 = CJ1_FROM_CI(ci);
@@ -75,14 +85,14 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
int numneighs = neighbor->numneigh[ci];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int any = 0;
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cii = 0; cii < CLUSTER_M; cii++) {
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
@@ -103,6 +113,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
(ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
#endif
if(cond) {
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
@@ -113,12 +124,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
if(neighbor->half_neigh) {
if(neighbor->half_neigh || param->method) {
cj_f[CL_X_OFFSET + cjj] -= delx * force;
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
}
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
@@ -129,13 +139,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
}
}
}
if(any != 0) {
addStat(stats->clusters_within_cutoff, 1);
} else {
addStat(stats->clusters_outside_cutoff, 1);
}
ci_f[CL_X_OFFSET + cii] += fix;
ci_f[CL_Y_OFFSET + cii] += fiy;
ci_f[CL_Z_OFFSET + cii] += fiz;
@@ -146,7 +154,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("force");
}
@@ -168,7 +176,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
for(int ci = 0; ci < atom->Nclusters_local+atom->Nclusters_ghost; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
@@ -178,6 +186,16 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
}
}
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] = 0.0;
cj_f[CL_Y_OFFSET + cjj] = 0.0;
cj_f[CL_Z_OFFSET + cjj] = 0.0;
}
}
double S = getTimeStamp();
#pragma omp parallel
@@ -213,7 +231,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
#endif
*/
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@@ -322,7 +340,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
fiz2 += tz2;
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal)|| param->method) {
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
}
#else
@@ -373,7 +391,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
fiz2 += tz2;
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
}
#else
@@ -389,7 +407,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("force");
}
@@ -427,7 +445,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@@ -562,7 +580,6 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
if(neighbor->half_neigh) {
return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
}
return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
}
@@ -589,13 +606,23 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
}
}
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
cj_f[CL_X_OFFSET + cjj] = 0.0;
cj_f[CL_Y_OFFSET + cjj] = 0.0;
cj_f[CL_Z_OFFSET + cjj] = 0.0;
}
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@@ -726,7 +753,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
fiz3 = simd_add(fiz3, tz3);
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
@@ -811,7 +838,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
fiz3 = simd_add(fiz3, tz3);
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
@@ -831,7 +858,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
addStat(stats->num_neighs, numneighs);
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
}
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
LIKWID_MARKER_STOP("force");
}
@@ -869,7 +896,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
#pragma omp for
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@@ -1070,3 +1097,120 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
return computeForceLJ_4xn_full(param, atom, neighbor, stats);
}
//Routine for eight shell method
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("computeForceLJ begin\n");
int Nshell = neighbor->Nshell;
int *neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int ci = 0; ci < Nshell; ci++) {
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
int numneighs = neighbor->numNeighShell[ci];
int cs = neighbor->listshell[ci];
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
for(int css = 0; css < CLUSTER_N; css++) {
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
cj_f[CL_X_OFFSET + cjj] -= delx * force;
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
}
}
cs_f[CL_X_OFFSET + css] += fix;
cs_f[CL_Y_OFFSET + css] += fiy;
cs_f[CL_Z_OFFSET + css] += fiz;
}
}
// addStat(stats->calculated_forces, 1);
// addStat(stats->num_neighs, numneighs);
// addStat(stats->force_iters, (long long int)((double)numneighs));
}
}
/*
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
int Nshell = neighbor->Nshell;
Pair* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int ci = 0; ci < Nshell; ci++) {
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
int numneighs = neighbor->numNeighShell[ci];
int cs = neighbor->listshell[ci].cluster;
int css = neighbor->listshell[ci].atom;
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
for(int k = 0; k < numneighs; k++) {
int cj = neighs[k].cluster;
int cjj = neighs[k].atom;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
cj_f[CL_X_OFFSET + cjj] -= delx * force;
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
cs_f[CL_X_OFFSET + css] += delx * force;
cs_f[CL_Y_OFFSET + css] += delx * force;
cs_f[CL_Z_OFFSET + css] += delx * force;
}
}
}
}
*/

View File

@@ -5,6 +5,7 @@
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#include <box.h>
#ifndef __ATOM_H_
#define __ATOM_H_
@@ -102,7 +103,7 @@ typedef struct {
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max, NmaxGhost,ncj;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
int *border_map;
@@ -112,6 +113,7 @@ typedef struct {
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
//track the movement of a particle along boundaries
int *PBCx, *PBCy, *PBCz;
// Data in cluster format
MD_FLOAT *cl_x;
@@ -128,6 +130,9 @@ typedef struct {
unsigned int masks_2xnn_fn[8];
unsigned int masks_4xn_hn[16];
unsigned int masks_4xn_fn[16];
//Info Subdomain
Box mybox;
} Atom;
extern void initAtom(Atom*);
@@ -140,6 +145,18 @@ extern int readAtom_dmp(Atom*, Parameter*);
extern void growAtom(Atom*);
extern void growClusters(Atom*);
int packGhost(Atom*, int, MD_FLOAT* , int*);
int unpackGhost(Atom*, int, MD_FLOAT*);
int packExchange(Atom*, int, MD_FLOAT*);
int unpackExchange(Atom*, int, MD_FLOAT*);
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
void unpackForward(Atom*, int, int, MD_FLOAT*);
void packReverse(Atom* , int , int , MD_FLOAT*);
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
void pbc(Atom*);
void copy(Atom*, int, int);
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]

View File

@@ -9,10 +9,13 @@
#include <atom.h>
#include <parameter.h>
#include <util.h>
#include <timers.h>
#include <timing.h>
#include <simd.h>
/*
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
@@ -32,9 +35,9 @@ void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
}
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
@@ -46,6 +49,56 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
}
}
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
}
*/
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
MD_SIMD_FLOAT dt = simd_broadcast(param->dt);
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
MD_SIMD_FLOAT x_vector = simd_fma(vx_vector, dt, simd_load(&ci_x[CL_X_OFFSET]));
MD_SIMD_FLOAT y_vector = simd_fma(vy_vector, dt, simd_load(&ci_x[CL_Y_OFFSET]));
MD_SIMD_FLOAT z_vector = simd_fma(vz_vector, dt, simd_load(&ci_x[CL_Z_OFFSET]));
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
simd_store(&ci_x[CL_X_OFFSET], x_vector);
simd_store(&ci_x[CL_Y_OFFSET], y_vector);
simd_store(&ci_x[CL_Z_OFFSET], z_vector);
}
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
}
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
}
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
}
@@ -54,3 +107,6 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
void cudaInitialIntegrate(Parameter*, Atom*);
void cudaFinalIntegrate(Parameter*, Atom*);
#endif

View File

@@ -25,6 +25,11 @@
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
typedef struct {
int cluster;
int atom;
} Pair;
typedef struct {
int every;
int ncalls;
@@ -34,8 +39,20 @@ typedef struct {
int half_neigh;
int* neighbors;
unsigned int* neighbors_imask;
//MPI
/*
int Nshell; //# of atoms in listShell(Cluster here cover all possible ghost interactions)
int *numNeighShell; //# of neighs for each atom in listShell
Pair *neighshell; //list of neighs for each atom in listShell
Pair *listshell; //Atoms to compute the force
*/
int Nshell; //# of cluster in listShell(Cluster here cover all possible ghost interactions)
int *numNeighShell; //# of neighs for each atom in listShell
int *neighshell; //list of neighs for each atom in listShell
int *listshell; //Atoms to compute the force
} Neighbor;
extern void initNeighbor(Neighbor*, Parameter*);
extern void setupNeighbor(Parameter*, Atom*);
extern void binatoms(Atom*);

View File

@@ -5,6 +5,8 @@
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <comm.h>
#include <parameter.h>
#ifndef __VTK_H_
#define __VTK_H_
@@ -13,4 +15,5 @@ extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int t
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
#endif

View File

@@ -5,9 +5,7 @@
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <omp.h>
//--
#include <likwid-marker.h>
//--
@@ -26,6 +24,10 @@
#include <util.h>
#include <vtk.h>
#include <xtc.h>
#include <comm.h>
#include <grid.h>
#include <shell_methods.h>
#include <mpi.h>
#define HLINE "----------------------------------------------------------------------------\n"
@@ -42,17 +44,55 @@ extern void copyDataFromCUDADevice(Atom *atom);
extern void cudaDeviceFree();
#endif
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time)
{
double S, E;
int dims = 3; //TODO: Adjust to do in 3d and 2d
S = getTimeStamp();
if(param->balance == RCB) {
rcbBalance(grid, atom, param, meanBisect,dims,0);
neighComm(comm, param, grid);
}else if(param->balance == meanTimeRCB){
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
neighComm(comm, param, grid);
}else if(param->balance == Staggered) {
staggeredBalance(grid, atom, param, time);
neighComm(comm, param, grid);
exchangeComm(comm,atom);
}else { } //Do nothing
//printGrid(grid);
E = getTimeStamp();
return E-S;
}
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
{
double E,S,time;
int me;
MPI_Comm_rank(world,&me);
S = getTimeStamp();
if(param->balance == meanTimeRCB || param->balance == RCB){
rcbBalance(grid, atom, param, meanBisect,3,0);
neighComm(comm, param, grid);
}
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
MPI_Barrier(world);
E = getTimeStamp();
return E-S;
}
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
initAtom(atom);
initPbc(atom);
//initPbc(atom);
initStats(stats);
initNeighbor(neighbor, param);
if(param->input_file == NULL) {
@@ -60,13 +100,18 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
} else {
readAtom(atom, param);
}
setupGrid(grid,atom,param);
setupNeighbor(param, atom);
setupComm(comm, param, grid);
if(param->balance){
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
}
setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); }
buildClusters(atom);
defineJClusters(atom);
setupPbc(atom, param);
//setupPbc(atom, param);
ghostNeighbor(comm, atom, param); //change
binClusters(atom);
buildNeighbor(atom, neighbor);
initDevice(atom, neighbor);
@@ -74,15 +119,15 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
return E-S;
}
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
updateSingleAtoms(atom);
updateAtomsPbc(atom, param);
//updateAtomsPbc(atom, param);
buildClusters(atom);
defineJClusters(atom);
setupPbc(atom, param);
//setupPbc(atom, param);
ghostNeighbor(comm, atom, param);
binClusters(atom);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
@@ -90,15 +135,13 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
return E-S;
}
void printAtomState(Atom *atom) {
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
/* int nall = atom->Nlocal + atom->Nghost; */
/* for (int i=0; i<nall; i++) { */
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
/* } */
double updateAtoms(Comm* comm, Atom* atom){
double S,E;
S = getTimeStamp();
updateSingleAtoms(atom);
exchangeComm(comm, atom);
E = getTimeStamp();
return E-S;
}
int main(int argc, char** argv) {
@@ -108,7 +151,8 @@ int main(int argc, char** argv) {
Neighbor neighbor;
Stats stats;
Parameter param;
Comm comm;
Grid grid;
LIKWID_MARKER_INIT;
#pragma omp parallel
{
@@ -116,7 +160,7 @@ int main(int argc, char** argv) {
//LIKWID_MARKER_REGISTER("reneighbour");
//LIKWID_MARKER_REGISTER("pbc");
}
initComm(&argc, &argv, &comm); //change
initParameter(&param);
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
@@ -158,6 +202,24 @@ int main(int argc, char** argv) {
param.half_neigh = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-method") == 0)) {
param.method = atoi(argv[++i]);
if (param.method>2 || param.method< 0){
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
endComm(&comm);
exit(0);
}
continue;
}
if((strcmp(argv[i], "-bal") == 0)) {
param.balance = atoi(argv[++i]);
if (param.balance>3 || param.balance< 0){
if(comm.myproc == 0) fprintf(stderr, "Load balance does not exist!\n");
endComm(&comm);
exit(0);
}
continue;
}
if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
param.mass = atof(argv[++i]);
continue;
@@ -188,6 +250,7 @@ int main(int argc, char** argv) {
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
//TODO: add the shell and ac print options
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
@@ -205,98 +268,101 @@ int main(int argc, char** argv) {
exit(EXIT_SUCCESS);
}
}
if(param.balance>0 && param.method == 1){
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
endComm(&comm);
exit(0);
}
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param);
printf(HLINE);
printf("step\ttemp\t\tpressure\n");
timer[SETUP]=setup(&param, &eam, &atom, &neighbor, &stats, &comm, &grid);
if(comm.myproc == 0) printParameter(&param);
if(comm.myproc == 0) printf(HLINE);
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
#ifdef CUDA_TARGET
copyDataToCUDADevice(&atom);
#endif
if(param.force_field == FF_EAM) {
timer[FORCE] = computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] = computeForceLJ(&param, &atom, &neighbor, &stats);
}
timer[NEIGH] = 0.0;
timer[NEIGH] = 0.0;
timer[FORWARD] = 0.0;
timer[UPDATE] = 0.0;
timer[BALANCE] = 0.0;
timer[REVERSE] = reverse(&comm, &atom, &param);
MPI_Barrier(world);
timer[TOTAL] = getTimeStamp();
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, 0);
//write_data_to_vtk_file(param.vtk_file, &comm ,&atom, 0);
printvtk(param.vtk_file, &comm, &atom, &param, 0);
}
//TODO: modify xct
if(param.xtc_file != NULL) {
xtc_init(param.xtc_file, &atom, 0);
}
for(int n = 0; n < param.ntimes; n++) {
double forceTime=0.0;
double commTime=0.0;
for(int n = 0; n < param.ntimes; n++) {
initialIntegrate(&param, &atom);
if((n + 1) % param.reneigh_every) {
if(!((n + 1) % param.prune_every)) {
timer[FORWARD]+=forward(&comm, &atom, &param);
if(!((n + 1) % param.prune_every)){
pruneNeighbor(&param, &atom, &neighbor);
}
updatePbc(&atom, &param, 0);
} else {
#ifdef CUDA_TARGET
copyDataFromCUDADevice(&atom);
#endif
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
timer[UPDATE] +=updateAtoms(&comm,&atom);
if(param.balance && !((n+1)%param.balance_every))
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , &param, timer[FORCE]);
timer[NEIGH] += reneighbour(&comm, &param, &atom, &neighbor);
#ifdef CUDA_TARGET
copyDataToCUDADevice(&atom);
isReneighboured = 1;
#endif
}
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
if(param.force_field == FF_EAM) {
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats);
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] += computeForceLJ(&param, &atom, &neighbor, &stats);
}
}
timer[REVERSE] += reverse(&comm, &atom, &param);
finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
computeThermo(n + 1, &param, &atom);
}
int write_pos = !((n + 1) % param.x_out_every);
int write_vel = !((n + 1) % param.v_out_every);
if(write_pos || write_vel) {
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
printvtk(param.vtk_file, &comm, &atom, &param, n+1);
}
//TODO: xtc file
if(param.xtc_file != NULL) {
xtc_write(&atom, n + 1, write_pos, write_vel);
}
}
}
}
#ifdef CUDA_TARGET
copyDataFromCUDADevice(&atom);
#endif
MPI_Barrier(world);
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
updateSingleAtoms(&atom);
updateAtoms(&comm,&atom);
computeThermo(-1, &param, &atom);
//TODO:
if(param.xtc_file != NULL) {
xtc_end();
}
@@ -304,41 +370,35 @@ int main(int argc, char** argv) {
#ifdef CUDA_TARGET
cudaDeviceFree();
#endif
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
printf(HLINE);
double mint[NUMTIMER];
double maxt[NUMTIMER];
double sumt[NUMTIMER];
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
int Nghost;
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel
#pragma omp master
{
omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
}
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
if(comm.myproc == 0){
int n = comm.numproc;
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
printf(HLINE);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
displayStatistics(&atom, &param, &stats, timer);
#endif
#endif
}
endComm(&comm);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

View File

@@ -7,15 +7,15 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <util.h>
#include <mpi.h>
#define SMALL 1.0e-6
#define FACTOR 0.999
#define eps 1.0e-9
static MD_FLOAT xprd, yprd, zprd;
static MD_FLOAT bininvx, bininvy;
static int mbinxlo, mbinylo;
@@ -34,9 +34,16 @@ static int nmax;
static int nstencil; // # of bins in stencil
static int* stencil; // stencil list of bin offsets
static MD_FLOAT binsizex, binsizey;
int me; //rank
int method; // method
int shellMethod; //If shell method exist
static int coord2bin(MD_FLOAT, MD_FLOAT);
static MD_FLOAT bindist(int, int);
//static int ghostZone(Atom*, int);
static int halfZoneCluster(Atom*,int);
static int ghostClusterinRange(Atom*, int, int, MD_FLOAT);
static void neighborGhost(Atom*, Neighbor*);
/* exported subroutines */
void initNeighbor(Neighbor *neighbor, Parameter *param) {
@@ -53,12 +60,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
bincount = NULL;
bin_clusters = NULL;
bin_nclusters = NULL;
neighbor->half_neigh = param->half_neigh;
neighbor->maxneighs = 100;
neighbor->maxneighs = 200;
neighbor->numneigh = NULL;
neighbor->numneigh_masked = NULL;
neighbor->neighbors = NULL;
neighbor->neighbors_imask = NULL;
//MPI
shellMethod = 0;
method = param->method;
if(method == halfShell || method == eightShell){
param->half_neigh = 1;
shellMethod = 1;
}
me = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
neighbor->half_neigh = param->half_neigh;
neighbor->Nshell = 0;
neighbor->numNeighShell = NULL;
neighbor->neighshell = NULL;
neighbor->listshell = NULL;
}
void setupNeighbor(Parameter *param, Atom *atom) {
@@ -77,7 +97,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Natoms)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
@@ -146,6 +166,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
}
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
MD_FLOAT dm = MAX(dl, dh);
@@ -163,6 +184,7 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d2 += dm0 * dm0;
return d2;
}
@@ -225,7 +247,6 @@ static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("buildNeighbor start\n");
/* extend atom arrays if necessary */
if(atom->Nclusters_local > nmax) {
nmax = atom->Nclusters_local;
@@ -249,7 +270,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
while(resize) {
int new_maxneighs = neighbor->maxneighs;
resize = 0;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj1 = CJ1_FROM_CI(ci);
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
@@ -262,14 +282,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
int cj, m = -1;
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
const int c = bin_nclusters[jbin];
if(c > 0) {
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
@@ -279,6 +297,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
if(neighbor->half_neigh && ci_cj1 > cj) {
continue;
}
jbb_zmin = atom->jclusters[cj].bbminz;
jbb_zmax = atom->jclusters[cj].bbmaxz;
dl = ibb_zmin - jbb_zmax;
@@ -287,7 +306,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
dm0 = MAX(dm, 0.0);
d_bb_sq = dm0 * dm0;
} while(m + 1 < c && d_bb_sq > cutneighsq);
jbb_xmin = atom->jclusters[cj].bbminx;
jbb_xmax = atom->jclusters[cj].bbmaxx;
jbb_ymin = atom->jclusters[cj].bbminy;
@@ -357,7 +375,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
jbb_zmin = atom->jclusters[cj].bbminz;
jbb_zmax = atom->jclusters[cj].bbmaxz;
}
}
}
}
}
@@ -383,14 +401,15 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
if(resize) {
neighbor->maxneighs = new_maxneighs * 1.2;
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
fprintf(stdout, "RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
free(neighbor->neighbors);
free(neighbor->neighbors_imask);
neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
}
}
if(method == eightShell) neighborGhost(atom, neighbor);
/*
DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
for(int ci = 0; ci < 6; ci++) {
@@ -511,46 +530,44 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
int ix, iy;
if(xin >= xprd) {
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * bininvx) - mbinxlo;
ix = (int)((xin+eps) * bininvx) - mbinxlo;
} else {
ix = (int)(xin * bininvx) - mbinxlo - 1;
ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
if(yin >= yprd) {
iy = (int)(((yin+eps) - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * bininvy) - mbinylo;
iy = (int)((yin+eps) * bininvy) - mbinylo;
} else {
iy = (int)(yin * bininvy) - mbinylo - 1;
iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
}
return (iy * mbinx + ix + 1);
}
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
if(xin >= xprd) {
*ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
*ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
*ix = (int)(xin * bininvx) - mbinxlo;
*ix = (int)((xin+eps) * bininvx) - mbinxlo;
} else {
*ix = (int)(xin * bininvx) - mbinxlo - 1;
*ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
*iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
if(yin >= yprd) {
*iy = (int)((yin + eps - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
*iy = (int)(yin * bininvy) - mbinylo;
*iy = (int)((yin+eps) * bininvy) - mbinylo;
} else {
*iy = (int)(yin * bininvy) - mbinylo - 1;
*iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
}
}
void binAtoms(Atom *atom) {
DEBUG_MESSAGE("binAtoms start\n");
int resize = 1;
while(resize > 0) {
resize = 0;
@@ -567,7 +584,7 @@ void binAtoms(Atom *atom) {
resize = 1;
}
}
if(resize) {
free(bins);
atoms_per_bin *= 2;
@@ -615,8 +632,7 @@ void buildClusters(Atom *atom) {
/* bin local atoms */
binAtoms(atom);
sortAtomsByZCoord(atom);
sortAtomsByZCoord(atom);
for(int bin = 0; bin < mbins; bin++) {
int c = bincount[bin];
int ac = 0;
@@ -688,6 +704,9 @@ void buildClusters(Atom *atom) {
void defineJClusters(Atom *atom) {
DEBUG_MESSAGE("defineJClusters start\n");
const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
atom->ncj = atom->Nclusters_local / jfac;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int cj0 = CJ0_FROM_CI(ci);
@@ -830,12 +849,11 @@ void binClusters(Atom *atom) {
}
}
}
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
const int cj = ncj + cg;
int ix = -1, iy = -1;
MD_FLOAT xtmp, ytmp;
if(shellMethod == halfShell && !halfZoneCluster(atom, cj)) continue;
if(atom->jclusters[cj].natoms > 0) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
@@ -846,6 +864,7 @@ void binClusters(Atom *atom) {
coord2bin2D(xtmp, ytmp, &ix, &iy);
ix = MAX(MIN(ix, mbinx - 1), 0);
iy = MAX(MIN(iy, mbiny - 1), 0);
for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
int nix, niy;
xtmp = cj_x[CL_X_OFFSET + cjj];
@@ -853,7 +872,7 @@ void binClusters(Atom *atom) {
coord2bin2D(xtmp, ytmp, &nix, &niy);
nix = MAX(MIN(nix, mbinx - 1), 0);
niy = MAX(MIN(niy, mbiny - 1), 0);
// Always put the cluster on the bin of its innermost atom so
// the cluster should be closer to local clusters
if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; }
@@ -861,7 +880,6 @@ void binClusters(Atom *atom) {
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
}
int bin = iy * mbinx + ix + 1;
int c = bin_nclusters[bin];
if(c < clusters_per_bin) {
@@ -883,25 +901,21 @@ void binClusters(Atom *atom) {
break;
}
}
if(!inserted) {
bin_clusters[bin * clusters_per_bin + c] = cj;
}
bin_nclusters[bin]++;
} else {
resize = 1;
}
}
}
if(resize) {
free(bin_clusters);
clusters_per_bin *= 2;
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
}
}
/*
DEBUG_MESSAGE("bin_nclusters\n");
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
@@ -919,7 +933,6 @@ void updateSingleAtoms(Atom *atom) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
@@ -928,12 +941,174 @@ void updateSingleAtoms(Atom *atom) {
atom->vy[Natom] = ci_v[CL_Y_OFFSET + cii];
atom->vz[Natom] = ci_v[CL_Z_OFFSET + cii];
Natom++;
}
}
}
if(Natom != atom->Nlocal) {
fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
}
DEBUG_MESSAGE("updateSingleAtoms stop\n");
}
//MPI Shell Methods
static int eightZoneCluster(Atom* atom, int cj)
{
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
int zone = 0;
MD_FLOAT *hi = atom->mybox.hi;
if (atom->jclusters[cj].bbminx +eps >=hi[_x]){
zone += 1;
}
if (atom->jclusters[cj].bbminy +eps >=hi[_y]){
zone += 2;
}
if (atom->jclusters[cj].bbminz +eps >=hi[_z]){
zone += 4;
}
return zoneMapping[zone];
}
static int halfZoneCluster(Atom* atom, int cj)
{
MD_FLOAT *hi = atom->mybox.hi;
MD_FLOAT *lo = atom->mybox.lo;
if(atom->jclusters[cj].bbmaxx < lo[_x] && atom->jclusters[cj].bbmaxy < hi[_y] &&
atom->jclusters[cj].bbmaxz < hi[_z]){
return 0;
} else if(atom->jclusters[cj].bbmaxy < lo[_y] && atom->jclusters[cj].bbmaxz < hi[_z]){
return 0;
} else if(atom->jclusters[cj].bbmaxz < lo[_z]){
return 0;
} else {
return 1;
}
}
int BoxGhostDistance(Atom *atom, int ci, int cj) {
MD_FLOAT dl = atom->jclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->jclusters[ci].bbmaxx;
MD_FLOAT dm = MAX(dl, dh);
MD_FLOAT dm0 = MAX(dm, 0.0);
MD_FLOAT dx2 = dm0 * dm0;
dl = atom->jclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
dh = atom->jclusters[cj].bbminy - atom->jclusters[ci].bbmaxy;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
MD_FLOAT dy2 = dm0 * dm0;
dl = atom->jclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
dh = atom->jclusters[cj].bbminz - atom->jclusters[ci].bbmaxz;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
MD_FLOAT dz2 = dm0 * dm0;
return dx2 > cutneighsq ? 0 : dy2 > cutneighsq ? 0 : dz2 > cutneighsq ? 0 : 1;
}
static int ghostClusterinRange(Atom *atom, int cs, int cg, MD_FLOAT rsq) {
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
MD_FLOAT *cg_x = &atom->cl_x[cj_vec_base];
for(int cii = 0; cii < atom->jclusters[cs].natoms; cii++) {
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
MD_FLOAT delx = cs_x[CL_X_OFFSET + cii] - cg_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = cs_x[CL_Y_OFFSET + cii] - cg_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = cs_x[CL_Z_OFFSET + cii] - cg_x[CL_Z_OFFSET + cjj];
if(delx * delx + dely * dely + delz * delz < rsq) {
return 1;
}
}
}
return 0;
}
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
int Nshell=0;
int Ncluster_local = atom->Nclusters_local;
int Nclusterghost = atom->Nclusters_ghost;
if(neighbor->listshell) free(neighbor->listshell);
neighbor->listshell = (int*) malloc(Nclusterghost * sizeof(int));
int* listzone = (int*) malloc(8 * Nclusterghost * sizeof(int));
int countCluster[8] = {0,0,0,0,0,0,0,0};
//Selecting ghost atoms for interaction and putting them into regions
for(int cg = atom->ncj; cg < atom->ncj+Nclusterghost; cg++) {
int czone = eightZoneCluster(atom,cg);
int *list = &listzone[Nclusterghost*czone];
int n = countCluster[czone];
list[n] = cg;
countCluster[czone]++;
//It is only necessary to find neighbour particles for 3 regions
//if(czone == 1 || czone == 2 || czone == 3)
//neighbor->listshell[Nshell++] = cg;
}
for(int zone = 1; zone<=3; zone++){
int *list = &listzone[Nclusterghost*zone];
for(int n=0; n<countCluster[zone]; n++)
neighbor->listshell[Nshell++] = list[n];
}
neighbor->Nshell = Nshell;
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
if(neighbor->neighshell) free(neighbor->neighshell);
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
int resize = 1;
while(resize)
{
resize = 0;
for(int ic = 0; ic < Nshell; ic++) {
int *neighshell = &(neighbor->neighshell[ic*neighbor->maxneighs]);
int n = 0;
int icluster = neighbor->listshell[ic];
int iczone = eightZoneCluster(atom, icluster);
for(int jczone=0; jczone<8; jczone++){
if(jczone <=iczone) continue;
if(iczone == 1 && (jczone==5||jczone==6||jczone==7)) continue;
if(iczone == 2 && (jczone==4||jczone==6||jczone==7)) continue;
if(iczone == 3 && (jczone==4||jczone==5||jczone==7)) continue;
int Ncluster = countCluster[jczone];
int* loc_zone = &listzone[jczone * Nclusterghost];
for(int k = 0; k < Ncluster ; k++) {
int jcluster = loc_zone[k];
if(BoxGhostDistance(atom, icluster, jcluster))
{
if(ghostClusterinRange(atom, icluster, jcluster, cutneighsq))
neighshell[n++] = jcluster;
}
}
}
neighbor->numNeighShell[ic] = n;
if(n >= neighbor->maxneighs){
resize = 1;
neighbor->maxneighs = n * 1.2;
fprintf(stdout, "RESIZE EIGHT SHELL %d, PROC %d\n", neighbor->maxneighs,me);
break;
}
}
if(resize) {
free(neighbor->neighshell);
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
}
}
free(listzone);
}

View File

@@ -9,6 +9,11 @@
#include <atom.h>
#include <vtk.h>
#include <mpi.h>
#include <string.h>
static MPI_File _fh;
static inline void flushBuffer(char*);
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
write_local_atoms_to_vtk_file(filename, atom, timestep);
@@ -188,3 +193,128 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
fclose(fp);
return 0;
}
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
{
char msg[256];
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
if(_fh == MPI_FILE_NULL) {
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
if (comm->myproc==0){
sprintf(msg, "# vtk DataFile Version 2.0\n");
sprintf(msg, "%sParticle data\n",msg);
sprintf(msg, "%sASCII\n",msg);
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
flushBuffer(msg);
}
}
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
{
if (_fh == MPI_FILE_NULL) {
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
return -1;
}
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
int extrabuff = 100;
int sizebuff = sizeline*atom->Nlocal+extrabuff;
int mysize = 0;
char* msg = (char*) malloc(sizebuff);
sprintf(msg, "");
for(int i = 0; i < atom->Nlocal; i++){
if(mysize+extrabuff >= sizebuff){
sizebuff*= 1.5;
msg = (char*) realloc(msg, sizebuff);
}
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
mysize = strlen(msg);
}
int gatherSize[comm->numproc];
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
int offset=0;
int globalSize = 0;
for(int i = 0; i < comm->myproc; i++)
offset+= gatherSize[i];
for(int i = 0; i < comm->numproc; i++)
globalSize+= gatherSize[i];
MPI_Offset displ;
MPI_Datatype FileType;
int GlobalSize[] = {globalSize};
int LocalSize[] = {mysize};
int Start[] = {offset};
if(LocalSize[0]>0){
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
} else {
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
}
MPI_Type_commit(&FileType);
MPI_File_get_size(_fh, &displ);
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
MPI_Barrier(MPI_COMM_WORLD);
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
if (comm->myproc==0){
sprintf(msg, "\n\n");
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1 %d\n", msg, i);
flushBuffer(msg);
sprintf(msg, "\n\n");
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1\n",msg);
flushBuffer(msg);
sprintf(msg, "\n\n");
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
sprintf(msg, "%sSCALARS mass double\n",msg);
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
for(int i = 0; i < atom->Natoms; i++)
sprintf(msg, "%s1.0\n",msg);
sprintf(msg, "%s\n\n",msg);
flushBuffer(msg);
}
}
void vtkClose()
{
MPI_File_close(&_fh);
_fh=MPI_FILE_NULL;
}
//TODO: print ghost and cluster using MPI
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
{
if(comm->numproc == 1)
{
write_data_to_vtk_file(filename, atom, timestep);
return;
}
vtkOpen(filename, comm, atom, timestep);
vtkVector(comm, atom, param);
vtkClose();
}
static inline void flushBuffer(char* msg){
MPI_Offset displ;
MPI_File_get_size(_fh, &displ);
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
}