Final MPI version
This commit is contained in:
		
							
								
								
									
										374
									
								
								gromacs/atom.c
									
									
									
									
									
								
							
							
						
						
									
										374
									
								
								gromacs/atom.c
									
									
									
									
									
								
							@@ -12,7 +12,8 @@
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <allocate.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 
 | 
			
		||||
void initAtom(Atom *atom) {
 | 
			
		||||
    atom->x  = NULL; atom->y  = NULL; atom->z  = NULL;
 | 
			
		||||
    atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
 | 
			
		||||
@@ -27,6 +28,7 @@ void initAtom(Atom *atom) {
 | 
			
		||||
    atom->Nclusters = 0;
 | 
			
		||||
    atom->Nclusters_local = 0;
 | 
			
		||||
    atom->Nclusters_ghost = 0;
 | 
			
		||||
    atom->NmaxGhost = 0;        //Temporal
 | 
			
		||||
    atom->Nclusters_max = 0;
 | 
			
		||||
    atom->type = NULL;
 | 
			
		||||
    atom->ntypes = 0;
 | 
			
		||||
@@ -37,10 +39,19 @@ void initAtom(Atom *atom) {
 | 
			
		||||
    atom->iclusters = NULL;
 | 
			
		||||
    atom->jclusters = NULL;
 | 
			
		||||
    atom->icluster_bin = NULL;
 | 
			
		||||
    atom->PBCx = NULL; 
 | 
			
		||||
    atom->PBCy = NULL; 
 | 
			
		||||
    atom->PBCz = NULL;
 | 
			
		||||
    initMasks(atom);
 | 
			
		||||
    //MPI
 | 
			
		||||
    Box *mybox = &(atom->mybox);                  
 | 
			
		||||
    mybox->xprd = mybox->yprd = mybox->zprd = 0;          
 | 
			
		||||
    mybox->lo[0]  = mybox->lo[1]  = mybox->lo[2] = 0;             
 | 
			
		||||
    mybox->hi[0]  = mybox->hi[1]  = mybox->hi[2] = 0;   
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void createAtom(Atom *atom, Parameter *param) {
 | 
			
		||||
    
 | 
			
		||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
 | 
			
		||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
 | 
			
		||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
 | 
			
		||||
@@ -90,7 +101,7 @@ void createAtom(Atom *atom, Parameter *param) {
 | 
			
		||||
            ytmp = 0.5 * alat * j;
 | 
			
		||||
            ztmp = 0.5 * alat * k;
 | 
			
		||||
 | 
			
		||||
            if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
 | 
			
		||||
            if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {        
 | 
			
		||||
                n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1;
 | 
			
		||||
                for(m = 0; m < 5; m++) { myrandom(&n); }
 | 
			
		||||
                vxtmp = myrandom(&n);
 | 
			
		||||
@@ -128,22 +139,26 @@ int type_str2int(const char *type) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    int len = strlen(param->input_file);
 | 
			
		||||
    if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
 | 
			
		||||
    if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
 | 
			
		||||
    if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
 | 
			
		||||
    fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
 | 
			
		||||
    if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
 | 
			
		||||
    exit(-1);
 | 
			
		||||
    return -1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    int read_atoms = 0;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -153,11 +168,11 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
        char *item = strtok(line, " ");
 | 
			
		||||
        if(strncmp(item, "CRYST1", 6) == 0) {
 | 
			
		||||
            param->xlo = 0.0;
 | 
			
		||||
            param->xhi = atof(strtok(NULL, " "));
 | 
			
		||||
            param->xhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
            param->ylo = 0.0;
 | 
			
		||||
            param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
            param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
            param->zlo = 0.0;
 | 
			
		||||
            param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
            param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
            param->xprd = param->xhi - param->xlo;
 | 
			
		||||
            param->yprd = param->yhi - param->ylo;
 | 
			
		||||
            param->zprd = param->zhi - param->zlo;
 | 
			
		||||
@@ -166,23 +181,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
            char *label;
 | 
			
		||||
            int atom_id, comp_id;
 | 
			
		||||
            MD_FLOAT occupancy, charge;
 | 
			
		||||
            atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
			
		||||
            atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
			
		||||
 | 
			
		||||
            while(atom_id + 1 >= atom->Nmax) {
 | 
			
		||||
                growAtom(atom);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            atom->type[atom_id] = type_str2int(strtok(NULL, " "));
 | 
			
		||||
            label = strtok(NULL, " ");
 | 
			
		||||
            comp_id = atoi(strtok(NULL, " "));
 | 
			
		||||
            atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
            atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
            atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
            atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
 | 
			
		||||
            label = strtok(NULL, "\t ");
 | 
			
		||||
            comp_id = atoi(strtok(NULL, "\t "));
 | 
			
		||||
            atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom->vx[atom_id] = 0.0;
 | 
			
		||||
            atom->vy[atom_id] = 0.0;
 | 
			
		||||
            atom->vz[atom_id] = 0.0;
 | 
			
		||||
            occupancy = atof(strtok(NULL, " "));
 | 
			
		||||
            charge = atof(strtok(NULL, " "));
 | 
			
		||||
            occupancy = atof(strtok(NULL, "\t "));
 | 
			
		||||
            charge = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
			
		||||
            atom->Natoms++;
 | 
			
		||||
            atom->Nlocal++;
 | 
			
		||||
@@ -194,14 +209,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
                  strncmp(item, "ENDMDL", 6) == 0) {
 | 
			
		||||
            // Do nothing
 | 
			
		||||
        } else {
 | 
			
		||||
            fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
            if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
            exit(-1);
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(!read_atoms) {
 | 
			
		||||
        fprintf(stderr, "Input error: No atoms read!\n");
 | 
			
		||||
        if(me==0) fprintf(stderr, "Input error: No atoms read!\n");
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -217,12 +232,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return read_atoms;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    char desc[MAXLINE];
 | 
			
		||||
@@ -231,7 +249,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    int i = 0;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -241,25 +259,25 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    desc[i] = '\0';
 | 
			
		||||
    readline(line, fp);
 | 
			
		||||
    atoms_to_read = atoi(strtok(line, " "));
 | 
			
		||||
    fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
			
		||||
    if(me==0) fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
			
		||||
 | 
			
		||||
    while(!feof(fp) && read_atoms < atoms_to_read) {
 | 
			
		||||
        readline(line, fp);
 | 
			
		||||
        char *label = strtok(line, " ");
 | 
			
		||||
        int type = type_str2int(strtok(NULL, " "));
 | 
			
		||||
        int atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
			
		||||
        char *label = strtok(line, "\t ");
 | 
			
		||||
        int type = type_str2int(strtok(NULL, "\t "));
 | 
			
		||||
        int atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
			
		||||
        atom_id = read_atoms;
 | 
			
		||||
        while(atom_id + 1 >= atom->Nmax) {
 | 
			
		||||
            growAtom(atom);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        atom->type[atom_id] = type;
 | 
			
		||||
        atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom->vx[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
        atom->vy[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
        atom->vz[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom->vx[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom->vy[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom->vz[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
			
		||||
        atom->Natoms++;
 | 
			
		||||
        atom->Nlocal++;
 | 
			
		||||
@@ -269,18 +287,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    if(!feof(fp)) {
 | 
			
		||||
        readline(line, fp);
 | 
			
		||||
        param->xlo = 0.0;
 | 
			
		||||
        param->xhi = atof(strtok(line, " "));
 | 
			
		||||
        param->xhi = atof(strtok(line, "\t "));
 | 
			
		||||
        param->ylo = 0.0;
 | 
			
		||||
        param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
        param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
        param->zlo = 0.0;
 | 
			
		||||
        param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
        param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
        param->xprd = param->xhi - param->xlo;
 | 
			
		||||
        param->yprd = param->yhi - param->ylo;
 | 
			
		||||
        param->zprd = param->zhi - param->zlo;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(read_atoms != atoms_to_read) {
 | 
			
		||||
        fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
			
		||||
        if(me==0) fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -296,12 +314,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return read_atoms;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    int natoms = 0;
 | 
			
		||||
@@ -310,7 +330,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
    int ts = -1;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -333,47 +353,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
                }
 | 
			
		||||
            } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
 | 
			
		||||
                readline(line, fp);
 | 
			
		||||
                param->xlo = atof(strtok(line, " "));
 | 
			
		||||
                param->xhi = atof(strtok(NULL, " "));
 | 
			
		||||
                param->xlo = atof(strtok(line, "\t "));
 | 
			
		||||
                param->xhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
                param->xprd = param->xhi - param->xlo;
 | 
			
		||||
 | 
			
		||||
                readline(line, fp);
 | 
			
		||||
                param->ylo = atof(strtok(line, " "));
 | 
			
		||||
                param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
                param->ylo = atof(strtok(line, "\t "));
 | 
			
		||||
                param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
                param->yprd = param->yhi - param->ylo;
 | 
			
		||||
 | 
			
		||||
                readline(line, fp);
 | 
			
		||||
                param->zlo = atof(strtok(line, " "));
 | 
			
		||||
                param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
                param->zlo = atof(strtok(line, "\t "));
 | 
			
		||||
                param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
                param->zprd = param->zhi - param->zlo;
 | 
			
		||||
            } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
 | 
			
		||||
                for(int i = 0; i < natoms; i++) {
 | 
			
		||||
                    readline(line, fp);
 | 
			
		||||
                    atom_id = atoi(strtok(line, " ")) - 1;
 | 
			
		||||
                    atom->type[atom_id] = atoi(strtok(NULL, " "));
 | 
			
		||||
                    atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom->vx[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom->vy[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom->vz[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_id = atoi(strtok(line, "\t ")) - 1;
 | 
			
		||||
                    atom->type[atom_id] = atoi(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom->vx[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom->vy[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom->vz[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
			
		||||
                    read_atoms++;
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
                if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
                exit(-1);
 | 
			
		||||
                return -1;
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
			
		||||
            if(me==0) fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
			
		||||
            exit(-1);
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(ts < 0 || !natoms || !read_atoms) {
 | 
			
		||||
        fprintf(stderr, "Input error: atom data was not read!\n");
 | 
			
		||||
        if(me==0) fprintf(stderr, "Input error: atom data was not read!\n");
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -389,7 +409,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
			
		||||
    if(me==0) fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return natoms;
 | 
			
		||||
}
 | 
			
		||||
@@ -530,3 +550,249 @@ void growClusters(Atom *atom) {
 | 
			
		||||
    atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
 | 
			
		||||
    atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* MPI added*/
 | 
			
		||||
void growPbc(Atom* atom) {
 | 
			
		||||
    int nold = atom->NmaxGhost;
 | 
			
		||||
    atom->NmaxGhost += DELTA;
 | 
			
		||||
 | 
			
		||||
    if (atom->PBCx || atom->PBCy || atom->PBCz){
 | 
			
		||||
        atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
			
		||||
        atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
			
		||||
        atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
 | 
			
		||||
    } else {
 | 
			
		||||
        atom->PBCx = (int*) malloc(atom->NmaxGhost * sizeof(int));
 | 
			
		||||
        atom->PBCy = (int*) malloc(atom->NmaxGhost * sizeof(int));
 | 
			
		||||
        atom->PBCz = (int*) malloc(atom->NmaxGhost * sizeof(int));
 | 
			
		||||
    } 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void packForward(Atom* atom, int nc, int* list, MD_FLOAT* buf, int* pbc)
 | 
			
		||||
{
 | 
			
		||||
    for(int i = 0; i < nc; i++) {
 | 
			
		||||
        int cj = list[i];
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
        int displ = i*CLUSTER_N;
 | 
			
		||||
        
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
            buf[3*(displ+cjj)+0] = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
 | 
			
		||||
            buf[3*(displ+cjj)+1] = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
 | 
			
		||||
            buf[3*(displ+cjj)+2] = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd; 
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
			
		||||
            buf[3*(displ+cjj)+0] = -1; //x 
 | 
			
		||||
            buf[3*(displ+cjj)+1] = -1; //y
 | 
			
		||||
            buf[3*(displ+cjj)+2] = -1; //z
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void unpackForward(Atom* atom, int nc, int c0, MD_FLOAT* buf)
 | 
			
		||||
{
 | 
			
		||||
    for(int i = 0; i < nc; i++) {
 | 
			
		||||
        int cj = c0+i;
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); 
 | 
			
		||||
        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
        int displ = i*CLUSTER_N;
 | 
			
		||||
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
            if(cj_x[CL_X_OFFSET + cjj]<INFINITY) cj_x[CL_X_OFFSET + cjj] = buf[3*(displ+cjj)+0];
 | 
			
		||||
            if(cj_x[CL_Y_OFFSET + cjj]<INFINITY) cj_x[CL_Y_OFFSET + cjj] = buf[3*(displ+cjj)+1]; 
 | 
			
		||||
            if(cj_x[CL_Z_OFFSET + cjj]<INFINITY) cj_x[CL_Z_OFFSET + cjj] = buf[3*(displ+cjj)+2];
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int packGhost(Atom* atom, int cj, MD_FLOAT* buf, int* pbc)
 | 
			
		||||
{    
 | 
			
		||||
    //#of elements per cluster natoms,x0,y0,z0,type_0, . . ,xn,yn,zn,type_n,bbminx,bbmaxxy,bbminy,bbmaxy,bbminz,bbmaxz
 | 
			
		||||
    //count = 4*N_CLUSTER+7, if N_CLUSTER =4 => count = 23 value/cluster + trackpbc[x] + trackpbc[y] + trackpbc[z]
 | 
			
		||||
    int m = 0;
 | 
			
		||||
    if(atom->jclusters[cj].natoms > 0) {    
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
        int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj); 
 | 
			
		||||
        MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
        MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
 | 
			
		||||
        MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
 | 
			
		||||
        MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
 | 
			
		||||
        
 | 
			
		||||
        buf[m++] = atom->jclusters[cj].natoms;
 | 
			
		||||
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
        
 | 
			
		||||
            MD_FLOAT xtmp = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
 | 
			
		||||
            MD_FLOAT ytmp = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
 | 
			
		||||
            MD_FLOAT ztmp = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
 | 
			
		||||
            
 | 
			
		||||
            buf[m++] = xtmp;
 | 
			
		||||
            buf[m++] = ytmp;
 | 
			
		||||
            buf[m++] = ztmp;
 | 
			
		||||
            buf[m++]= atom->cl_type[cj_sca_base + cjj];
 | 
			
		||||
 | 
			
		||||
            if(bbminx > xtmp) { bbminx = xtmp; }
 | 
			
		||||
            if(bbmaxx < xtmp) { bbmaxx = xtmp; }
 | 
			
		||||
            if(bbminy > ytmp) { bbminy = ytmp; }
 | 
			
		||||
            if(bbmaxy < ytmp) { bbmaxy = ytmp; }
 | 
			
		||||
            if(bbminz > ztmp) { bbminz = ztmp; }
 | 
			
		||||
            if(bbmaxz < ztmp) { bbmaxz = ztmp; }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
			
		||||
            buf[m++] = -1; //x 
 | 
			
		||||
            buf[m++] = -1; //y
 | 
			
		||||
            buf[m++] = -1; //z
 | 
			
		||||
            buf[m++] = -1; //type
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        buf[m++] = bbminx;
 | 
			
		||||
        buf[m++] = bbmaxx;
 | 
			
		||||
        buf[m++] = bbminy;
 | 
			
		||||
        buf[m++] = bbmaxy;
 | 
			
		||||
        buf[m++] = bbminz;
 | 
			
		||||
        buf[m++] = bbmaxz;
 | 
			
		||||
        //TODO: check atom->ncj
 | 
			
		||||
        int ghostId = cj-atom->ncj;
 | 
			
		||||
        //check for ghost particles  
 | 
			
		||||
        buf[m++] = (cj-atom->ncj>=0) ? pbc[_x]+atom->PBCx[ghostId]:pbc[_x];
 | 
			
		||||
        buf[m++] = (cj-atom->ncj>=0) ? pbc[_y]+atom->PBCy[ghostId]:pbc[_y];
 | 
			
		||||
        buf[m++] = (cj-atom->ncj>=0) ? pbc[_z]+atom->PBCz[ghostId]:pbc[_z];
 | 
			
		||||
    }  
 | 
			
		||||
    return m;
 | 
			
		||||
}
 | 
			
		||||
    
 | 
			
		||||
int unpackGhost(Atom* atom, int cj, MD_FLOAT* buf)
 | 
			
		||||
{
 | 
			
		||||
    int m = 0;
 | 
			
		||||
    int jfac =  MAX(1, CLUSTER_N / CLUSTER_M);
 | 
			
		||||
    if(cj*jfac>=atom->Nclusters_max) growClusters(atom);
 | 
			
		||||
    if(atom->Nclusters_ghost>=atom->NmaxGhost) growPbc(atom); 
 | 
			
		||||
 | 
			
		||||
    int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj); 
 | 
			
		||||
    int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
    MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base]; 
 | 
			
		||||
 | 
			
		||||
    atom->jclusters[cj].natoms = buf[m++];
 | 
			
		||||
    for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
 | 
			
		||||
        cj_x[CL_X_OFFSET + cjj] = buf[m++];
 | 
			
		||||
        cj_x[CL_Y_OFFSET + cjj] = buf[m++];
 | 
			
		||||
        cj_x[CL_Z_OFFSET + cjj] = buf[m++];
 | 
			
		||||
        atom->cl_type[cj_sca_base + cjj] = buf[m++];
 | 
			
		||||
        atom->Nghost++;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
			
		||||
        cj_x[CL_X_OFFSET + cjj] = INFINITY;
 | 
			
		||||
        cj_x[CL_Y_OFFSET + cjj] = INFINITY;
 | 
			
		||||
        cj_x[CL_Z_OFFSET + cjj] = INFINITY;
 | 
			
		||||
        atom->cl_type[cj_sca_base + cjj] = -1;
 | 
			
		||||
        m+=4;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    atom->jclusters[cj].bbminx = buf[m++];
 | 
			
		||||
    atom->jclusters[cj].bbmaxx = buf[m++];
 | 
			
		||||
    atom->jclusters[cj].bbminy = buf[m++];
 | 
			
		||||
    atom->jclusters[cj].bbmaxy = buf[m++];
 | 
			
		||||
    atom->jclusters[cj].bbminz = buf[m++];
 | 
			
		||||
    atom->jclusters[cj].bbmaxz = buf[m++];
 | 
			
		||||
    atom->PBCx[atom->Nclusters_ghost] = buf[m++];
 | 
			
		||||
    atom->PBCy[atom->Nclusters_ghost] = buf[m++];
 | 
			
		||||
    atom->PBCz[atom->Nclusters_ghost] = buf[m++]; 
 | 
			
		||||
    atom->Nclusters_ghost++;
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
   
 | 
			
		||||
void packReverse(Atom* atom, int nc, int c0, MD_FLOAT* buf)
 | 
			
		||||
{
 | 
			
		||||
    for(int i = 0; i < nc; i++) {
 | 
			
		||||
        int cj = c0+i;
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj); 
 | 
			
		||||
        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
        int displ = i*CLUSTER_N;
 | 
			
		||||
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
            buf[3*(displ+cjj)+0] = cj_f[CL_X_OFFSET + cjj];
 | 
			
		||||
            buf[3*(displ+cjj)+1] = cj_f[CL_Y_OFFSET + cjj]; 
 | 
			
		||||
            buf[3*(displ+cjj)+2] = cj_f[CL_Z_OFFSET + cjj]; 
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
 | 
			
		||||
            buf[3*(displ+cjj)+0] = -1; //x 
 | 
			
		||||
            buf[3*(displ+cjj)+1] = -1; //y
 | 
			
		||||
            buf[3*(displ+cjj)+2] = -1; //z
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void unpackReverse(Atom* atom, int nc, int* list, MD_FLOAT* buf)
 | 
			
		||||
{
 | 
			
		||||
    for(int i = 0; i < nc; i++) {
 | 
			
		||||
        int cj = list[i];
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
        int displ = i*CLUSTER_N;
 | 
			
		||||
  
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
            cj_f[CL_X_OFFSET + cjj] += buf[3*(displ+cjj)+0];
 | 
			
		||||
            cj_f[CL_Y_OFFSET + cjj] += buf[3*(displ+cjj)+1]; 
 | 
			
		||||
            cj_f[CL_Z_OFFSET + cjj] += buf[3*(displ+cjj)+2];
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
 | 
			
		||||
{
 | 
			
		||||
  int m = 0;
 | 
			
		||||
  buf[m++] = atom_x(i);
 | 
			
		||||
  buf[m++] = atom_y(i);
 | 
			
		||||
  buf[m++] = atom_z(i);
 | 
			
		||||
  buf[m++] = atom_vx(i);
 | 
			
		||||
  buf[m++] = atom_vy(i);
 | 
			
		||||
  buf[m++] = atom_vz(i);
 | 
			
		||||
  buf[m++] = atom->type[i];
 | 
			
		||||
  return m;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
 | 
			
		||||
{
 | 
			
		||||
  while(i >= atom->Nmax) growAtom(atom);
 | 
			
		||||
  int m = 0;
 | 
			
		||||
  atom_x(i) = buf[m++];
 | 
			
		||||
  atom_y(i) = buf[m++];
 | 
			
		||||
  atom_z(i) = buf[m++];
 | 
			
		||||
  atom_vx(i) = buf[m++];
 | 
			
		||||
  atom_vy(i) = buf[m++];
 | 
			
		||||
  atom_vz(i) = buf[m++];
 | 
			
		||||
  atom->type[i] = buf[m++];
 | 
			
		||||
  return m;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void pbc(Atom* atom)
 | 
			
		||||
{
 | 
			
		||||
  for(int i = 0; i < atom->Nlocal; i++) {
 | 
			
		||||
   
 | 
			
		||||
    MD_FLOAT xprd = atom->mybox.xprd;
 | 
			
		||||
    MD_FLOAT yprd = atom->mybox.yprd;
 | 
			
		||||
    MD_FLOAT zprd = atom->mybox.zprd; 
 | 
			
		||||
 | 
			
		||||
    if(atom_x(i) < 0.0) atom_x(i) += xprd;
 | 
			
		||||
    if(atom_y(i) < 0.0) atom_y(i) += yprd;
 | 
			
		||||
    if(atom_z(i) < 0.0)  atom_z(i) +=zprd;
 | 
			
		||||
    if(atom_x(i) >= xprd) atom_x(i) -=xprd;    
 | 
			
		||||
    if(atom_y(i) >= yprd) atom_y(i) -=yprd;
 | 
			
		||||
    if(atom_z(i) >= zprd) atom_z(i) -=zprd;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void copy(Atom* atom, int i, int j)
 | 
			
		||||
{
 | 
			
		||||
  atom_x(i) = atom_x(j);
 | 
			
		||||
  atom_y(i) = atom_y(j);
 | 
			
		||||
  atom_z(i) = atom_z(j);
 | 
			
		||||
  atom_vx(i) = atom_vx(j);
 | 
			
		||||
  atom_vy(i) = atom_vy(j);
 | 
			
		||||
  atom_vz(i) = atom_vz(j);
 | 
			
		||||
  atom->type[i] = atom->type[j];
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -14,8 +14,9 @@
 | 
			
		||||
#include <stats.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <simd.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
 | 
			
		||||
/*
 | 
			
		||||
static inline void gmx_load_simd_2xnn_interactions(
 | 
			
		||||
    int excl,
 | 
			
		||||
@@ -49,7 +50,6 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
			
		||||
    MD_FLOAT sigma6 = param->sigma6;
 | 
			
		||||
    MD_FLOAT epsilon = param->epsilon;
 | 
			
		||||
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
			
		||||
@@ -60,13 +60,23 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
			
		||||
        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
			
		||||
            cj_f[CL_X_OFFSET + cjj] = 0.0;
 | 
			
		||||
            cj_f[CL_Y_OFFSET + cjj] = 0.0;
 | 
			
		||||
            cj_f[CL_Z_OFFSET + cjj] = 0.0;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    double S = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    #pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("force");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
			
		||||
        int ci_cj1 = CJ1_FROM_CI(ci);
 | 
			
		||||
@@ -75,14 +85,14 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
			
		||||
        neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
 | 
			
		||||
        int numneighs = neighbor->numneigh[ci];
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
        for(int k = 0; k < numneighs; k++) {
 | 
			
		||||
            int cj = neighs[k];
 | 
			
		||||
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
            int any = 0;
 | 
			
		||||
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
            for(int cii = 0; cii < CLUSTER_M; cii++) {
 | 
			
		||||
                MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
 | 
			
		||||
                MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
 | 
			
		||||
@@ -103,6 +113,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
                    cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
 | 
			
		||||
                                                  (ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
 | 
			
		||||
                    #endif
 | 
			
		||||
 | 
			
		||||
                    if(cond) {
 | 
			
		||||
                        MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
 | 
			
		||||
                        MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
 | 
			
		||||
@@ -113,12 +124,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
                            MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
			
		||||
                            MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
			
		||||
 | 
			
		||||
                            if(neighbor->half_neigh) {
 | 
			
		||||
                            if(neighbor->half_neigh || param->method) {
 | 
			
		||||
                                cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
			
		||||
                                cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
			
		||||
                                cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
			
		||||
                            }
 | 
			
		||||
 | 
			
		||||
                            fix += delx * force;
 | 
			
		||||
                            fiy += dely * force;
 | 
			
		||||
                            fiz += delz * force;
 | 
			
		||||
@@ -129,13 +139,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if(any != 0) {
 | 
			
		||||
                    addStat(stats->clusters_within_cutoff, 1);
 | 
			
		||||
                } else {
 | 
			
		||||
                    addStat(stats->clusters_outside_cutoff, 1);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                ci_f[CL_X_OFFSET + cii] += fix;
 | 
			
		||||
                ci_f[CL_Y_OFFSET + cii] += fiy;
 | 
			
		||||
                ci_f[CL_Z_OFFSET + cii] += fiz;
 | 
			
		||||
@@ -146,7 +154,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
        addStat(stats->num_neighs, numneighs);
 | 
			
		||||
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
			
		||||
    LIKWID_MARKER_STOP("force");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -168,7 +176,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
 | 
			
		||||
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
 | 
			
		||||
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local+atom->Nclusters_ghost; ci++) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
			
		||||
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
 | 
			
		||||
@@ -178,6 +186,16 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
			
		||||
        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
			
		||||
            cj_f[CL_X_OFFSET + cjj] = 0.0;
 | 
			
		||||
            cj_f[CL_Y_OFFSET + cjj] = 0.0;
 | 
			
		||||
            cj_f[CL_Z_OFFSET + cjj] = 0.0;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    double S = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    #pragma omp parallel
 | 
			
		||||
@@ -213,7 +231,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
    #endif
 | 
			
		||||
    */
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
			
		||||
        #if CLUSTER_M > CLUSTER_N
 | 
			
		||||
@@ -322,7 +340,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
            fiz2 += tz2;
 | 
			
		||||
 | 
			
		||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)|| param->method) {
 | 
			
		||||
                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
 | 
			
		||||
            }
 | 
			
		||||
            #else
 | 
			
		||||
@@ -373,7 +391,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
            fiz2 += tz2;
 | 
			
		||||
 | 
			
		||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
 | 
			
		||||
                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
 | 
			
		||||
            }
 | 
			
		||||
            #else
 | 
			
		||||
@@ -389,7 +407,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
        addStat(stats->num_neighs, numneighs);
 | 
			
		||||
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
			
		||||
    LIKWID_MARKER_STOP("force");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -427,7 +445,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("force");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
			
		||||
        #if CLUSTER_M > CLUSTER_N
 | 
			
		||||
@@ -562,7 +580,6 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
 | 
			
		||||
    if(neighbor->half_neigh) {
 | 
			
		||||
        return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -589,13 +606,23 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
 | 
			
		||||
        int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
			
		||||
        MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
			
		||||
            cj_f[CL_X_OFFSET + cjj] = 0.0;
 | 
			
		||||
            cj_f[CL_Y_OFFSET + cjj] = 0.0;
 | 
			
		||||
            cj_f[CL_Z_OFFSET + cjj] = 0.0;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
  
 | 
			
		||||
    double S = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    #pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("force");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
			
		||||
        #if CLUSTER_M > CLUSTER_N
 | 
			
		||||
@@ -726,7 +753,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
            fiz3 = simd_add(fiz3, tz3);
 | 
			
		||||
 | 
			
		||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
 | 
			
		||||
                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
 | 
			
		||||
                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
 | 
			
		||||
                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
 | 
			
		||||
@@ -811,7 +838,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
            fiz3 = simd_add(fiz3, tz3);
 | 
			
		||||
 | 
			
		||||
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
 | 
			
		||||
            if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
 | 
			
		||||
                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
 | 
			
		||||
                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
 | 
			
		||||
                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
 | 
			
		||||
@@ -831,7 +858,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
        addStat(stats->num_neighs, numneighs);
 | 
			
		||||
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
			
		||||
    LIKWID_MARKER_STOP("force");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -869,7 +896,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("force");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_cj0 = CJ0_FROM_CI(ci);
 | 
			
		||||
        #if CLUSTER_M > CLUSTER_N
 | 
			
		||||
@@ -1070,3 +1097,120 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
 | 
			
		||||
 | 
			
		||||
    return computeForceLJ_4xn_full(param, atom, neighbor, stats);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//Routine for eight shell method
 | 
			
		||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    DEBUG_MESSAGE("computeForceLJ begin\n");
 | 
			
		||||
 | 
			
		||||
    int Nshell = neighbor->Nshell;
 | 
			
		||||
    int *neighs;
 | 
			
		||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
			
		||||
    MD_FLOAT sigma6 = param->sigma6;
 | 
			
		||||
    MD_FLOAT epsilon = param->epsilon;
 | 
			
		||||
    
 | 
			
		||||
    for(int ci = 0; ci < Nshell; ci++) {
 | 
			
		||||
        neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
 | 
			
		||||
        int numneighs = neighbor->numNeighShell[ci];
 | 
			
		||||
        int cs = neighbor->listshell[ci];
 | 
			
		||||
        int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
 | 
			
		||||
        MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
 | 
			
		||||
        MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
 | 
			
		||||
        
 | 
			
		||||
        for(int k = 0; k < numneighs; k++) {
 | 
			
		||||
            int cj = neighs[k];
 | 
			
		||||
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
 | 
			
		||||
            for(int css = 0; css < CLUSTER_N; css++) {
 | 
			
		||||
                MD_FLOAT x = cs_x[CL_X_OFFSET + css];
 | 
			
		||||
                MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
 | 
			
		||||
                MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
 | 
			
		||||
 | 
			
		||||
                MD_FLOAT fix = 0;
 | 
			
		||||
                MD_FLOAT fiy = 0;
 | 
			
		||||
                MD_FLOAT fiz = 0;
 | 
			
		||||
 | 
			
		||||
                for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
 | 
			
		||||
                    
 | 
			
		||||
                    MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
 | 
			
		||||
                    MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
 | 
			
		||||
                    MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
 | 
			
		||||
 | 
			
		||||
                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
			
		||||
                    if(rsq < cutforcesq) {
 | 
			
		||||
                        MD_FLOAT sr2 = 1.0 / rsq;
 | 
			
		||||
                        MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
			
		||||
                        MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
			
		||||
 | 
			
		||||
                        cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
			
		||||
                        cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
			
		||||
                        cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
			
		||||
                        
 | 
			
		||||
                        fix += delx * force;
 | 
			
		||||
                        fiy += dely * force;
 | 
			
		||||
                        fiz += delz * force;
 | 
			
		||||
                    } 
 | 
			
		||||
                }
 | 
			
		||||
                
 | 
			
		||||
                cs_f[CL_X_OFFSET + css] += fix;
 | 
			
		||||
                cs_f[CL_Y_OFFSET + css] += fiy;
 | 
			
		||||
                cs_f[CL_Z_OFFSET + css] += fiz;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
       // addStat(stats->calculated_forces, 1);
 | 
			
		||||
       // addStat(stats->num_neighs, numneighs);
 | 
			
		||||
       // addStat(stats->force_iters, (long long int)((double)numneighs));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    
 | 
			
		||||
    int Nshell = neighbor->Nshell;
 | 
			
		||||
    Pair* neighs;
 | 
			
		||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
			
		||||
    MD_FLOAT sigma6 = param->sigma6;
 | 
			
		||||
    MD_FLOAT epsilon = param->epsilon;
 | 
			
		||||
    
 | 
			
		||||
    for(int ci = 0; ci < Nshell; ci++) {
 | 
			
		||||
        neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
 | 
			
		||||
        int numneighs = neighbor->numNeighShell[ci];
 | 
			
		||||
        int cs = neighbor->listshell[ci].cluster;
 | 
			
		||||
        int css = neighbor->listshell[ci].atom;
 | 
			
		||||
        int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
 | 
			
		||||
        MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
 | 
			
		||||
        MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];   
 | 
			
		||||
        MD_FLOAT x = cs_x[CL_X_OFFSET + css];
 | 
			
		||||
        MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
 | 
			
		||||
        MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
 | 
			
		||||
 | 
			
		||||
        for(int k = 0; k < numneighs; k++) {
 | 
			
		||||
            int cj = neighs[k].cluster;
 | 
			
		||||
            int cjj = neighs[k].atom;            
 | 
			
		||||
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
 | 
			
		||||
                    
 | 
			
		||||
            MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
 | 
			
		||||
            MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
 | 
			
		||||
            MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
 | 
			
		||||
            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
			
		||||
                    
 | 
			
		||||
            if(rsq < cutforcesq) {
 | 
			
		||||
                MD_FLOAT sr2 = 1.0 / rsq;
 | 
			
		||||
                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
			
		||||
                MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
 | 
			
		||||
 | 
			
		||||
                cj_f[CL_X_OFFSET + cjj] -= delx * force;
 | 
			
		||||
                cj_f[CL_Y_OFFSET + cjj] -= dely * force;
 | 
			
		||||
                cj_f[CL_Z_OFFSET + cjj] -= delz * force;
 | 
			
		||||
                cs_f[CL_X_OFFSET + css] += delx * force;
 | 
			
		||||
                cs_f[CL_Y_OFFSET + css] += delx * force;
 | 
			
		||||
                cs_f[CL_Z_OFFSET + css] += delx * force;
 | 
			
		||||
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <box.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __ATOM_H_
 | 
			
		||||
#define __ATOM_H_
 | 
			
		||||
@@ -102,7 +103,7 @@ typedef struct {
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    int Natoms, Nlocal, Nghost, Nmax;
 | 
			
		||||
    int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
 | 
			
		||||
    int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max, NmaxGhost,ncj;
 | 
			
		||||
    MD_FLOAT *x, *y, *z;
 | 
			
		||||
    MD_FLOAT *vx, *vy, *vz;
 | 
			
		||||
    int *border_map;
 | 
			
		||||
@@ -112,6 +113,7 @@ typedef struct {
 | 
			
		||||
    MD_FLOAT *sigma6;
 | 
			
		||||
    MD_FLOAT *cutforcesq;
 | 
			
		||||
    MD_FLOAT *cutneighsq;
 | 
			
		||||
    //track the movement of a particle along boundaries
 | 
			
		||||
    int *PBCx, *PBCy, *PBCz;
 | 
			
		||||
    // Data in cluster format
 | 
			
		||||
    MD_FLOAT *cl_x;
 | 
			
		||||
@@ -128,6 +130,9 @@ typedef struct {
 | 
			
		||||
    unsigned int masks_2xnn_fn[8];
 | 
			
		||||
    unsigned int masks_4xn_hn[16];
 | 
			
		||||
    unsigned int masks_4xn_fn[16];
 | 
			
		||||
 | 
			
		||||
    //Info Subdomain
 | 
			
		||||
    Box mybox;   
 | 
			
		||||
} Atom;
 | 
			
		||||
 | 
			
		||||
extern void initAtom(Atom*);
 | 
			
		||||
@@ -140,6 +145,18 @@ extern int readAtom_dmp(Atom*, Parameter*);
 | 
			
		||||
extern void growAtom(Atom*);
 | 
			
		||||
extern void growClusters(Atom*);
 | 
			
		||||
 | 
			
		||||
int  packGhost(Atom*, int, MD_FLOAT* , int*);
 | 
			
		||||
int  unpackGhost(Atom*, int, MD_FLOAT*);
 | 
			
		||||
int  packExchange(Atom*, int, MD_FLOAT*);
 | 
			
		||||
int  unpackExchange(Atom*, int, MD_FLOAT*);
 | 
			
		||||
void packForward(Atom*, int, int*, MD_FLOAT*, int*); 
 | 
			
		||||
void unpackForward(Atom*, int, int, MD_FLOAT*);
 | 
			
		||||
void packReverse(Atom* , int , int , MD_FLOAT*);
 | 
			
		||||
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
 | 
			
		||||
void pbc(Atom*);
 | 
			
		||||
void copy(Atom*, int, int);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef AOS
 | 
			
		||||
#   define POS_DATA_LAYOUT     "AoS"
 | 
			
		||||
#   define atom_x(i)           atom->x[(i) * 3 + 0]
 | 
			
		||||
 
 | 
			
		||||
@@ -9,10 +9,13 @@
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
 | 
			
		||||
#include <timers.h>
 | 
			
		||||
#include <timing.h>
 | 
			
		||||
#include <simd.h>
 | 
			
		||||
/*
 | 
			
		||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
  
 | 
			
		||||
    DEBUG_MESSAGE("cpuInitialIntegrate start\n");
 | 
			
		||||
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
			
		||||
@@ -32,9 +35,9 @@ void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
    DEBUG_MESSAGE("cpuInitialIntegrate end\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
 | 
			
		||||
void  cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
 | 
			
		||||
    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
			
		||||
@@ -46,6 +49,56 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
            ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
  
 | 
			
		||||
    DEBUG_MESSAGE("cpuInitialIntegrate start\n");
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
			
		||||
        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
			
		||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
			
		||||
 | 
			
		||||
        MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
 | 
			
		||||
        MD_SIMD_FLOAT dt = simd_broadcast(param->dt); 
 | 
			
		||||
        
 | 
			
		||||
        MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT x_vector = simd_fma(vx_vector, dt, simd_load(&ci_x[CL_X_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT y_vector = simd_fma(vy_vector, dt, simd_load(&ci_x[CL_Y_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT z_vector = simd_fma(vz_vector, dt, simd_load(&ci_x[CL_Z_OFFSET]));
 | 
			
		||||
        
 | 
			
		||||
        simd_store(&ci_v[CL_X_OFFSET], vx_vector);
 | 
			
		||||
        simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
 | 
			
		||||
        simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
 | 
			
		||||
        simd_store(&ci_x[CL_X_OFFSET], x_vector);
 | 
			
		||||
        simd_store(&ci_x[CL_Y_OFFSET], y_vector);
 | 
			
		||||
        simd_store(&ci_x[CL_Z_OFFSET], z_vector);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    DEBUG_MESSAGE("cpuInitialIntegrate end\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void  cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
 | 
			
		||||
    DEBUG_MESSAGE("cpuFinalIntegrate start\n");
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
			
		||||
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
 | 
			
		||||
 | 
			
		||||
        MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
 | 
			
		||||
        MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
 | 
			
		||||
        MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
 | 
			
		||||
        simd_store(&ci_v[CL_X_OFFSET], vx_vector);
 | 
			
		||||
        simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
 | 
			
		||||
        simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
 | 
			
		||||
}
 | 
			
		||||
@@ -54,3 +107,6 @@ void cpuFinalIntegrate(Parameter *param, Atom *atom) {
 | 
			
		||||
void cudaInitialIntegrate(Parameter*, Atom*);
 | 
			
		||||
void cudaFinalIntegrate(Parameter*, Atom*);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   
 | 
			
		||||
@@ -25,6 +25,11 @@
 | 
			
		||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
 | 
			
		||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    int cluster;
 | 
			
		||||
    int atom;
 | 
			
		||||
} Pair;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    int every;
 | 
			
		||||
    int ncalls;
 | 
			
		||||
@@ -34,8 +39,20 @@ typedef struct {
 | 
			
		||||
    int half_neigh;
 | 
			
		||||
    int* neighbors;
 | 
			
		||||
    unsigned int* neighbors_imask;
 | 
			
		||||
    //MPI
 | 
			
		||||
    /*
 | 
			
		||||
    int Nshell;         //# of atoms in listShell(Cluster here cover all possible ghost interactions)
 | 
			
		||||
    int *numNeighShell; //# of neighs for each atom in listShell
 | 
			
		||||
    Pair *neighshell;    //list of neighs for each atom in listShell
 | 
			
		||||
    Pair *listshell;     //Atoms to compute the force
 | 
			
		||||
    */
 | 
			
		||||
    int Nshell;         //# of cluster in listShell(Cluster here cover all possible ghost interactions)
 | 
			
		||||
    int *numNeighShell; //# of neighs for each atom in listShell
 | 
			
		||||
    int *neighshell;    //list of neighs for each atom in listShell
 | 
			
		||||
    int *listshell;     //Atoms to compute the force
 | 
			
		||||
} Neighbor;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
extern void initNeighbor(Neighbor*, Parameter*);
 | 
			
		||||
extern void setupNeighbor(Parameter*, Atom*);
 | 
			
		||||
extern void binatoms(Atom*);
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,8 @@
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <comm.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __VTK_H_
 | 
			
		||||
#define __VTK_H_
 | 
			
		||||
@@ -13,4 +15,5 @@ extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int t
 | 
			
		||||
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
			
		||||
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
			
		||||
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
			
		||||
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										238
									
								
								gromacs/main.c
									
									
									
									
									
								
							
							
						
						
									
										238
									
								
								gromacs/main.c
									
									
									
									
									
								
							@@ -5,9 +5,7 @@
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
//--
 | 
			
		||||
#include <likwid-marker.h>
 | 
			
		||||
//--
 | 
			
		||||
@@ -26,6 +24,10 @@
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <vtk.h>
 | 
			
		||||
#include <xtc.h>
 | 
			
		||||
#include <comm.h>
 | 
			
		||||
#include <grid.h>
 | 
			
		||||
#include <shell_methods.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
			
		||||
 | 
			
		||||
@@ -42,17 +44,55 @@ extern void copyDataFromCUDADevice(Atom *atom);
 | 
			
		||||
extern void cudaDeviceFree();
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
			
		||||
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time)
 | 
			
		||||
{
 | 
			
		||||
    double S, E;
 | 
			
		||||
    int dims = 3;   //TODO: Adjust to do in 3d and 2d
 | 
			
		||||
    S = getTimeStamp();
 | 
			
		||||
    if(param->balance == RCB) {
 | 
			
		||||
        rcbBalance(grid, atom, param, meanBisect,dims,0);
 | 
			
		||||
        neighComm(comm, param, grid);
 | 
			
		||||
    }else if(param->balance == meanTimeRCB){
 | 
			
		||||
        rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
 | 
			
		||||
        neighComm(comm, param, grid);
 | 
			
		||||
    }else if(param->balance == Staggered) {
 | 
			
		||||
        staggeredBalance(grid, atom, param, time);
 | 
			
		||||
        neighComm(comm, param, grid);
 | 
			
		||||
        exchangeComm(comm,atom);
 | 
			
		||||
    }else { } //Do nothing
 | 
			
		||||
    //printGrid(grid);
 | 
			
		||||
    E = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    return E-S;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
 | 
			
		||||
{      
 | 
			
		||||
    double E,S,time;
 | 
			
		||||
    int me;
 | 
			
		||||
    MPI_Comm_rank(world,&me);
 | 
			
		||||
    S = getTimeStamp();
 | 
			
		||||
    if(param->balance == meanTimeRCB || param->balance == RCB){
 | 
			
		||||
        rcbBalance(grid, atom, param, meanBisect,3,0);
 | 
			
		||||
        neighComm(comm, param, grid); 
 | 
			
		||||
    }      
 | 
			
		||||
    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world); 
 | 
			
		||||
    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
 | 
			
		||||
    MPI_Barrier(world);
 | 
			
		||||
    E = getTimeStamp();
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
 | 
			
		||||
    if(param->force_field == FF_EAM) { initEam(eam, param); }
 | 
			
		||||
    double S, E;
 | 
			
		||||
    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
 | 
			
		||||
    param->xprd = param->nx * param->lattice;
 | 
			
		||||
    param->yprd = param->ny * param->lattice;
 | 
			
		||||
    param->zprd = param->nz * param->lattice;
 | 
			
		||||
 | 
			
		||||
    S = getTimeStamp();
 | 
			
		||||
    initAtom(atom);
 | 
			
		||||
    initPbc(atom);
 | 
			
		||||
    //initPbc(atom);
 | 
			
		||||
    initStats(stats);
 | 
			
		||||
    initNeighbor(neighbor, param);
 | 
			
		||||
    if(param->input_file == NULL) {
 | 
			
		||||
@@ -60,13 +100,18 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
			
		||||
    } else {
 | 
			
		||||
        readAtom(atom, param);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    setupGrid(grid,atom,param);
 | 
			
		||||
    setupNeighbor(param, atom);
 | 
			
		||||
    setupComm(comm, param, grid);
 | 
			
		||||
    if(param->balance){  
 | 
			
		||||
        initialBalance(param, eam, atom, neighbor, stats, comm, grid);
 | 
			
		||||
    }
 | 
			
		||||
    setupThermo(param, atom->Natoms);
 | 
			
		||||
    if(param->input_file == NULL) { adjustThermo(param, atom); }
 | 
			
		||||
    buildClusters(atom);
 | 
			
		||||
    defineJClusters(atom);
 | 
			
		||||
    setupPbc(atom, param);
 | 
			
		||||
    //setupPbc(atom, param);
 | 
			
		||||
    ghostNeighbor(comm, atom, param); //change
 | 
			
		||||
    binClusters(atom);
 | 
			
		||||
    buildNeighbor(atom, neighbor);
 | 
			
		||||
    initDevice(atom, neighbor);
 | 
			
		||||
@@ -74,15 +119,15 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    double S, E;
 | 
			
		||||
    S = getTimeStamp();
 | 
			
		||||
    LIKWID_MARKER_START("reneighbour");
 | 
			
		||||
    updateSingleAtoms(atom);
 | 
			
		||||
    updateAtomsPbc(atom, param);
 | 
			
		||||
    //updateAtomsPbc(atom, param);
 | 
			
		||||
    buildClusters(atom);
 | 
			
		||||
    defineJClusters(atom);
 | 
			
		||||
    setupPbc(atom, param);
 | 
			
		||||
    //setupPbc(atom, param);
 | 
			
		||||
    ghostNeighbor(comm, atom, param);
 | 
			
		||||
    binClusters(atom);
 | 
			
		||||
    buildNeighbor(atom, neighbor);
 | 
			
		||||
    LIKWID_MARKER_STOP("reneighbour");
 | 
			
		||||
@@ -90,15 +135,13 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void printAtomState(Atom *atom) {
 | 
			
		||||
    printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
 | 
			
		||||
            atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
 | 
			
		||||
 | 
			
		||||
    /*     int nall = atom->Nlocal + atom->Nghost; */
 | 
			
		||||
 | 
			
		||||
    /*     for (int i=0; i<nall; i++) { */
 | 
			
		||||
    /*         printf("%d  %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
 | 
			
		||||
    /*     } */
 | 
			
		||||
double updateAtoms(Comm* comm, Atom* atom){
 | 
			
		||||
    double S,E;
 | 
			
		||||
    S = getTimeStamp();
 | 
			
		||||
        updateSingleAtoms(atom);
 | 
			
		||||
        exchangeComm(comm, atom);
 | 
			
		||||
    E = getTimeStamp();
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char** argv) {
 | 
			
		||||
@@ -108,7 +151,8 @@ int main(int argc, char** argv) {
 | 
			
		||||
    Neighbor neighbor;
 | 
			
		||||
    Stats stats;
 | 
			
		||||
    Parameter param;
 | 
			
		||||
 | 
			
		||||
    Comm comm; 
 | 
			
		||||
    Grid grid;
 | 
			
		||||
    LIKWID_MARKER_INIT;
 | 
			
		||||
#pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
@@ -116,7 +160,7 @@ int main(int argc, char** argv) {
 | 
			
		||||
        //LIKWID_MARKER_REGISTER("reneighbour");
 | 
			
		||||
        //LIKWID_MARKER_REGISTER("pbc");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    initComm(&argc, &argv, &comm); //change
 | 
			
		||||
    initParameter(¶m);
 | 
			
		||||
    for(int i = 0; i < argc; i++) {
 | 
			
		||||
        if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
 | 
			
		||||
@@ -158,6 +202,24 @@ int main(int argc, char** argv) {
 | 
			
		||||
            param.half_neigh = atoi(argv[++i]);
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-method") == 0)) {
 | 
			
		||||
            param.method = atoi(argv[++i]);
 | 
			
		||||
            if (param.method>2 || param.method< 0){
 | 
			
		||||
                if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
 | 
			
		||||
                endComm(&comm);   
 | 
			
		||||
                exit(0);
 | 
			
		||||
            }
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-bal") == 0)) {
 | 
			
		||||
            param.balance = atoi(argv[++i]);
 | 
			
		||||
            if (param.balance>3 || param.balance< 0){
 | 
			
		||||
                if(comm.myproc == 0) fprintf(stderr, "Load balance does not exist!\n");
 | 
			
		||||
                endComm(&comm);   
 | 
			
		||||
                exit(0);
 | 
			
		||||
            }
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
 | 
			
		||||
            param.mass = atof(argv[++i]);
 | 
			
		||||
            continue;
 | 
			
		||||
@@ -188,6 +250,7 @@ int main(int argc, char** argv) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
 | 
			
		||||
            //TODO: add the shell and ac print options
 | 
			
		||||
            printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
			
		||||
            printf(HLINE);
 | 
			
		||||
            printf("-p <string>:          file to read parameters from (can be specified more than once)\n");
 | 
			
		||||
@@ -205,98 +268,101 @@ int main(int argc, char** argv) {
 | 
			
		||||
            exit(EXIT_SUCCESS);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if(param.balance>0 && param.method == 1){
 | 
			
		||||
        if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
 | 
			
		||||
        endComm(&comm);   
 | 
			
		||||
        exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    param.cutneigh = param.cutforce + param.skin;
 | 
			
		||||
    setup(¶m, &eam, &atom, &neighbor, &stats);
 | 
			
		||||
    printParameter(¶m);
 | 
			
		||||
    printf(HLINE);
 | 
			
		||||
 | 
			
		||||
    printf("step\ttemp\t\tpressure\n");
 | 
			
		||||
    timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
 | 
			
		||||
    if(comm.myproc == 0) printParameter(¶m);
 | 
			
		||||
    if(comm.myproc == 0) printf(HLINE);
 | 
			
		||||
    if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
 | 
			
		||||
    computeThermo(0, ¶m, &atom);
 | 
			
		||||
    #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
			
		||||
    traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    #ifdef CUDA_TARGET
 | 
			
		||||
    copyDataToCUDADevice(&atom);
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    if(param.force_field == FF_EAM) {
 | 
			
		||||
        timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
 | 
			
		||||
    } else {
 | 
			
		||||
        timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    timer[NEIGH] = 0.0;
 | 
			
		||||
    timer[NEIGH]    = 0.0;
 | 
			
		||||
    timer[FORWARD]  = 0.0;
 | 
			
		||||
    timer[UPDATE]   = 0.0;
 | 
			
		||||
    timer[BALANCE]  = 0.0;
 | 
			
		||||
    timer[REVERSE]  = reverse(&comm, &atom, ¶m);
 | 
			
		||||
    MPI_Barrier(world);
 | 
			
		||||
    timer[TOTAL] = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    if(param.vtk_file != NULL) {
 | 
			
		||||
        write_data_to_vtk_file(param.vtk_file, &atom, 0);
 | 
			
		||||
        //write_data_to_vtk_file(param.vtk_file, &comm ,&atom, 0);
 | 
			
		||||
        printvtk(param.vtk_file, &comm, &atom, ¶m, 0); 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //TODO: modify xct
 | 
			
		||||
    if(param.xtc_file != NULL) {
 | 
			
		||||
        xtc_init(param.xtc_file, &atom, 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int n = 0; n < param.ntimes; n++) {
 | 
			
		||||
    double forceTime=0.0;
 | 
			
		||||
    double commTime=0.0;
 | 
			
		||||
    for(int n = 0; n < param.ntimes; n++) {  
 | 
			
		||||
        initialIntegrate(¶m, &atom);
 | 
			
		||||
 | 
			
		||||
        if((n + 1) % param.reneigh_every) {
 | 
			
		||||
            if(!((n + 1) % param.prune_every)) {
 | 
			
		||||
            timer[FORWARD]+=forward(&comm, &atom, ¶m);
 | 
			
		||||
            if(!((n + 1) % param.prune_every)){
 | 
			
		||||
                pruneNeighbor(¶m, &atom, &neighbor);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            updatePbc(&atom, ¶m, 0);
 | 
			
		||||
        } else {
 | 
			
		||||
            #ifdef CUDA_TARGET
 | 
			
		||||
            copyDataFromCUDADevice(&atom);
 | 
			
		||||
            #endif
 | 
			
		||||
 | 
			
		||||
            timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
 | 
			
		||||
 | 
			
		||||
            timer[UPDATE] +=updateAtoms(&comm,&atom);
 | 
			
		||||
            if(param.balance && !((n+1)%param.balance_every))
 | 
			
		||||
                timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);   
 | 
			
		||||
            timer[NEIGH]  += reneighbour(&comm, ¶m, &atom, &neighbor);
 | 
			
		||||
            #ifdef CUDA_TARGET
 | 
			
		||||
            copyDataToCUDADevice(&atom);
 | 
			
		||||
            isReneighboured = 1;
 | 
			
		||||
            #endif
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
			
		||||
        traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        if(param.force_field == FF_EAM) {
 | 
			
		||||
            timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
 | 
			
		||||
            timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats); 
 | 
			
		||||
        } else {
 | 
			
		||||
            timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        } 
 | 
			
		||||
        timer[REVERSE] += reverse(&comm, &atom, ¶m);
 | 
			
		||||
        finalIntegrate(¶m, &atom);
 | 
			
		||||
 | 
			
		||||
        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
			
		||||
            computeThermo(n + 1, ¶m, &atom);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        int write_pos = !((n + 1) % param.x_out_every);
 | 
			
		||||
        int write_vel = !((n + 1) % param.v_out_every);
 | 
			
		||||
        if(write_pos || write_vel) {
 | 
			
		||||
            if(param.vtk_file != NULL) {
 | 
			
		||||
                write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
 | 
			
		||||
                printvtk(param.vtk_file, &comm, &atom, ¶m, n+1);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            //TODO: xtc file
 | 
			
		||||
            if(param.xtc_file != NULL) {
 | 
			
		||||
                xtc_write(&atom, n + 1, write_pos, write_vel);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        } 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef CUDA_TARGET
 | 
			
		||||
    copyDataFromCUDADevice(&atom);
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    MPI_Barrier(world);
 | 
			
		||||
    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
			
		||||
    updateSingleAtoms(&atom);
 | 
			
		||||
    updateAtoms(&comm,&atom);
 | 
			
		||||
    computeThermo(-1, ¶m, &atom);
 | 
			
		||||
 | 
			
		||||
    //TODO:
 | 
			
		||||
    if(param.xtc_file != NULL) {
 | 
			
		||||
        xtc_end();
 | 
			
		||||
    }
 | 
			
		||||
@@ -304,41 +370,35 @@ int main(int argc, char** argv) {
 | 
			
		||||
    #ifdef CUDA_TARGET
 | 
			
		||||
    cudaDeviceFree();
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    printf(HLINE);
 | 
			
		||||
    printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
 | 
			
		||||
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
 | 
			
		||||
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
 | 
			
		||||
    printf(HLINE);
 | 
			
		||||
    double mint[NUMTIMER];
 | 
			
		||||
    double maxt[NUMTIMER];
 | 
			
		||||
    double sumt[NUMTIMER];
 | 
			
		||||
    timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
 | 
			
		||||
    MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
 | 
			
		||||
    MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
 | 
			
		||||
    MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
 | 
			
		||||
    int Nghost;
 | 
			
		||||
    MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
 | 
			
		||||
    
 | 
			
		||||
    int nthreads = 0;
 | 
			
		||||
    int chunkSize = 0;
 | 
			
		||||
    omp_sched_t schedKind;
 | 
			
		||||
    char schedType[10];
 | 
			
		||||
#pragma omp parallel
 | 
			
		||||
#pragma omp master
 | 
			
		||||
    {
 | 
			
		||||
	omp_get_schedule(&schedKind, &chunkSize);
 | 
			
		||||
 | 
			
		||||
    	switch (schedKind)
 | 
			
		||||
    	{
 | 
			
		||||
        	case omp_sched_static:  strcpy(schedType, "static"); break;
 | 
			
		||||
        	case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
 | 
			
		||||
        	case omp_sched_guided:  strcpy(schedType, "guided"); break;
 | 
			
		||||
        	case omp_sched_auto:    strcpy(schedType, "auto"); break;
 | 
			
		||||
    	}
 | 
			
		||||
 | 
			
		||||
    	nthreads = omp_get_max_threads();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    printf("Num threads: %d\n", nthreads);
 | 
			
		||||
    printf("Schedule: (%s,%d)\n", schedType, chunkSize);
 | 
			
		||||
 | 
			
		||||
    printf("Performance: %.2f million atom updates per second\n",
 | 
			
		||||
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
			
		||||
    #ifdef COMPUTE_STATS
 | 
			
		||||
    if(comm.myproc == 0){
 | 
			
		||||
        int n = comm.numproc;
 | 
			
		||||
        printf(HLINE);
 | 
			
		||||
        printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
 | 
			
		||||
        printf("TOTAL %.2fs\n\n",timer[TOTAL]);
 | 
			
		||||
        printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
 | 
			
		||||
        printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
 | 
			
		||||
        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
 | 
			
		||||
        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
 | 
			
		||||
        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
 | 
			
		||||
        printf(HLINE);
 | 
			
		||||
        printf("Performance: %.2f million atom updates per second\n",
 | 
			
		||||
                1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
			
		||||
        
 | 
			
		||||
#ifdef COMPUTE_STATS
 | 
			
		||||
    displayStatistics(&atom, ¶m, &stats, timer);
 | 
			
		||||
    #endif
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
    endComm(&comm);
 | 
			
		||||
    LIKWID_MARKER_CLOSE;
 | 
			
		||||
    return EXIT_SUCCESS;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -7,15 +7,15 @@
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
#include <neighbor.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
#define SMALL 1.0e-6
 | 
			
		||||
#define FACTOR 0.999
 | 
			
		||||
 | 
			
		||||
#define eps 1.0e-9
 | 
			
		||||
static MD_FLOAT xprd, yprd, zprd;
 | 
			
		||||
static MD_FLOAT bininvx, bininvy;
 | 
			
		||||
static int mbinxlo, mbinylo;
 | 
			
		||||
@@ -34,9 +34,16 @@ static int nmax;
 | 
			
		||||
static int nstencil;      // # of bins in stencil
 | 
			
		||||
static int* stencil;      // stencil list of bin offsets
 | 
			
		||||
static MD_FLOAT binsizex, binsizey;
 | 
			
		||||
int me;             //rank
 | 
			
		||||
int method;         // method 
 | 
			
		||||
int shellMethod;    //If shell method exist  
 | 
			
		||||
 | 
			
		||||
static int coord2bin(MD_FLOAT, MD_FLOAT);
 | 
			
		||||
static MD_FLOAT bindist(int, int);
 | 
			
		||||
//static int ghostZone(Atom*, int);
 | 
			
		||||
static int halfZoneCluster(Atom*,int);
 | 
			
		||||
static int ghostClusterinRange(Atom*, int, int, MD_FLOAT);
 | 
			
		||||
static void neighborGhost(Atom*, Neighbor*);
 | 
			
		||||
 | 
			
		||||
/* exported subroutines */
 | 
			
		||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
			
		||||
@@ -53,12 +60,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
			
		||||
    bincount = NULL;
 | 
			
		||||
    bin_clusters = NULL;
 | 
			
		||||
    bin_nclusters = NULL;
 | 
			
		||||
    neighbor->half_neigh = param->half_neigh;
 | 
			
		||||
    neighbor->maxneighs = 100;
 | 
			
		||||
    neighbor->maxneighs = 200;
 | 
			
		||||
    neighbor->numneigh = NULL;
 | 
			
		||||
    neighbor->numneigh_masked = NULL;
 | 
			
		||||
    neighbor->neighbors = NULL;
 | 
			
		||||
    neighbor->neighbors_imask = NULL;
 | 
			
		||||
    //MPI
 | 
			
		||||
    shellMethod = 0;
 | 
			
		||||
    method = param->method;
 | 
			
		||||
    if(method == halfShell || method == eightShell){ 
 | 
			
		||||
        param->half_neigh = 1;
 | 
			
		||||
        shellMethod = 1;
 | 
			
		||||
    }
 | 
			
		||||
    me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    neighbor->half_neigh = param->half_neigh;
 | 
			
		||||
    neighbor->Nshell = 0;  
 | 
			
		||||
    neighbor->numNeighShell = NULL;
 | 
			
		||||
    neighbor->neighshell = NULL;
 | 
			
		||||
    neighbor->listshell = NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void setupNeighbor(Parameter *param, Atom *atom) {
 | 
			
		||||
@@ -77,7 +97,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
 | 
			
		||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
			
		||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
 | 
			
		||||
 | 
			
		||||
    MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
 | 
			
		||||
    MD_FLOAT atom_density = ((MD_FLOAT)(atom->Natoms)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
 | 
			
		||||
    MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
 | 
			
		||||
    MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
 | 
			
		||||
    MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
 | 
			
		||||
@@ -146,6 +166,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
 | 
			
		||||
    
 | 
			
		||||
    MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
 | 
			
		||||
    MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
 | 
			
		||||
    MD_FLOAT dm = MAX(dl, dh);
 | 
			
		||||
@@ -163,6 +184,7 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
 | 
			
		||||
    dm = MAX(dl, dh);
 | 
			
		||||
    dm0 = MAX(dm, 0.0);
 | 
			
		||||
    d2 += dm0 * dm0;
 | 
			
		||||
    
 | 
			
		||||
    return d2;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -225,7 +247,6 @@ static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
 | 
			
		||||
 | 
			
		||||
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    DEBUG_MESSAGE("buildNeighbor start\n");
 | 
			
		||||
 | 
			
		||||
    /* extend atom arrays if necessary */
 | 
			
		||||
    if(atom->Nclusters_local > nmax) {
 | 
			
		||||
        nmax = atom->Nclusters_local;
 | 
			
		||||
@@ -249,7 +270,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    while(resize) {
 | 
			
		||||
        int new_maxneighs = neighbor->maxneighs;
 | 
			
		||||
        resize = 0;
 | 
			
		||||
 | 
			
		||||
        for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
            int ci_cj1 = CJ1_FROM_CI(ci);
 | 
			
		||||
            int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
 | 
			
		||||
@@ -262,14 +282,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
            MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
 | 
			
		||||
            MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
 | 
			
		||||
            MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
 | 
			
		||||
 | 
			
		||||
            for(int k = 0; k < nstencil; k++) {
 | 
			
		||||
                int jbin = ibin + stencil[k];
 | 
			
		||||
                int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
 | 
			
		||||
                int cj, m = -1;
 | 
			
		||||
                MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
 | 
			
		||||
                const int c = bin_nclusters[jbin];
 | 
			
		||||
 | 
			
		||||
                if(c > 0) {
 | 
			
		||||
                    MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
 | 
			
		||||
 | 
			
		||||
@@ -279,6 +297,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
                        if(neighbor->half_neigh && ci_cj1 > cj) {
 | 
			
		||||
                            continue;
 | 
			
		||||
                        }
 | 
			
		||||
                        
 | 
			
		||||
                        jbb_zmin = atom->jclusters[cj].bbminz;
 | 
			
		||||
                        jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
			
		||||
                        dl = ibb_zmin - jbb_zmax;
 | 
			
		||||
@@ -287,7 +306,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
                        dm0 = MAX(dm, 0.0);
 | 
			
		||||
                        d_bb_sq = dm0 * dm0;
 | 
			
		||||
                    } while(m + 1 < c && d_bb_sq > cutneighsq);
 | 
			
		||||
 | 
			
		||||
                    jbb_xmin = atom->jclusters[cj].bbminx;
 | 
			
		||||
                    jbb_xmax = atom->jclusters[cj].bbmaxx;
 | 
			
		||||
                    jbb_ymin = atom->jclusters[cj].bbminy;
 | 
			
		||||
@@ -357,7 +375,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
                            jbb_zmin = atom->jclusters[cj].bbminz;
 | 
			
		||||
                            jbb_zmax = atom->jclusters[cj].bbmaxz;
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    } 
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
@@ -383,14 +401,15 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
 | 
			
		||||
        if(resize) {
 | 
			
		||||
            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
			
		||||
            fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
 | 
			
		||||
            fprintf(stdout, "RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
 | 
			
		||||
            free(neighbor->neighbors);
 | 
			
		||||
            free(neighbor->neighbors_imask);
 | 
			
		||||
            neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
 | 
			
		||||
            neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(method == eightShell) neighborGhost(atom, neighbor);
 | 
			
		||||
    
 | 
			
		||||
    /*
 | 
			
		||||
    DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
 | 
			
		||||
    for(int ci = 0; ci < 6; ci++) {
 | 
			
		||||
@@ -511,46 +530,44 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
 | 
			
		||||
    int ix, iy;
 | 
			
		||||
 | 
			
		||||
    if(xin >= xprd) {
 | 
			
		||||
        ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
			
		||||
        ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
 | 
			
		||||
    } else if(xin >= 0.0) {
 | 
			
		||||
        ix = (int)(xin * bininvx) - mbinxlo;
 | 
			
		||||
        ix = (int)((xin+eps) * bininvx) - mbinxlo;
 | 
			
		||||
    } else {
 | 
			
		||||
        ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
			
		||||
        ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(yin >= yprd) {
 | 
			
		||||
        iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
			
		||||
    if(yin >= yprd) {   
 | 
			
		||||
        iy = (int)(((yin+eps) - yprd) * bininvy) + nbiny - mbinylo;
 | 
			
		||||
    } else if(yin >= 0.0) {
 | 
			
		||||
        iy = (int)(yin * bininvy) - mbinylo;
 | 
			
		||||
        iy = (int)((yin+eps) * bininvy) - mbinylo;
 | 
			
		||||
    } else {
 | 
			
		||||
        iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
			
		||||
        iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
    return (iy * mbinx + ix + 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
 | 
			
		||||
    if(xin >= xprd) {
 | 
			
		||||
        *ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
			
		||||
        *ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
 | 
			
		||||
    } else if(xin >= 0.0) {
 | 
			
		||||
        *ix = (int)(xin * bininvx) - mbinxlo;
 | 
			
		||||
        *ix = (int)((xin+eps) * bininvx) - mbinxlo;
 | 
			
		||||
    } else {
 | 
			
		||||
        *ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
			
		||||
        *ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(yin >= yprd) {
 | 
			
		||||
        *iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
			
		||||
    if(yin >= yprd) {  
 | 
			
		||||
        *iy = (int)((yin + eps - yprd) * bininvy) + nbiny - mbinylo;
 | 
			
		||||
    } else if(yin >= 0.0) {
 | 
			
		||||
        *iy = (int)(yin * bininvy) - mbinylo;
 | 
			
		||||
        *iy = (int)((yin+eps) * bininvy) - mbinylo;
 | 
			
		||||
    } else {
 | 
			
		||||
        *iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
			
		||||
        *iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void binAtoms(Atom *atom) {
 | 
			
		||||
    DEBUG_MESSAGE("binAtoms start\n");
 | 
			
		||||
    int resize = 1;
 | 
			
		||||
 | 
			
		||||
    while(resize > 0) {
 | 
			
		||||
        resize = 0;
 | 
			
		||||
 | 
			
		||||
@@ -567,7 +584,7 @@ void binAtoms(Atom *atom) {
 | 
			
		||||
                resize = 1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
        if(resize) {
 | 
			
		||||
            free(bins);
 | 
			
		||||
            atoms_per_bin *= 2;
 | 
			
		||||
@@ -615,8 +632,7 @@ void buildClusters(Atom *atom) {
 | 
			
		||||
 | 
			
		||||
    /* bin local atoms */
 | 
			
		||||
    binAtoms(atom);
 | 
			
		||||
    sortAtomsByZCoord(atom);
 | 
			
		||||
 | 
			
		||||
    sortAtomsByZCoord(atom);   
 | 
			
		||||
    for(int bin = 0; bin < mbins; bin++) {
 | 
			
		||||
        int c = bincount[bin];
 | 
			
		||||
        int ac = 0;
 | 
			
		||||
@@ -688,6 +704,9 @@ void buildClusters(Atom *atom) {
 | 
			
		||||
void defineJClusters(Atom *atom) {
 | 
			
		||||
    DEBUG_MESSAGE("defineJClusters start\n");
 | 
			
		||||
 | 
			
		||||
    const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
 | 
			
		||||
    atom->ncj = atom->Nclusters_local / jfac;   
 | 
			
		||||
 | 
			
		||||
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
 | 
			
		||||
        int cj0 = CJ0_FROM_CI(ci);
 | 
			
		||||
 | 
			
		||||
@@ -830,12 +849,11 @@ void binClusters(Atom *atom) {
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
 | 
			
		||||
            const int cj = ncj + cg;
 | 
			
		||||
            int ix = -1, iy = -1;
 | 
			
		||||
            MD_FLOAT xtmp, ytmp;
 | 
			
		||||
 | 
			
		||||
            if(shellMethod == halfShell && !halfZoneCluster(atom, cj)) continue;
 | 
			
		||||
            if(atom->jclusters[cj].natoms > 0) {
 | 
			
		||||
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
 | 
			
		||||
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
@@ -846,6 +864,7 @@ void binClusters(Atom *atom) {
 | 
			
		||||
                coord2bin2D(xtmp, ytmp, &ix, &iy);
 | 
			
		||||
                ix = MAX(MIN(ix, mbinx - 1), 0);
 | 
			
		||||
                iy = MAX(MIN(iy, mbiny - 1), 0);
 | 
			
		||||
            
 | 
			
		||||
                for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
 | 
			
		||||
                    int nix, niy;
 | 
			
		||||
                    xtmp = cj_x[CL_X_OFFSET + cjj];
 | 
			
		||||
@@ -853,7 +872,7 @@ void binClusters(Atom *atom) {
 | 
			
		||||
                    coord2bin2D(xtmp, ytmp, &nix, &niy);
 | 
			
		||||
                    nix = MAX(MIN(nix, mbinx - 1), 0);
 | 
			
		||||
                    niy = MAX(MIN(niy, mbiny - 1), 0);
 | 
			
		||||
 | 
			
		||||
            
 | 
			
		||||
                    // Always put the cluster on the bin of its innermost atom so
 | 
			
		||||
                    // the cluster should be closer to local clusters
 | 
			
		||||
                    if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; }
 | 
			
		||||
@@ -861,7 +880,6 @@ void binClusters(Atom *atom) {
 | 
			
		||||
                    if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
 | 
			
		||||
                    if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                int bin = iy * mbinx + ix + 1;
 | 
			
		||||
                int c = bin_nclusters[bin];
 | 
			
		||||
                if(c < clusters_per_bin) {
 | 
			
		||||
@@ -883,25 +901,21 @@ void binClusters(Atom *atom) {
 | 
			
		||||
                            break;
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    if(!inserted) {
 | 
			
		||||
                        bin_clusters[bin * clusters_per_bin + c] = cj;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    bin_nclusters[bin]++;
 | 
			
		||||
                } else {
 | 
			
		||||
                    resize = 1;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(resize) {
 | 
			
		||||
            free(bin_clusters);
 | 
			
		||||
            clusters_per_bin *= 2;
 | 
			
		||||
            bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    DEBUG_MESSAGE("bin_nclusters\n");
 | 
			
		||||
    for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
 | 
			
		||||
@@ -919,7 +933,6 @@ void updateSingleAtoms(Atom *atom) {
 | 
			
		||||
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
 | 
			
		||||
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
 | 
			
		||||
        MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
 | 
			
		||||
 | 
			
		||||
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
 | 
			
		||||
            atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
 | 
			
		||||
            atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
 | 
			
		||||
@@ -928,12 +941,174 @@ void updateSingleAtoms(Atom *atom) {
 | 
			
		||||
            atom->vy[Natom] = ci_v[CL_Y_OFFSET + cii];
 | 
			
		||||
            atom->vz[Natom] = ci_v[CL_Z_OFFSET + cii];
 | 
			
		||||
            Natom++;
 | 
			
		||||
        }
 | 
			
		||||
        }       
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(Natom != atom->Nlocal) {
 | 
			
		||||
        fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    DEBUG_MESSAGE("updateSingleAtoms stop\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//MPI Shell Methods
 | 
			
		||||
 | 
			
		||||
static int eightZoneCluster(Atom* atom, int cj)
 | 
			
		||||
{   
 | 
			
		||||
    //Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
 | 
			
		||||
    int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
 | 
			
		||||
    int zone = 0;
 | 
			
		||||
    MD_FLOAT *hi = atom->mybox.hi;
 | 
			
		||||
 | 
			
		||||
    if (atom->jclusters[cj].bbminx +eps >=hi[_x]){
 | 
			
		||||
        zone += 1;
 | 
			
		||||
    }
 | 
			
		||||
    if (atom->jclusters[cj].bbminy  +eps >=hi[_y]){
 | 
			
		||||
        zone += 2;
 | 
			
		||||
    }
 | 
			
		||||
    if (atom->jclusters[cj].bbminz  +eps >=hi[_z]){
 | 
			
		||||
        zone += 4;
 | 
			
		||||
    }
 | 
			
		||||
    return zoneMapping[zone];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int halfZoneCluster(Atom* atom, int cj)
 | 
			
		||||
{   
 | 
			
		||||
    MD_FLOAT *hi = atom->mybox.hi;
 | 
			
		||||
    MD_FLOAT *lo = atom->mybox.lo;
 | 
			
		||||
 | 
			
		||||
    if(atom->jclusters[cj].bbmaxx < lo[_x] && atom->jclusters[cj].bbmaxy < hi[_y] && 
 | 
			
		||||
       atom->jclusters[cj].bbmaxz < hi[_z]){
 | 
			
		||||
        return 0;
 | 
			
		||||
    } else if(atom->jclusters[cj].bbmaxy < lo[_y] && atom->jclusters[cj].bbmaxz < hi[_z]){
 | 
			
		||||
        return 0;
 | 
			
		||||
    } else if(atom->jclusters[cj].bbmaxz < lo[_z]){
 | 
			
		||||
        return 0;
 | 
			
		||||
    } else { 
 | 
			
		||||
        return 1;
 | 
			
		||||
    } 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int BoxGhostDistance(Atom *atom, int ci, int cj) {
 | 
			
		||||
    
 | 
			
		||||
    MD_FLOAT dl = atom->jclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
 | 
			
		||||
    MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->jclusters[ci].bbmaxx;
 | 
			
		||||
    MD_FLOAT dm = MAX(dl, dh);
 | 
			
		||||
    MD_FLOAT dm0 = MAX(dm, 0.0);
 | 
			
		||||
    MD_FLOAT dx2 = dm0 * dm0;
 | 
			
		||||
   
 | 
			
		||||
    dl = atom->jclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
 | 
			
		||||
    dh = atom->jclusters[cj].bbminy - atom->jclusters[ci].bbmaxy;
 | 
			
		||||
    dm = MAX(dl, dh);
 | 
			
		||||
    dm0 = MAX(dm, 0.0);
 | 
			
		||||
    MD_FLOAT dy2 = dm0 * dm0;
 | 
			
		||||
 | 
			
		||||
    dl = atom->jclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
 | 
			
		||||
    dh = atom->jclusters[cj].bbminz - atom->jclusters[ci].bbmaxz;
 | 
			
		||||
    dm = MAX(dl, dh);
 | 
			
		||||
    dm0 = MAX(dm, 0.0);
 | 
			
		||||
    MD_FLOAT dz2 = dm0 * dm0;
 | 
			
		||||
    
 | 
			
		||||
    return dx2 > cutneighsq ? 0 : dy2 > cutneighsq ? 0 : dz2 > cutneighsq ? 0 : 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ghostClusterinRange(Atom *atom, int cs, int cg, MD_FLOAT rsq) {
 | 
			
		||||
    int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
 | 
			
		||||
    int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
 | 
			
		||||
    MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
 | 
			
		||||
    MD_FLOAT *cg_x = &atom->cl_x[cj_vec_base];
 | 
			
		||||
 | 
			
		||||
    for(int cii = 0; cii < atom->jclusters[cs].natoms; cii++) {
 | 
			
		||||
        for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
 | 
			
		||||
            MD_FLOAT delx = cs_x[CL_X_OFFSET + cii] - cg_x[CL_X_OFFSET + cjj];
 | 
			
		||||
            MD_FLOAT dely = cs_x[CL_Y_OFFSET + cii] - cg_x[CL_Y_OFFSET + cjj];
 | 
			
		||||
            MD_FLOAT delz = cs_x[CL_Z_OFFSET + cii] - cg_x[CL_Z_OFFSET + cjj];
 | 
			
		||||
            if(delx * delx + dely * dely + delz * delz < rsq) {
 | 
			
		||||
                return 1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    int Nshell=0;
 | 
			
		||||
    int Ncluster_local = atom->Nclusters_local;
 | 
			
		||||
    int Nclusterghost  = atom->Nclusters_ghost;
 | 
			
		||||
    if(neighbor->listshell) free(neighbor->listshell);
 | 
			
		||||
    neighbor->listshell = (int*) malloc(Nclusterghost * sizeof(int));
 | 
			
		||||
    int* listzone  = (int*) malloc(8 * Nclusterghost * sizeof(int));
 | 
			
		||||
    int countCluster[8] = {0,0,0,0,0,0,0,0};
 | 
			
		||||
    
 | 
			
		||||
    //Selecting ghost atoms for interaction and putting them into regions
 | 
			
		||||
   for(int cg = atom->ncj; cg < atom->ncj+Nclusterghost; cg++) {
 | 
			
		||||
        int czone = eightZoneCluster(atom,cg);        
 | 
			
		||||
        int *list = &listzone[Nclusterghost*czone];
 | 
			
		||||
        int n  = countCluster[czone];
 | 
			
		||||
        list[n] = cg;
 | 
			
		||||
        countCluster[czone]++;     
 | 
			
		||||
       //It is only necessary to find neighbour particles for 3 regions
 | 
			
		||||
       //if(czone == 1 || czone == 2 || czone == 3)
 | 
			
		||||
       //neighbor->listshell[Nshell++] = cg;  
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    for(int zone = 1; zone<=3; zone++){
 | 
			
		||||
        int *list = &listzone[Nclusterghost*zone];
 | 
			
		||||
        for(int n=0; n<countCluster[zone]; n++)
 | 
			
		||||
            neighbor->listshell[Nshell++] = list[n]; 
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    neighbor->Nshell = Nshell;
 | 
			
		||||
    if(neighbor->numNeighShell) free(neighbor->numNeighShell);
 | 
			
		||||
    if(neighbor->neighshell) free(neighbor->neighshell);
 | 
			
		||||
    neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
 | 
			
		||||
    neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
 | 
			
		||||
    
 | 
			
		||||
    int resize = 1;
 | 
			
		||||
 | 
			
		||||
    while(resize)
 | 
			
		||||
    {
 | 
			
		||||
        resize = 0;
 | 
			
		||||
        for(int ic = 0; ic < Nshell; ic++) {   
 | 
			
		||||
            int *neighshell = &(neighbor->neighshell[ic*neighbor->maxneighs]); 
 | 
			
		||||
            int n = 0;  
 | 
			
		||||
            int icluster = neighbor->listshell[ic];
 | 
			
		||||
            int iczone = eightZoneCluster(atom, icluster);
 | 
			
		||||
            
 | 
			
		||||
            for(int jczone=0; jczone<8; jczone++){
 | 
			
		||||
                
 | 
			
		||||
                if(jczone <=iczone) continue;
 | 
			
		||||
                if(iczone == 1 && (jczone==5||jczone==6||jczone==7)) continue;
 | 
			
		||||
                if(iczone == 2 && (jczone==4||jczone==6||jczone==7)) continue;
 | 
			
		||||
                if(iczone == 3 && (jczone==4||jczone==5||jczone==7)) continue;
 | 
			
		||||
                
 | 
			
		||||
                int Ncluster = countCluster[jczone];
 | 
			
		||||
                int* loc_zone = &listzone[jczone * Nclusterghost];
 | 
			
		||||
 | 
			
		||||
                for(int k = 0; k < Ncluster ; k++) {
 | 
			
		||||
                    int jcluster = loc_zone[k];    
 | 
			
		||||
                                    
 | 
			
		||||
                    if(BoxGhostDistance(atom, icluster, jcluster)) 
 | 
			
		||||
                    {
 | 
			
		||||
                        if(ghostClusterinRange(atom, icluster, jcluster, cutneighsq))
 | 
			
		||||
                            neighshell[n++] = jcluster;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            neighbor->numNeighShell[ic] = n; 
 | 
			
		||||
            
 | 
			
		||||
            if(n >= neighbor->maxneighs){
 | 
			
		||||
                resize = 1;
 | 
			
		||||
                neighbor->maxneighs = n * 1.2;
 | 
			
		||||
                fprintf(stdout, "RESIZE EIGHT SHELL %d, PROC %d\n", neighbor->maxneighs,me);
 | 
			
		||||
                break;
 | 
			
		||||
            }  
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(resize) {
 | 
			
		||||
            free(neighbor->neighshell);
 | 
			
		||||
            neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
 | 
			
		||||
        }
 | 
			
		||||
    }  
 | 
			
		||||
    free(listzone); 
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										130
									
								
								gromacs/vtk.c
									
									
									
									
									
								
							
							
						
						
									
										130
									
								
								gromacs/vtk.c
									
									
									
									
									
								
							@@ -9,6 +9,11 @@
 | 
			
		||||
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <vtk.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
static MPI_File _fh; 
 | 
			
		||||
static inline void flushBuffer(char*); 
 | 
			
		||||
 | 
			
		||||
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
 | 
			
		||||
    write_local_atoms_to_vtk_file(filename, atom, timestep);
 | 
			
		||||
@@ -188,3 +193,128 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
 | 
			
		||||
{
 | 
			
		||||
    char msg[256];
 | 
			
		||||
    char timestep_filename[128];
 | 
			
		||||
    snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
 | 
			
		||||
    MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
 | 
			
		||||
    if(_fh == MPI_FILE_NULL) {
 | 
			
		||||
        if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
    if (comm->myproc==0){
 | 
			
		||||
        sprintf(msg, "# vtk DataFile Version 2.0\n");
 | 
			
		||||
        sprintf(msg, "%sParticle data\n",msg);
 | 
			
		||||
        sprintf(msg, "%sASCII\n",msg);
 | 
			
		||||
        sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
 | 
			
		||||
        sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);  
 | 
			
		||||
        flushBuffer(msg);
 | 
			
		||||
    } 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
 | 
			
		||||
{ 
 | 
			
		||||
    if (_fh == MPI_FILE_NULL) {
 | 
			
		||||
        if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    int sizeline= 25;   //#initial guess of characters in "%.4f %.4f %.4f\n" 
 | 
			
		||||
    int extrabuff = 100;
 | 
			
		||||
    int sizebuff = sizeline*atom->Nlocal+extrabuff; 
 | 
			
		||||
    int mysize = 0;
 | 
			
		||||
    char* msg = (char*) malloc(sizebuff);
 | 
			
		||||
    sprintf(msg, "");
 | 
			
		||||
    for(int i = 0; i < atom->Nlocal; i++){
 | 
			
		||||
        if(mysize+extrabuff >= sizebuff){
 | 
			
		||||
            sizebuff*= 1.5;
 | 
			
		||||
            msg = (char*) realloc(msg, sizebuff); 
 | 
			
		||||
        }
 | 
			
		||||
        //TODO: do not forget to add param->xlo, param->ylo, param->zlo   
 | 
			
		||||
        sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
 | 
			
		||||
        mysize = strlen(msg);
 | 
			
		||||
    }
 | 
			
		||||
    int gatherSize[comm->numproc];
 | 
			
		||||
 | 
			
		||||
    MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
 | 
			
		||||
    int offset=0;
 | 
			
		||||
    int globalSize = 0;
 | 
			
		||||
    
 | 
			
		||||
    for(int i = 0; i < comm->myproc; i++)
 | 
			
		||||
        offset+= gatherSize[i];
 | 
			
		||||
    
 | 
			
		||||
    for(int i = 0; i < comm->numproc; i++)
 | 
			
		||||
        globalSize+= gatherSize[i];
 | 
			
		||||
    
 | 
			
		||||
    MPI_Offset displ;   
 | 
			
		||||
    MPI_Datatype FileType;       
 | 
			
		||||
    int GlobalSize[] = {globalSize}; 
 | 
			
		||||
    int LocalSize[]  = {mysize};
 | 
			
		||||
    int Start[] = {offset};
 | 
			
		||||
 | 
			
		||||
    if(LocalSize[0]>0){
 | 
			
		||||
        MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);    
 | 
			
		||||
    } else {
 | 
			
		||||
        MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
 | 
			
		||||
    }
 | 
			
		||||
    MPI_Type_commit(&FileType);
 | 
			
		||||
    MPI_File_get_size(_fh, &displ);
 | 
			
		||||
    MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
 | 
			
		||||
    MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
 | 
			
		||||
    MPI_Barrier(MPI_COMM_WORLD); 
 | 
			
		||||
    MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);    
 | 
			
		||||
     
 | 
			
		||||
    if (comm->myproc==0){
 | 
			
		||||
        
 | 
			
		||||
        sprintf(msg, "\n\n");
 | 
			
		||||
        sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2); 
 | 
			
		||||
 | 
			
		||||
        for(int i = 0; i < atom->Natoms; i++) 
 | 
			
		||||
            sprintf(msg, "%s1 %d\n", msg, i);
 | 
			
		||||
        flushBuffer(msg);
 | 
			
		||||
        
 | 
			
		||||
        sprintf(msg, "\n\n"); 
 | 
			
		||||
        sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
 | 
			
		||||
        for(int i = 0; i < atom->Natoms; i++) 
 | 
			
		||||
            sprintf(msg, "%s1\n",msg);
 | 
			
		||||
        flushBuffer(msg);
 | 
			
		||||
 | 
			
		||||
        sprintf(msg, "\n\n"); 
 | 
			
		||||
        sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
 | 
			
		||||
        sprintf(msg, "%sSCALARS mass double\n",msg);
 | 
			
		||||
        sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
 | 
			
		||||
        for(int i = 0; i < atom->Natoms; i++) 
 | 
			
		||||
            sprintf(msg, "%s1.0\n",msg);
 | 
			
		||||
        sprintf(msg, "%s\n\n",msg);
 | 
			
		||||
        flushBuffer(msg);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void vtkClose()
 | 
			
		||||
{
 | 
			
		||||
    MPI_File_close(&_fh);
 | 
			
		||||
    _fh=MPI_FILE_NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//TODO: print ghost and cluster using MPI
 | 
			
		||||
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
 | 
			
		||||
{
 | 
			
		||||
    if(comm->numproc == 1)
 | 
			
		||||
    {
 | 
			
		||||
        write_data_to_vtk_file(filename, atom, timestep);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    vtkOpen(filename, comm, atom, timestep);
 | 
			
		||||
    vtkVector(comm, atom, param);
 | 
			
		||||
    vtkClose(); 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void flushBuffer(char* msg){
 | 
			
		||||
    MPI_Offset displ; 
 | 
			
		||||
    MPI_File_get_size(_fh, &displ);
 | 
			
		||||
    MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user