Final MPI version
This commit is contained in:
		
							
								
								
									
										327
									
								
								lammps/atom.c
									
									
									
									
									
								
							
							
						
						
									
										327
									
								
								lammps/atom.c
									
									
									
									
									
								
							@@ -9,10 +9,12 @@
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <allocate.h>
 | 
			
		||||
#include <device.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
#define DELTA 20000
 | 
			
		||||
 | 
			
		||||
@@ -21,10 +23,10 @@
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef MAX
 | 
			
		||||
#define MAX(a,b)    ((a) > (b) ? (a) : (b))
 | 
			
		||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void initAtom(Atom *atom) {
 | 
			
		||||
void initAtom(Atom *atom){
 | 
			
		||||
    atom->x  = NULL; atom->y  = NULL; atom->z  = NULL;
 | 
			
		||||
    atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
 | 
			
		||||
    atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
 | 
			
		||||
@@ -41,6 +43,7 @@ void initAtom(Atom *atom) {
 | 
			
		||||
    atom->radius = NULL;
 | 
			
		||||
    atom->av = NULL;
 | 
			
		||||
    atom->r = NULL;
 | 
			
		||||
    atom->border_map = NULL;
 | 
			
		||||
 | 
			
		||||
    DeviceAtom *d_atom = &(atom->d_atom);
 | 
			
		||||
    d_atom->x  = NULL; d_atom->y  = NULL; d_atom->z  = NULL;
 | 
			
		||||
@@ -52,12 +55,19 @@ void initAtom(Atom *atom) {
 | 
			
		||||
    d_atom->sigma6 = NULL;
 | 
			
		||||
    d_atom->cutforcesq = NULL;
 | 
			
		||||
    d_atom->cutneighsq = NULL;
 | 
			
		||||
    //MPI
 | 
			
		||||
    Box *mybox = &(atom->mybox);                  
 | 
			
		||||
    mybox->xprd = mybox->yprd = mybox->zprd = 0;          
 | 
			
		||||
    mybox->lo[_x]  = mybox->lo[_y]  = mybox->lo[_z] = 0;             
 | 
			
		||||
    mybox->hi[_x]  = mybox->hi[_y]  = mybox->hi[_z] = 0;   
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void createAtom(Atom *atom, Parameter *param) {
 | 
			
		||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
 | 
			
		||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
 | 
			
		||||
    MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
 | 
			
		||||
    
 | 
			
		||||
    MD_FLOAT xlo = 0; MD_FLOAT xhi = param->xprd;
 | 
			
		||||
    MD_FLOAT ylo = 0; MD_FLOAT yhi = param->yprd;
 | 
			
		||||
    MD_FLOAT zlo = 0; MD_FLOAT zhi = param->zprd;
 | 
			
		||||
    
 | 
			
		||||
    atom->Natoms = 4 * param->nx * param->ny * param->nz;
 | 
			
		||||
    atom->Nlocal = 0;
 | 
			
		||||
    atom->ntypes = param->ntypes;
 | 
			
		||||
@@ -107,15 +117,15 @@ void createAtom(Atom *atom, Parameter *param) {
 | 
			
		||||
            xtmp = 0.5 * alat * i;
 | 
			
		||||
            ytmp = 0.5 * alat * j;
 | 
			
		||||
            ztmp = 0.5 * alat * k;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
            if( xtmp >= xlo && xtmp < xhi &&
 | 
			
		||||
                    ytmp >= ylo && ytmp < yhi &&
 | 
			
		||||
                    ztmp >= zlo && ztmp < zhi ) {
 | 
			
		||||
 | 
			
		||||
                
 | 
			
		||||
                n = k * (2 * param->ny) * (2 * param->nx) +
 | 
			
		||||
                    j * (2 * param->nx) +
 | 
			
		||||
                    i + 1;
 | 
			
		||||
 | 
			
		||||
                
 | 
			
		||||
                for(m = 0; m < 5; m++) {
 | 
			
		||||
                    myrandom(&n);
 | 
			
		||||
                }
 | 
			
		||||
@@ -131,7 +141,7 @@ void createAtom(Atom *atom, Parameter *param) {
 | 
			
		||||
                }
 | 
			
		||||
                vztmp = myrandom(&n);
 | 
			
		||||
 | 
			
		||||
                if(atom->Nlocal == atom->Nmax) {
 | 
			
		||||
                while(atom->Nlocal >= atom->Nmax) {
 | 
			
		||||
                    growAtom(atom);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
@@ -163,38 +173,42 @@ int type_str2int(const char *type) {
 | 
			
		||||
    return -1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Dispatch to the reader matching the extension of param->input_file.
 *
 * Recognised suffixes are ".pdb", ".gro", ".dmp" and ".in"; the value returned
 * by the selected reader is passed through unchanged. For any other file name
 * an error message is printed by MPI rank 0 only and the process exits with
 * status -1.
 */
int readAtom(Atom *atom, Parameter *param) {
    int me = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    const char *file = param->input_file;
    const size_t len = strlen(file);
    /* Only look at the tail when the name is long enough to hold the suffix;
     * otherwise indexing with len - 4 / len - 3 would read before the buffer. */
    const char *ext4 = (len >= 4) ? &file[len - 4] : "";
    const char *ext3 = (len >= 3) ? &file[len - 3] : "";

    if(strncmp(ext4, ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
    if(strncmp(ext4, ".gro", 4) == 0) { return readAtom_gro(atom, param); }
    if(strncmp(ext4, ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
    if(strncmp(ext3, ".in",  3) == 0) { return readAtom_in(atom, param); }

    /* Report once (rank 0) instead of once per rank. */
    if(me == 0) {
        fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
    }
    exit(-1);
    return -1;
}
 | 
			
		||||
 | 
			
		||||
int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    int read_atoms = 0;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    while(!feof(fp)) {
 | 
			
		||||
        readline(line, fp);
 | 
			
		||||
        char *item = strtok(line, " ");
 | 
			
		||||
        char *item = strtok(line, "\t ");
 | 
			
		||||
        if(strncmp(item, "CRYST1", 6) == 0) {
 | 
			
		||||
            param->xlo = 0.0;
 | 
			
		||||
            param->xhi = atof(strtok(NULL, " "));
 | 
			
		||||
            param->xhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
            param->ylo = 0.0;
 | 
			
		||||
            param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
            param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
            param->zlo = 0.0;
 | 
			
		||||
            param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
            param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
            param->xprd = param->xhi - param->xlo;
 | 
			
		||||
            param->yprd = param->yhi - param->ylo;
 | 
			
		||||
            param->zprd = param->zhi - param->zlo;
 | 
			
		||||
@@ -203,23 +217,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
            char *label;
 | 
			
		||||
            int atom_id, comp_id;
 | 
			
		||||
            MD_FLOAT occupancy, charge;
 | 
			
		||||
            atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
			
		||||
            atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
			
		||||
 | 
			
		||||
            while(atom_id + 1 >= atom->Nmax) {
 | 
			
		||||
                growAtom(atom);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            atom->type[atom_id] = type_str2int(strtok(NULL, " "));
 | 
			
		||||
            label = strtok(NULL, " ");
 | 
			
		||||
            comp_id = atoi(strtok(NULL, " "));
 | 
			
		||||
            atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
            atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
            atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
            atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
 | 
			
		||||
            label = strtok(NULL, "\t ");
 | 
			
		||||
            comp_id = atoi(strtok(NULL, "\t "));
 | 
			
		||||
            atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom_vx(atom_id) = 0.0;
 | 
			
		||||
            atom_vy(atom_id) = 0.0;
 | 
			
		||||
            atom_vz(atom_id) = 0.0;
 | 
			
		||||
            occupancy = atof(strtok(NULL, " "));
 | 
			
		||||
            charge = atof(strtok(NULL, " "));
 | 
			
		||||
            occupancy = atof(strtok(NULL, "\t "));
 | 
			
		||||
            charge = atof(strtok(NULL, "\t "));
 | 
			
		||||
            atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
			
		||||
            atom->Natoms++;
 | 
			
		||||
            atom->Nlocal++;
 | 
			
		||||
@@ -231,14 +245,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
                  strncmp(item, "ENDMDL", 6) == 0) {
 | 
			
		||||
            // Do nothing
 | 
			
		||||
        } else {
 | 
			
		||||
            fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
            if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
            exit(-1);
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(!read_atoms) {
 | 
			
		||||
        fprintf(stderr, "Input error: No atoms read!\n");
 | 
			
		||||
        if(me==0)fprintf(stderr, "Input error: No atoms read!\n");
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -254,12 +268,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return read_atoms;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    char desc[MAXLINE];
 | 
			
		||||
@@ -268,7 +285,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    int i = 0;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -277,26 +294,26 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    for(i = 0; desc[i] != '\n'; i++);
 | 
			
		||||
    desc[i] = '\0';
 | 
			
		||||
    readline(line, fp);
 | 
			
		||||
    atoms_to_read = atoi(strtok(line, " "));
 | 
			
		||||
    fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
			
		||||
    atoms_to_read = atoi(strtok(line, "\t "));
 | 
			
		||||
    if(me==0)fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
 | 
			
		||||
 | 
			
		||||
    while(!feof(fp) && read_atoms < atoms_to_read) {
 | 
			
		||||
        readline(line, fp);
 | 
			
		||||
        char *label = strtok(line, " ");
 | 
			
		||||
        int type = type_str2int(strtok(NULL, " "));
 | 
			
		||||
        int atom_id = atoi(strtok(NULL, " ")) - 1;
 | 
			
		||||
        char *label = strtok(line, "\t ");
 | 
			
		||||
        int type = type_str2int(strtok(NULL, "\t "));
 | 
			
		||||
        int atom_id = atoi(strtok(NULL, "\t ")) - 1;
 | 
			
		||||
        atom_id = read_atoms;
 | 
			
		||||
        while(atom_id + 1 >= atom->Nmax) {
 | 
			
		||||
            growAtom(atom);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        atom->type[atom_id] = type;
 | 
			
		||||
        atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_vx(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_vy(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_vz(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_vx(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_vy(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_vz(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
 | 
			
		||||
        atom->Natoms++;
 | 
			
		||||
        atom->Nlocal++;
 | 
			
		||||
@@ -306,18 +323,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
    if(!feof(fp)) {
 | 
			
		||||
        readline(line, fp);
 | 
			
		||||
        param->xlo = 0.0;
 | 
			
		||||
        param->xhi = atof(strtok(line, " "));
 | 
			
		||||
        param->xhi = atof(strtok(line, "\t "));
 | 
			
		||||
        param->ylo = 0.0;
 | 
			
		||||
        param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
        param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
        param->zlo = 0.0;
 | 
			
		||||
        param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
        param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
        param->xprd = param->xhi - param->xlo;
 | 
			
		||||
        param->yprd = param->yhi - param->ylo;
 | 
			
		||||
        param->zprd = param->zhi - param->zlo;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(read_atoms != atoms_to_read) {
 | 
			
		||||
        fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
			
		||||
        if(me==0)fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -333,12 +350,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return read_atoms;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    int natoms = 0;
 | 
			
		||||
@@ -347,7 +366,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
    int ts = -1;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -370,47 +389,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
                }
 | 
			
		||||
            } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
 | 
			
		||||
                readline(line, fp);
 | 
			
		||||
                param->xlo = atof(strtok(line, " "));
 | 
			
		||||
                param->xhi = atof(strtok(NULL, " "));
 | 
			
		||||
                param->xlo = atof(strtok(line, "\t "));
 | 
			
		||||
                param->xhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
                param->xprd = param->xhi - param->xlo;
 | 
			
		||||
 | 
			
		||||
                readline(line, fp);
 | 
			
		||||
                param->ylo = atof(strtok(line, " "));
 | 
			
		||||
                param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
                param->ylo = atof(strtok(line, "\t "));
 | 
			
		||||
                param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
                param->yprd = param->yhi - param->ylo;
 | 
			
		||||
 | 
			
		||||
                readline(line, fp);
 | 
			
		||||
                param->zlo = atof(strtok(line, " "));
 | 
			
		||||
                param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
                param->zlo = atof(strtok(line, "\t "));
 | 
			
		||||
                param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
                param->zprd = param->zhi - param->zlo;
 | 
			
		||||
            } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
 | 
			
		||||
                for(int i = 0; i < natoms; i++) {
 | 
			
		||||
                    readline(line, fp);
 | 
			
		||||
                    atom_id = atoi(strtok(line, " ")) - 1;
 | 
			
		||||
                    atom->type[atom_id] = atoi(strtok(NULL, " "));
 | 
			
		||||
                    atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_vx(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_vy(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_vz(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
                    atom_id = atoi(strtok(line, "\t ")) - 1;
 | 
			
		||||
                    atom->type[atom_id] = atoi(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_vx(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_vy(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom_vz(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
                    atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
			
		||||
                    read_atoms++;
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
                if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
 | 
			
		||||
                exit(-1);
 | 
			
		||||
                return -1;
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
			
		||||
            if(me==0)fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
 | 
			
		||||
            exit(-1);
 | 
			
		||||
            return -1;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(ts < 0 || !natoms || !read_atoms) {
 | 
			
		||||
        fprintf(stderr, "Input error: atom data was not read!\n");
 | 
			
		||||
        if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -426,30 +445,34 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
			
		||||
    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
			
		||||
    return natoms;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int readAtom_in(Atom* atom, Parameter* param) {
 | 
			
		||||
    int me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
 | 
			
		||||
    FILE *fp = fopen(param->input_file, "r");
 | 
			
		||||
    char line[MAXLINE];
 | 
			
		||||
    int natoms = 0;
 | 
			
		||||
    int atom_id = 0;
 | 
			
		||||
 | 
			
		||||
    if(!fp) {
 | 
			
		||||
        fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    readline(line, fp);
 | 
			
		||||
    natoms = atoi(strtok(line, " "));
 | 
			
		||||
    param->xlo = atof(strtok(NULL, " "));
 | 
			
		||||
    param->xhi = atof(strtok(NULL, " "));
 | 
			
		||||
    param->ylo = atof(strtok(NULL, " "));
 | 
			
		||||
    param->yhi = atof(strtok(NULL, " "));
 | 
			
		||||
    param->zlo = atof(strtok(NULL, " "));
 | 
			
		||||
    param->zhi = atof(strtok(NULL, " "));
 | 
			
		||||
    natoms = atoi(strtok(line, "\t "));
 | 
			
		||||
    param->xlo = atof(strtok(NULL, "\t "));
 | 
			
		||||
    param->xhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
    param->ylo = atof(strtok(NULL, "\t "));
 | 
			
		||||
    param->yhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
    param->zlo = atof(strtok(NULL, "\t "));
 | 
			
		||||
    param->zhi = atof(strtok(NULL, "\t "));
 | 
			
		||||
    param->xprd = param->xhi - param->xlo; 
 | 
			
		||||
    param->yprd = param->yhi - param->ylo;
 | 
			
		||||
    param->zprd = param->zhi - param->zlo;
 | 
			
		||||
    atom->Natoms = natoms;
 | 
			
		||||
    atom->Nlocal = natoms;
 | 
			
		||||
    atom->ntypes = 1;
 | 
			
		||||
@@ -462,27 +485,26 @@ int readAtom_in(Atom* atom, Parameter* param) {
 | 
			
		||||
        readline(line, fp);
 | 
			
		||||
 | 
			
		||||
        // TODO: store mass per atom
 | 
			
		||||
        char *s_mass = strtok(line, " ");
 | 
			
		||||
        char *s_mass = strtok(line, "\t ");
 | 
			
		||||
        if(strncmp(s_mass, "inf", 3) == 0) {
 | 
			
		||||
            // Set atom's mass to INFINITY
 | 
			
		||||
        } else {
 | 
			
		||||
            param->mass = atof(s_mass);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        atom->radius[atom_id] = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_x(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_y(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_z(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_vx(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_vy(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
        atom_vz(atom_id) = atof(strtok(NULL, " "));
 | 
			
		||||
    
 | 
			
		||||
        atom->radius[atom_id] = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_x(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_y(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_z(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_vx(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_vy(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom_vz(atom_id) = atof(strtok(NULL, "\t "));
 | 
			
		||||
        atom->type[atom_id] = 0;
 | 
			
		||||
        atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
 | 
			
		||||
        atom_id++;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(!natoms) {
 | 
			
		||||
        fprintf(stderr, "Input error: atom data was not read!\n");
 | 
			
		||||
        if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
 | 
			
		||||
        exit(-1);
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
@@ -498,25 +520,10 @@ int readAtom_in(Atom* atom, Parameter* param) {
 | 
			
		||||
        atom->cutforcesq[i] = param->cutforce * param->cutforce;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
			
		||||
    if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
 | 
			
		||||
    return natoms;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Write the first atom->Nlocal atoms to param->write_atom_file, one line per
 * atom: type, the constant 1.0, position (x, y, z), velocity (vx, vy, vz) and
 * a trailing 0, comma separated. Afterwards a summary line with the file name
 * and param->xprd/yprd/zprd is printed to stdout.
 *
 * If the output file cannot be opened, an error is reported on stderr and
 * nothing is written (previously the NULL stream was passed on to fprintf).
 */
void writeAtom(Atom *atom, Parameter *param) {
    FILE *fp = fopen(param->write_atom_file, "w");

    if(!fp) {
        fprintf(stderr, "Could not open output file: %s\n", param->write_atom_file);
        return;
    }

    for(int i = 0; i < atom->Nlocal; i++) {
        fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
            atom->type[i], 1.0,
            atom_x(i), atom_y(i), atom_z(i),
            atom_vx(i), atom_vy(i), atom_vz(i));
    }

    fclose(fp);
    fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
        param->write_atom_file, param->xprd, param->yprd, param->zprd);
}
 | 
			
		||||
 | 
			
		||||
void growAtom(Atom *atom) {
 | 
			
		||||
    DeviceAtom *d_atom = &(atom->d_atom);
 | 
			
		||||
    int nold = atom->Nmax;
 | 
			
		||||
@@ -545,7 +552,125 @@ void growAtom(Atom *atom) {
 | 
			
		||||
    REALLOC(type, int, atom->Nmax * sizeof(int), nold * sizeof(int));
 | 
			
		||||
 | 
			
		||||
    // DEM
 | 
			
		||||
    atom->radius = (MD_FLOAT *) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
 | 
			
		||||
    atom->radius = (MD_FLOAT*) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
 | 
			
		||||
    atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
 | 
			
		||||
    atom->r  = (MD_FLOAT*) reallocate(atom->r,  ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* MPI added*/
 | 
			
		||||
/*
 * Fill buf with the positions of the n atoms whose indices are given in list.
 * Each coordinate is shifted by pbc[0..2] times the matching extent
 * (xprd/yprd/zprd) of atom->mybox before it is stored through buf_x/y/z.
 */
void packForward(Atom* atom, int n, int* list, MD_FLOAT* buf, int* pbc)
{
    for(int k = 0; k < n; k++) {
        const int src = list[k];
        buf_x(k) = atom_x(src) + pbc[0] * atom->mybox.xprd;
        buf_y(k) = atom_y(src) + pbc[1] * atom->mybox.yprd;
        buf_z(k) = atom_z(src) + pbc[2] * atom->mybox.zprd;
    }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Copy n positions out of buf into the atom slots first, first + 1, ...,
 * first + n - 1 (the inverse of the layout written via buf_x/y/z).
 */
void unpackForward(Atom* atom, int n, int first, MD_FLOAT* buf)
{
    int dst = first;
    for(int k = 0; k < n; k++, dst++) {
        atom_x(dst) = buf_x(k);
        atom_y(dst) = buf_y(k);
        atom_z(dst) = buf_z(k);
    }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Serialise atom i into buf as four consecutive values: the position shifted
 * by pbc[_x], pbc[_y], pbc[_z] times the extents of atom->mybox, followed by
 * the atom type. Returns the number of values written (4).
 */
int packGhost(Atom* atom, int i, MD_FLOAT* buf, int* pbc)
{
    buf[0] = atom_x(i) + pbc[_x] * atom->mybox.xprd;
    buf[1] = atom_y(i) + pbc[_y] * atom->mybox.yprd;
    buf[2] = atom_z(i) + pbc[_z] * atom->mybox.zprd;
    buf[3] = atom->type[i];
    return 4;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Store the four values at buf[0..3] (x, y, z, type) into atom slot i, growing
 * the atom arrays first until index i fits below atom->Nmax, and increment
 * atom->Nghost. Returns the number of values consumed (4).
 */
int unpackGhost(Atom* atom, int i, MD_FLOAT* buf)
{
    while(atom->Nmax <= i) {
        growAtom(atom);
    }

    atom_x(i) = buf[0];
    atom_y(i) = buf[1];
    atom_z(i) = buf[2];
    atom->type[i] = buf[3];
    atom->Nghost += 1;
    return 4;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Copy the force components of the n atoms starting at index first into buf
 * (through buf_x/y/z), in order.
 */
void packReverse(Atom* atom, int n, int first, MD_FLOAT* buf)
{
    int src = first;
    for(int k = 0; k < n; k++, src++) {
        buf_x(k) = atom_fx(src);
        buf_y(k) = atom_fy(src);
        buf_z(k) = atom_fz(src);
    }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Accumulate the n force triples stored in buf onto the atoms whose indices
 * are given in list (forces are added, not overwritten).
 */
void unpackReverse(Atom* atom, int n, int* list, MD_FLOAT* buf)
{
    for(int k = 0; k < n; k++) {
        const int dst = list[k];
        atom_fx(dst) += buf_x(k);
        atom_fy(dst) += buf_y(k);
        atom_fz(dst) += buf_z(k);
    }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Serialise atom i into buf as seven consecutive values: position (x, y, z),
 * velocity (vx, vy, vz) and type. Returns the number of values written (7).
 */
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
{
    buf[0] = atom_x(i);
    buf[1] = atom_y(i);
    buf[2] = atom_z(i);
    buf[3] = atom_vx(i);
    buf[4] = atom_vy(i);
    buf[5] = atom_vz(i);
    buf[6] = atom->type[i];
    return 7;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Store the seven values at buf[0..6] (x, y, z, vx, vy, vz, type) into atom
 * slot i, growing the atom arrays first until index i fits below atom->Nmax.
 * Returns the number of values consumed (7).
 */
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
{
    while(atom->Nmax <= i) {
        growAtom(atom);
    }

    atom_x(i) = buf[0];
    atom_y(i) = buf[1];
    atom_z(i) = buf[2];
    atom_vx(i) = buf[3];
    atom_vy(i) = buf[4];
    atom_vz(i) = buf[5];
    atom->type[i] = buf[6];
    return 7;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Wrap the positions of the first atom->Nlocal atoms back into the range
 * [0, extent) per axis, using the extents stored in atom->mybox. A coordinate
 * is shifted by at most one box length in either direction.
 */
void pbc(Atom* atom)
{
    const MD_FLOAT lx = atom->mybox.xprd;
    const MD_FLOAT ly = atom->mybox.yprd;
    const MD_FLOAT lz = atom->mybox.zprd;

    for(int i = 0; i < atom->Nlocal; i++) {
        /* The axes are independent, so each one is handled in turn. */
        if(atom_x(i) < 0.0) { atom_x(i) += lx; }
        if(atom_x(i) >= lx) { atom_x(i) -= lx; }

        if(atom_y(i) < 0.0) { atom_y(i) += ly; }
        if(atom_y(i) >= ly) { atom_y(i) -= ly; }

        if(atom_z(i) < 0.0) { atom_z(i) += lz; }
        if(atom_z(i) >= lz) { atom_z(i) -= lz; }
    }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Overwrite atom slot i with the type, position and velocity of atom slot j.
 * Only the fields assigned below are copied.
 */
void copy(Atom* atom, int i, int j)
{
    atom->type[i] = atom->type[j];

    atom_x(i) = atom_x(j);
    atom_vx(i) = atom_vx(j);

    atom_y(i) = atom_y(j);
    atom_vy(i) = atom_vy(j);

    atom_z(i) = atom_z(j);
    atom_vz(i) = atom_vz(j);
}
 | 
			
		||||
 
 | 
			
		||||
@@ -13,13 +13,17 @@
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <stats.h>
 | 
			
		||||
#include <timing.h>
 | 
			
		||||
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#ifdef __SIMD_KERNEL__
 | 
			
		||||
#include <simd.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
 | 
			
		||||
 | 
			
		||||
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
			
		||||
    int Nlocal = atom->Nlocal;
 | 
			
		||||
    int Nghost = atom->Nghost;
 | 
			
		||||
    int* neighs;
 | 
			
		||||
    #ifndef EXPLICIT_TYPES
 | 
			
		||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
			
		||||
@@ -41,21 +45,21 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("force");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int i = 0; i < Nlocal; i++) {
 | 
			
		||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
			
		||||
        int numneighs = neighbor->numneigh[i];
 | 
			
		||||
        MD_FLOAT xtmp = atom_x(i);
 | 
			
		||||
        MD_FLOAT ytmp = atom_y(i);
 | 
			
		||||
        MD_FLOAT ztmp = atom_z(i);
 | 
			
		||||
        MD_FLOAT fix = 0;
 | 
			
		||||
        MD_FLOAT fiy = 0;
 | 
			
		||||
        MD_FLOAT fiz = 0;
 | 
			
		||||
 | 
			
		||||
        MD_FLOAT fix = 0.0;
 | 
			
		||||
        MD_FLOAT fiy = 0.0;
 | 
			
		||||
        MD_FLOAT fiz = 0.0;
 | 
			
		||||
        
 | 
			
		||||
        #ifdef EXPLICIT_TYPES
 | 
			
		||||
        const int type_i = atom->type[i];
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
        for(int k = 0; k < numneighs; k++) {
 | 
			
		||||
            int j = neighs[k];
 | 
			
		||||
            MD_FLOAT delx = xtmp - atom_x(j);
 | 
			
		||||
@@ -70,26 +74,25 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
			
		||||
            const MD_FLOAT sigma6 = atom->sigma6[type_ij];
 | 
			
		||||
            const MD_FLOAT epsilon = atom->epsilon[type_ij];
 | 
			
		||||
            #endif
 | 
			
		||||
 | 
			
		||||
            if(rsq < cutforcesq) {
 | 
			
		||||
                MD_FLOAT sr2 = num1 / rsq;
 | 
			
		||||
                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
 | 
			
		||||
                MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
 | 
			
		||||
                MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;  
 | 
			
		||||
                fix += delx * force;
 | 
			
		||||
                fiy += dely * force;
 | 
			
		||||
                fiz += delz * force;
 | 
			
		||||
                fiz += delz * force; 
 | 
			
		||||
        
 | 
			
		||||
            #ifdef USE_REFERENCE_VERSION
 | 
			
		||||
                addStat(stats->atoms_within_cutoff, 1);
 | 
			
		||||
            } else {
 | 
			
		||||
                addStat(stats->atoms_outside_cutoff, 1);
 | 
			
		||||
            #endif
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        }              
 | 
			
		||||
        atom_fx(i) += fix;
 | 
			
		||||
        atom_fy(i) += fiy;
 | 
			
		||||
        atom_fz(i) += fiz;
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
        #ifdef USE_REFERENCE_VERSION
 | 
			
		||||
        if(numneighs % VECTOR_WIDTH > 0) {
 | 
			
		||||
            addStat(stats->atoms_outside_cutoff, VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
 | 
			
		||||
@@ -102,13 +105,13 @@ double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *n
 | 
			
		||||
 | 
			
		||||
    LIKWID_MARKER_STOP("force");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    double E = getTimeStamp();
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
			
		||||
    int Nlocal = atom->Nlocal;
 | 
			
		||||
    int Nghost = atom->Nghost;
 | 
			
		||||
    int* neighs;
 | 
			
		||||
    #ifndef EXPLICIT_TYPES
 | 
			
		||||
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
 | 
			
		||||
@@ -119,19 +122,18 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
    const MD_FLOAT num48 = 48.0;
 | 
			
		||||
    const MD_FLOAT num05 = 0.5;
 | 
			
		||||
 | 
			
		||||
    for(int i = 0; i < Nlocal; i++) {
 | 
			
		||||
    for(int i = 0; i < Nlocal+Nghost; i++) {
 | 
			
		||||
        atom_fx(i) = 0.0;
 | 
			
		||||
        atom_fy(i) = 0.0;
 | 
			
		||||
        atom_fz(i) = 0.0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    double S = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    #pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("forceLJ-halfneigh");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int i = 0; i < Nlocal; i++) {
 | 
			
		||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
			
		||||
        int numneighs = neighbor->numneigh[i];
 | 
			
		||||
@@ -172,16 +174,14 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
                fix += delx * force;
 | 
			
		||||
                fiy += dely * force;
 | 
			
		||||
                fiz += delz * force;
 | 
			
		||||
 | 
			
		||||
                // We do not need to update forces for ghost atoms
 | 
			
		||||
                if(j < Nlocal) {
 | 
			
		||||
                // We need to update forces for ghost atoms if a shell method or the half stencil is required
 | 
			
		||||
                if((param->half_neigh && j<Nlocal) || param->method){
 | 
			
		||||
                    atom_fx(j) -= delx * force;
 | 
			
		||||
                    atom_fy(j) -= dely * force;
 | 
			
		||||
                    atom_fz(j) -= delz * force;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        atom_fx(i) += fix;
 | 
			
		||||
        atom_fy(i) += fiy;
 | 
			
		||||
        atom_fz(i) += fiz;
 | 
			
		||||
@@ -190,6 +190,7 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor); 
 | 
			
		||||
    LIKWID_MARKER_STOP("forceLJ-halfneigh");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -227,7 +228,7 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
 | 
			
		||||
    {
 | 
			
		||||
    LIKWID_MARKER_START("force");
 | 
			
		||||
 | 
			
		||||
    #pragma omp for schedule(runtime)
 | 
			
		||||
    #pragma omp for
 | 
			
		||||
    for(int i = 0; i < Nlocal; i++) {
 | 
			
		||||
        neighs = &neighbor->neighbors[i * neighbor->maxneighs];
 | 
			
		||||
        int numneighs = neighbor->numneigh[i];
 | 
			
		||||
@@ -276,3 +277,58 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
 | 
			
		||||
    double E = getTimeStamp();
 | 
			
		||||
    return E-S;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Lennard-Jones force pass over the atoms listed in neighbor->listshell.
 *
 * For shell entry i the atom index is neighbor->listshell[i], its neighbor
 * count is neighbor->numNeighShell[i], and its neighbor indices start at
 * neighbor->neighshell[i * neighbor->maxneighs]. For every pair whose squared
 * distance is below the cutoff, the pair force is added to the listed atom
 * and subtracted from the neighbor.
 *
 * Forces are accumulated into atom_fx/fy/fz; nothing is zeroed here. The
 * updates are plain read-modify-writes without any synchronization.
 * NOTE(review): the visible call site in computeForceLJHalfNeigh sits inside
 * an `omp parallel` region, so every thread would execute this whole loop -
 * confirm that this is intended.
 *
 * param    - supplies cutforce, sigma6 and epsilon when EXPLICIT_TYPES is off
 * atom     - positions are read, forces are updated in place
 * neighbor - shell list (Nshell, listshell, numNeighShell, neighshell)
 */
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
    const int Nshell = neighbor->Nshell;
    #ifndef EXPLICIT_TYPES
    const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    const MD_FLOAT sigma6 = param->sigma6;
    const MD_FLOAT epsilon = param->epsilon;
    #else
    (void) param; // per-pair parameters come from the atom tables instead
    #endif
    const MD_FLOAT num1 = 1.0;
    const MD_FLOAT num48 = 48.0;
    const MD_FLOAT num05 = 0.5;

    for(int i = 0; i < Nshell; i++) {
        const int *neighs = &(neighbor->neighshell[i * neighbor->maxneighs]);
        const int numneigh = neighbor->numNeighShell[i];
        const int iatom = neighbor->listshell[i];
        const MD_FLOAT xtmp = atom_x(iatom);
        const MD_FLOAT ytmp = atom_y(iatom);
        const MD_FLOAT ztmp = atom_z(iatom);
        MD_FLOAT fix = 0;
        MD_FLOAT fiy = 0;
        MD_FLOAT fiz = 0;

        #ifdef EXPLICIT_TYPES
        // The type belongs to the shell atom, so index with iatom; the loop
        // counter i is only a position inside listshell.
        const int type_i = atom->type[iatom];
        #endif

        for(int k = 0; k < numneigh; k++) {
            const int jatom = neighs[k];
            const MD_FLOAT delx = xtmp - atom_x(jatom);
            const MD_FLOAT dely = ytmp - atom_y(jatom);
            const MD_FLOAT delz = ztmp - atom_z(jatom);
            const MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;

            #ifdef EXPLICIT_TYPES
            // Per-pair parameters, looked up the same way as in the
            // full-neighbor kernel.
            // NOTE(review): assumes atom->ntypes is the row stride of the
            // pair tables - confirm against atom.h.
            const int type_j = atom->type[jatom];
            const int type_ij = type_i * atom->ntypes + type_j;
            const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
            const MD_FLOAT sigma6 = atom->sigma6[type_ij];
            const MD_FLOAT epsilon = atom->epsilon[type_ij];
            #endif

            if(rsq < cutforcesq) {
                const MD_FLOAT sr2 = num1 / rsq;
                const MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
                const MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;

                // Contribution to the shell atom is collected locally ...
                fix += delx * force;
                fiy += dely * force;
                fiz += delz * force;

                // ... and the opposite contribution goes straight to the neighbor.
                atom_fx(jatom) -= delx * force;
                atom_fy(jatom) -= dely * force;
                atom_fz(jatom) -= delz * force;
            }
        }

        atom_fx(iatom) += fix;
        atom_fy(iatom) += fiy;
        atom_fz(iatom) += fiz;
    }
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -4,8 +4,9 @@
 | 
			
		||||
 * Use of this source code is governed by a LGPL-3.0
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
 | 
			
		||||
#include <box.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#ifndef __ATOM_H_
 | 
			
		||||
#define __ATOM_H_
 | 
			
		||||
 | 
			
		||||
@@ -56,6 +57,8 @@ typedef struct {
 | 
			
		||||
    MD_FLOAT *sigma6;
 | 
			
		||||
    MD_FLOAT *cutforcesq;
 | 
			
		||||
    MD_FLOAT *cutneighsq;
 | 
			
		||||
    //TODO: insert the id number
 | 
			
		||||
    //MD_FLOAT *Atom_id;
 | 
			
		||||
 | 
			
		||||
    // DEM
 | 
			
		||||
    MD_FLOAT *radius;
 | 
			
		||||
@@ -64,6 +67,9 @@ typedef struct {
 | 
			
		||||
 | 
			
		||||
    // Device data
 | 
			
		||||
    DeviceAtom d_atom;
 | 
			
		||||
     
 | 
			
		||||
    //Info Subdomain
 | 
			
		||||
    Box mybox;            
 | 
			
		||||
} Atom;
 | 
			
		||||
 | 
			
		||||
extern void initAtom(Atom*);
 | 
			
		||||
@@ -73,9 +79,19 @@ extern int readAtom_pdb(Atom*, Parameter*);
 | 
			
		||||
extern int readAtom_gro(Atom*, Parameter*);
 | 
			
		||||
extern int readAtom_dmp(Atom*, Parameter*);
 | 
			
		||||
extern int readAtom_in(Atom*, Parameter*);
 | 
			
		||||
extern void writeAtom(Atom*, Parameter*);
 | 
			
		||||
extern void growAtom(Atom*);
 | 
			
		||||
 | 
			
		||||
int  packGhost(Atom*, int, MD_FLOAT*, int*);
 | 
			
		||||
int  unpackGhost(Atom*, int, MD_FLOAT*);
 | 
			
		||||
int  packExchange(Atom*, int, MD_FLOAT*);
 | 
			
		||||
int  unpackExchange(Atom*, int, MD_FLOAT*);
 | 
			
		||||
void packForward(Atom*, int, int*, MD_FLOAT*, int*); 
 | 
			
		||||
void unpackForward(Atom*, int, int, MD_FLOAT*);
 | 
			
		||||
void packReverse(Atom* , int , int , MD_FLOAT*);
 | 
			
		||||
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
 | 
			
		||||
void pbc(Atom*);
 | 
			
		||||
void copy(Atom*, int, int);
 | 
			
		||||
 | 
			
		||||
#ifdef AOS
 | 
			
		||||
#   define POS_DATA_LAYOUT     "AoS"
 | 
			
		||||
#   define atom_x(i)           atom->x[(i) * 3 + 0]
 | 
			
		||||
@@ -100,4 +116,8 @@ extern void growAtom(Atom*);
 | 
			
		||||
#   define atom_fz(i)          atom->fz[i]
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#   define buf_x(i)            buf[3*(i)] 
 | 
			
		||||
#   define buf_y(i)            buf[3*(i)+1]
 | 
			
		||||
#   define buf_z(i)            buf[3*(i)+2]
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -20,9 +20,14 @@ typedef struct {
 | 
			
		||||
    int ncalls;
 | 
			
		||||
    int maxneighs;
 | 
			
		||||
    int half_neigh;
 | 
			
		||||
    int half_stencil;
 | 
			
		||||
    int *neighbors;
 | 
			
		||||
    int *numneigh;
 | 
			
		||||
 | 
			
		||||
    //MPI
 | 
			
		||||
    int Nshell;         //# of atoms in listShell
 | 
			
		||||
    int *numNeighShell; //# of neighs for each atom in listShell
 | 
			
		||||
    int *neighshell;    //list of neighs for each atom in listShell
 | 
			
		||||
    int *listshell;     //Atoms to compute the force
 | 
			
		||||
    // Device data
 | 
			
		||||
    DeviceNeighbor d_neighbor;
 | 
			
		||||
} Neighbor;
 | 
			
		||||
 
 | 
			
		||||
@@ -5,8 +5,11 @@
 | 
			
		||||
 * license that can be found in the LICENSE file.
 | 
			
		||||
 */
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <comm.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
 | 
			
		||||
#ifndef __VTK_H_
 | 
			
		||||
#define __VTK_H_
 | 
			
		||||
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
 | 
			
		||||
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										352
									
								
								lammps/main.c
									
									
									
									
									
								
							
							
						
						
									
										352
									
								
								lammps/main.c
									
									
									
									
									
								
							@@ -11,10 +11,7 @@
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#include <float.h>
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
 | 
			
		||||
#include <likwid-marker.h>
 | 
			
		||||
 | 
			
		||||
#include <allocate.h>
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <device.h>
 | 
			
		||||
@@ -24,13 +21,19 @@
 | 
			
		||||
#include <timing.h>
 | 
			
		||||
#include <neighbor.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <pbc.h>
 | 
			
		||||
#include <stats.h>
 | 
			
		||||
#include <timers.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <vtk.h>
 | 
			
		||||
#include <comm.h>
 | 
			
		||||
#include <grid.h>
 | 
			
		||||
#include <shell_methods.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
#define HLINE "----------------------------------------------------------------------------\n"
 | 
			
		||||
#ifdef CUDA_TARGET
 | 
			
		||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
 | 
			
		||||
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
 | 
			
		||||
@@ -38,69 +41,6 @@ extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 | 
			
		||||
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
 | 
			
		||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
 | 
			
		||||
 | 
			
		||||
#ifdef CUDA_TARGET
 | 
			
		||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/*
 * Single-process setup: derive the box extents from the density, create or
 * read the atoms, prepare thermo, periodic images and device data, and build
 * the first neighbor list.
 *
 * Returns the difference of two getTimeStamp() readings taken after the box
 * extents are computed and after buildNeighbor() returns.
 */
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
    if(param->force_field == FF_EAM) {
        initEam(eam, param);
    }

    // Lattice constant from the density, then box extent per axis.
    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
    param->xprd = param->nx * param->lattice;
    param->yprd = param->ny * param->lattice;
    param->zprd = param->nz * param->lattice;

    const double start = getTimeStamp();

    initAtom(atom);
    initPbc(atom);
    initStats(stats);
    initNeighbor(neighbor, param);

    // Without an input file the atoms are generated, otherwise they are read.
    if(param->input_file != NULL) {
        readAtom(atom, param);
    } else {
        createAtom(atom, param);
    }

    setupNeighbor(param);
    setupThermo(param, atom->Natoms);
    if(param->input_file == NULL) {
        adjustThermo(param, atom);
    }

    #ifdef SORT_ATOMS
    atom->Nghost = 0;
    sortAtom(atom);
    #endif

    setupPbc(atom, param);
    initDevice(atom, neighbor);
    updatePbc(atom, param, true);
    buildNeighbor(atom, neighbor);

    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Single-process reneighbouring step: updateAtomsPbc(), optional sort,
 * setupPbc()/updatePbc(), then buildNeighbor().
 *
 * The work is bracketed by the "reneighbour" LIKWID marker. Returns the
 * difference of the getTimeStamp() readings taken before and after.
 */
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
    const double start = getTimeStamp();

    LIKWID_MARKER_START("reneighbour");

    updateAtomsPbc(atom, param);

    #ifdef SORT_ATOMS
    atom->Nghost = 0;
    sortAtom(atom);
    #endif

    setupPbc(atom, param);
    updatePbc(atom, param, true);
    buildNeighbor(atom, neighbor);

    LIKWID_MARKER_STOP("reneighbour");

    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Print the atom counters (Natoms, Nlocal, Nghost, Nmax) on one line of
 * stdout. Per-atom coordinates are not printed.
 */
void printAtomState(Atom *atom) {
    printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
           atom->Natoms,
           atom->Nlocal,
           atom->Nghost,
           atom->Nmax);
}
 | 
			
		||||
 | 
			
		||||
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
 | 
			
		||||
    if(param->force_field == FF_EAM) {
 | 
			
		||||
        return computeForceEam(eam, param, atom, neighbor, stats);
 | 
			
		||||
@@ -113,7 +53,7 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(param->half_neigh) {
 | 
			
		||||
    if(param->half_neigh || param->method) {
 | 
			
		||||
        return computeForceLJHalfNeigh(param, atom, neighbor, stats);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -124,6 +64,102 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
 | 
			
		||||
    #endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Rebalance the domain decomposition according to param->balance.
 *
 *   RCB         - rcbBalance() with meanBisect, then neighComm()
 *   meanTimeRCB - rcbBalance() with meanTimeBisect and `time`, then neighComm()
 *   Staggered   - staggeredBalance() with `time`, neighComm(), exchangeComm()
 *   otherwise   - nothing is done
 *
 * `time` is only forwarded to the balancing routine. Returns the difference
 * of the getTimeStamp() readings taken on entry and exit.
 */
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time){
    const int dims = 3;   //TODO: Adjust to do in 3d and 2d
    const double start = getTimeStamp();
    const int balance = param->balance;

    if(balance == RCB) {
        rcbBalance(grid, atom, param, meanBisect, dims, 0);
        neighComm(comm, param, grid);
    } else if(balance == meanTimeRCB) {
        rcbBalance(grid, atom, param, meanTimeBisect, dims, time);
        neighComm(comm, param, grid);
    } else if(balance == Staggered) {
        staggeredBalance(grid, atom, param, time);
        neighComm(comm, param, grid);
        exchangeComm(comm, atom);
    }
    // Any other value: no balancing is performed.

    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Initial load balancing, called from setup() when param->balance is set.
 *
 * For RCB and meanTimeRCB one rcbBalance() pass with meanBisect is run,
 * followed by neighComm(). In every case the local atom counts are summed
 * over all ranks into atom->Natoms, each rank prints its counts, and the
 * ranks synchronize on a barrier.
 *
 * eam, neighbor and stats are accepted for signature compatibility but are
 * not used here. Returns the difference of the getTimeStamp() readings taken
 * before balancing and after the barrier.
 */
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
{
    const int dims = 3;   // same dimensionality as used by dynamicBalance()
    int me;

    (void) eam;
    (void) neighbor;
    (void) stats;

    MPI_Comm_rank(world, &me);
    const double start = getTimeStamp();

    if(param->balance == meanTimeRCB || param->balance == RCB) {
        rcbBalance(grid, atom, param, meanBisect, dims, 0);
        neighComm(comm, param, grid);
    }

    // Global atom count = sum of the per-rank local counts.
    MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
    printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
    MPI_Barrier(world);

    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
/*
 * MPI setup: derive the box extents from the density, create or read the
 * atoms, set up grid, neighbor parameters and communication, optionally run
 * the initial load balance, prepare thermo and device data, exchange ghost
 * atoms and build the first neighbor list.
 *
 * Returns the difference of two getTimeStamp() readings taken after the box
 * extents are computed and after buildNeighbor() returns. The value returned
 * by initialBalance() is not used.
 */
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
    if(param->force_field == FF_EAM) {
        initEam(eam, param);
    }

    // Lattice constant from the density, then box extent per axis.
    param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
    param->xprd = param->nx * param->lattice;
    param->yprd = param->ny * param->lattice;
    param->zprd = param->nz * param->lattice;

    const double start = getTimeStamp();

    initAtom(atom);
    initStats(stats);
    initNeighbor(neighbor, param);

    // Without an input file the atoms are generated, otherwise they are read.
    if(param->input_file != NULL) {
        readAtom(atom, param);
    } else {
        createAtom(atom, param);
    }

    setupGrid(grid, atom, param);
    setupNeighbor(param);
    setupComm(comm, param, grid);

    if(param->balance) {
        initialBalance(param, eam, atom, neighbor, stats, comm, grid);
    }

    setupThermo(param, atom->Natoms);
    if(param->input_file == NULL) {
        adjustThermo(param, atom);
    }

    #ifdef SORT_ATOMS
    atom->Nghost = 0;
    sortAtom(atom);
    #endif

    initDevice(atom, neighbor);
    ghostNeighbor(comm, atom, param);
    buildNeighbor(atom, neighbor);

    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
/*
 * MPI reneighbouring step: optional sort, ghostNeighbor(), then
 * buildNeighbor().
 *
 * The work is bracketed by the "reneighbour" LIKWID marker. Returns the
 * difference of the getTimeStamp() readings taken before and after.
 */
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
    const double start = getTimeStamp();

    LIKWID_MARKER_START("reneighbour");

    #ifdef SORT_ATOMS
    atom->Nghost = 0;
    sortAtom(atom);
    #endif

    ghostNeighbor(comm, atom, param);
    buildNeighbor(atom, neighbor);

    LIKWID_MARKER_STOP("reneighbour");

    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Timed wrapper around exchangeComm(comm, atom).
 *
 * Returns the difference of the getTimeStamp() readings taken immediately
 * before and after the call.
 */
double updateAtoms(Comm* comm, Atom* atom){
    const double start = getTimeStamp();
    exchangeComm(comm, atom);
    return getTimeStamp() - start;
}
 | 
			
		||||
 | 
			
		||||
void writeInput(Parameter *param, Atom *atom) {
 | 
			
		||||
    FILE *fpin = fopen("input.in", "w");
 | 
			
		||||
    fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
 | 
			
		||||
@@ -142,18 +178,19 @@ int main(int argc, char** argv) {
 | 
			
		||||
    Neighbor neighbor;
 | 
			
		||||
    Stats stats;
 | 
			
		||||
    Parameter param;
 | 
			
		||||
 | 
			
		||||
    Comm comm; 
 | 
			
		||||
    Grid grid;
 | 
			
		||||
    LIKWID_MARKER_INIT;
 | 
			
		||||
#pragma omp parallel
 | 
			
		||||
    {
 | 
			
		||||
        LIKWID_MARKER_REGISTER("force");
 | 
			
		||||
        //LIKWID_MARKER_REGISTER("reneighbour");
 | 
			
		||||
        //LIKWID_MARKER_REGISTER("pbc");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    } 
 | 
			
		||||
    initComm(&argc, &argv, &comm);
 | 
			
		||||
    initParameter(¶m);
 | 
			
		||||
    for(int i = 0; i < argc; i++) {
 | 
			
		||||
        if((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
 | 
			
		||||
        if((strcmp(argv[i], "-p") == 0)) {
 | 
			
		||||
            readParameter(¶m, argv[++i]);
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
@@ -191,6 +228,24 @@ int main(int argc, char** argv) {
 | 
			
		||||
        if((strcmp(argv[i], "-half") == 0)) {
 | 
			
		||||
            param.half_neigh = atoi(argv[++i]);
 | 
			
		||||
            continue;
 | 
			
		||||
        } 
 | 
			
		||||
        if((strcmp(argv[i], "-method") == 0)) {
 | 
			
		||||
            param.method = atoi(argv[++i]);
 | 
			
		||||
            if (param.method>3 || param.method< 0){
 | 
			
		||||
                if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
 | 
			
		||||
                endComm(&comm);   
 | 
			
		||||
                exit(0);
 | 
			
		||||
            }
 | 
			
		||||
            continue;
 | 
			
		||||
        } 
 | 
			
		||||
        if((strcmp(argv[i], "-bal") == 0)) {
 | 
			
		||||
            param.balance = atoi(argv[++i]);
 | 
			
		||||
            if (param.balance>3 || param.balance< 0){
 | 
			
		||||
                if(comm.myproc == 0) fprintf(stderr, "Load Balance does not exist!\n");
 | 
			
		||||
                endComm(&comm);   
 | 
			
		||||
                exit(0);
 | 
			
		||||
            }
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
 | 
			
		||||
            param.cutforce = atof(argv[++i]);
 | 
			
		||||
@@ -208,71 +263,71 @@ int main(int argc, char** argv) {
 | 
			
		||||
            param.vtk_file = strdup(argv[++i]);
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-w") == 0)) {
 | 
			
		||||
            param.write_atom_file = strdup(argv[++i]);
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
 | 
			
		||||
            printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
			
		||||
            printf(HLINE);
 | 
			
		||||
            printf("-p / --params <string>:     file to read parameters from (can be specified more than once)\n");
 | 
			
		||||
            printf("-f <string>:                force field (lj, eam or dem), default lj\n");
 | 
			
		||||
            printf("-i <string>:                input file with atom positions (dump)\n");
 | 
			
		||||
            printf("-e <string>:                input file for EAM\n");
 | 
			
		||||
            printf("-n / --nsteps <int>:        set number of timesteps for simulation\n");
 | 
			
		||||
            printf("-nx/-ny/-nz <int>:          set linear dimension of systembox in x/y/z direction\n");
 | 
			
		||||
            printf("-half <int>:                use half (1) or full (0) neighbor lists\n");
 | 
			
		||||
            printf("-r / --radius <real>:       set cutoff radius\n");
 | 
			
		||||
            printf("-s / --skin <real>:         set skin (verlet buffer)\n");
 | 
			
		||||
            printf("-w <file>:                  write input atoms to file\n");
 | 
			
		||||
            printf("--freq <real>:              processor frequency (GHz)\n");
 | 
			
		||||
            printf("--vtk <string>:             VTK file for visualization\n");
 | 
			
		||||
            printf(HLINE);
 | 
			
		||||
            exit(EXIT_SUCCESS);
 | 
			
		||||
            if(comm.myproc ==0 ){
 | 
			
		||||
                printf("MD Bench: A minimalistic re-implementation of miniMD\n");
 | 
			
		||||
                printf(HLINE);
 | 
			
		||||
                printf("-p <string>:          file to read parameters from (can be specified more than once)\n");
 | 
			
		||||
                printf("-f <string>:          force field (lj, eam or dem), default lj\n");
 | 
			
		||||
                printf("-i <string>:          input file with atom positions (dump)\n");
 | 
			
		||||
                printf("-e <string>:          input file for EAM\n");
 | 
			
		||||
                printf("-n / --nsteps <int>:  set number of timesteps for simulation\n");
 | 
			
		||||
                printf("-nx/-ny/-nz <int>:    set linear dimension of systembox in x/y/z direction\n");
 | 
			
		||||
                printf("-r / --radius <real>: set cutoff radius\n");
 | 
			
		||||
                printf("-s / --skin <real>:   set skin (verlet buffer)\n");
 | 
			
		||||
                printf("--freq <real>:        processor frequency (GHz)\n");
 | 
			
		||||
                printf("--vtk <string>:       VTK file for visualization\n");
 | 
			
		||||
                printf(HLINE);
 | 
			
		||||
            }
 | 
			
		||||
                exit(EXIT_SUCCESS);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if(param.balance>0 && param.method == 1){
 | 
			
		||||
        if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
 | 
			
		||||
        endComm(&comm);   
 | 
			
		||||
        exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    param.cutneigh = param.cutforce + param.skin;
 | 
			
		||||
    setup(¶m, &eam, &atom, &neighbor, &stats);
 | 
			
		||||
    printParameter(¶m);
 | 
			
		||||
    printf(HLINE);
 | 
			
		||||
 | 
			
		||||
    printf("step\ttemp\t\tpressure\n");
 | 
			
		||||
    timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
 | 
			
		||||
    if(comm.myproc == 0)printParameter(¶m);
 | 
			
		||||
    if(comm.myproc == 0)printf(HLINE);
 | 
			
		||||
    if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n"); 
 | 
			
		||||
    computeThermo(0, ¶m, &atom);
 | 
			
		||||
    #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
			
		||||
    traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
			
		||||
    traceAddresses(¶m, &atom, &neighbor, n + 1);// TODO: trace adress
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    if(param.write_atom_file != NULL) {
 | 
			
		||||
        writeAtom(&atom, ¶m);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //writeInput(¶m, &atom);
 | 
			
		||||
 | 
			
		||||
    timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
 | 
			
		||||
    timer[NEIGH] = 0.0;
 | 
			
		||||
    timer[TOTAL] = getTimeStamp();
 | 
			
		||||
 | 
			
		||||
    timer[FORCE]    = computeForce(&eam, ¶m, &atom, &neighbor, &stats); 
 | 
			
		||||
    timer[NEIGH]    = 0.0;
 | 
			
		||||
    timer[FORWARD]  = 0.0;
 | 
			
		||||
    timer[UPDATE]   = 0.0;
 | 
			
		||||
    timer[BALANCE]  = 0.0;  
 | 
			
		||||
    timer[REVERSE]  = reverse(&comm, &atom, ¶m);
 | 
			
		||||
    MPI_Barrier(world);
 | 
			
		||||
    timer[TOTAL]    = getTimeStamp();
 | 
			
		||||
    if(param.vtk_file != NULL) {
 | 
			
		||||
        write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
        printvtk(param.vtk_file, &comm, &atom, ¶m, 0);
 | 
			
		||||
    } 
 | 
			
		||||
    for(int n = 0; n < param.ntimes; n++) {
 | 
			
		||||
        bool reneigh = (n + 1) % param.reneigh_every == 0;
 | 
			
		||||
        initialIntegrate(reneigh, ¶m, &atom);
 | 
			
		||||
        if((n + 1) % param.reneigh_every) {
 | 
			
		||||
            updatePbc(&atom, ¶m, false);
 | 
			
		||||
        if(reneigh) { 
 | 
			
		||||
            timer[UPDATE] +=updateAtoms(&comm,&atom);         
 | 
			
		||||
            if(param.balance && !((n+1)%param.balance_every))
 | 
			
		||||
                timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);
 | 
			
		||||
            timer[NEIGH] += reneighbour(&comm, ¶m, &atom, &neighbor);
 | 
			
		||||
        } else {
 | 
			
		||||
            timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
            timer[FORWARD] += forward(&comm, &atom, ¶m);
 | 
			
		||||
        } 
 | 
			
		||||
        #if defined(MEM_TRACER) || defined(INDEX_TRACER)
 | 
			
		||||
        traceAddresses(¶m, &atom, &neighbor, n + 1);
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        timer[FORCE] += computeForce(&eam, ¶m, &atom, &neighbor, &stats);
 | 
			
		||||
        timer[REVERSE] += reverse(&comm, &atom, ¶m);
 | 
			
		||||
        finalIntegrate(reneigh, ¶m, &atom);
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
        if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
 | 
			
		||||
            #ifdef CUDA_TARGET
 | 
			
		||||
            memcpyFromGPU(atom.x, atom.d_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3);
 | 
			
		||||
@@ -281,47 +336,42 @@ int main(int argc, char** argv) {
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(param.vtk_file != NULL) {
 | 
			
		||||
            write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
 | 
			
		||||
        }
 | 
			
		||||
            printvtk(param.vtk_file, &comm, &atom ,¶m, n+1);
 | 
			
		||||
        } 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    MPI_Barrier(world);
 | 
			
		||||
    timer[TOTAL] = getTimeStamp() - timer[TOTAL];
 | 
			
		||||
    computeThermo(-1, ¶m, &atom);
 | 
			
		||||
        
 | 
			
		||||
    double mint[NUMTIMER];
 | 
			
		||||
    double maxt[NUMTIMER];
 | 
			
		||||
    double sumt[NUMTIMER];
 | 
			
		||||
    timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
 | 
			
		||||
    MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
 | 
			
		||||
    MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
 | 
			
		||||
    MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
 | 
			
		||||
    int Nghost;
 | 
			
		||||
    MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
 | 
			
		||||
 | 
			
		||||
    printf(HLINE);
 | 
			
		||||
    printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
 | 
			
		||||
    printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
 | 
			
		||||
            timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
 | 
			
		||||
    printf(HLINE);
 | 
			
		||||
 | 
			
		||||
    int nthreads = 0;
 | 
			
		||||
    int chunkSize = 0;
 | 
			
		||||
    omp_sched_t schedKind;
 | 
			
		||||
    char schedType[10];
 | 
			
		||||
#pragma omp parallel
 | 
			
		||||
#pragma omp master
 | 
			
		||||
    {
 | 
			
		||||
    	omp_get_schedule(&schedKind, &chunkSize);
 | 
			
		||||
 | 
			
		||||
    	switch (schedKind)
 | 
			
		||||
    	{
 | 
			
		||||
        	case omp_sched_static:  strcpy(schedType, "static"); break;
 | 
			
		||||
        	case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
 | 
			
		||||
        	case omp_sched_guided:  strcpy(schedType, "guided"); break;
 | 
			
		||||
        	case omp_sched_auto:    strcpy(schedType, "auto"); break;
 | 
			
		||||
    	}
 | 
			
		||||
	
 | 
			
		||||
	nthreads = omp_get_max_threads();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    printf("Num threads: %d\n", nthreads);
 | 
			
		||||
    printf("Schedule: (%s,%d)\n", schedType, chunkSize);
 | 
			
		||||
    
 | 
			
		||||
    printf("Performance: %.2f million atom updates per second\n",
 | 
			
		||||
    if(comm.myproc == 0){
 | 
			
		||||
        int n = comm.numproc;
 | 
			
		||||
        printf(HLINE);
 | 
			
		||||
        printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
 | 
			
		||||
        printf("TOTAL %.2fs\n\n",timer[TOTAL]);
 | 
			
		||||
        printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
 | 
			
		||||
        printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
 | 
			
		||||
        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
 | 
			
		||||
        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
 | 
			
		||||
        printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
 | 
			
		||||
        printf(HLINE);
 | 
			
		||||
        printf("Performance: %.2f million atom updates per second\n",
 | 
			
		||||
            1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
 | 
			
		||||
            
 | 
			
		||||
#ifdef COMPUTE_STATS
 | 
			
		||||
    displayStatistics(&atom, ¶m, &stats, timer);
 | 
			
		||||
#endif
 | 
			
		||||
    } 
 | 
			
		||||
    endComm(&comm);
 | 
			
		||||
    LIKWID_MARKER_CLOSE;
 | 
			
		||||
    return EXIT_SUCCESS;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -11,27 +11,40 @@
 | 
			
		||||
#include <neighbor.h>
 | 
			
		||||
#include <parameter.h>
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
#include <util.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#include <sort.h>
 | 
			
		||||
 | 
			
		||||
#define SMALL 1.0e-6
 | 
			
		||||
#define FACTOR 0.999
 | 
			
		||||
 | 
			
		||||
MD_FLOAT xprd, yprd, zprd;
 | 
			
		||||
MD_FLOAT bininvx, bininvy, bininvz;
 | 
			
		||||
int mbinxlo, mbinylo, mbinzlo;
 | 
			
		||||
int pad_x, pad_y, pad_z;
 | 
			
		||||
int nbinx, nbiny, nbinz;
 | 
			
		||||
int mbinx, mbiny, mbinz; // n bins in x, y, z
 | 
			
		||||
int mbinx, mbiny, mbinz; // m bins in x, y, z
 | 
			
		||||
int *bincount;
 | 
			
		||||
int *bins;
 | 
			
		||||
int mbins; //total number of bins
 | 
			
		||||
int atoms_per_bin;  // max atoms per bin
 | 
			
		||||
int mbins;              //total number of bins
 | 
			
		||||
int atoms_per_bin;      // max atoms per bin
 | 
			
		||||
MD_FLOAT cutneigh;
 | 
			
		||||
MD_FLOAT cutneighsq;  // neighbor cutoff squared
 | 
			
		||||
MD_FLOAT cutneighsq;    // neighbor cutoff squared
 | 
			
		||||
int nmax;
 | 
			
		||||
int nstencil;      // # of bins in stencil
 | 
			
		||||
int* stencil;      // stencil list of bin offsets
 | 
			
		||||
int nstencil;           // # of bins in stencil
 | 
			
		||||
int* stencil;           // stencil list of bin offsets
 | 
			
		||||
MD_FLOAT binsizex, binsizey, binsizez;
 | 
			
		||||
int me;                 //rank
 | 
			
		||||
int method;             // method
 | 
			
		||||
int half_stencil;       //If half stencil exist 
 | 
			
		||||
int shellMethod;        //If shell method exist   
 | 
			
		||||
 | 
			
		||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
 | 
			
		||||
static MD_FLOAT bindist(int, int, int);
 | 
			
		||||
static int ghostZone(Atom*, int);
 | 
			
		||||
static int eightZone(Atom*, int);
 | 
			
		||||
static int halfZone(Atom*, int);
 | 
			
		||||
static void neighborGhost(Atom*, Neighbor*);
 | 
			
		||||
static inline int interaction(Atom* atom, int i, int j);
 | 
			
		||||
 | 
			
		||||
/* exported subroutines */
 | 
			
		||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
			
		||||
@@ -51,7 +64,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
 | 
			
		||||
    neighbor->maxneighs = 100;
 | 
			
		||||
    neighbor->numneigh = NULL;
 | 
			
		||||
    neighbor->neighbors = NULL;
 | 
			
		||||
    //========== MPI =============
 | 
			
		||||
    shellMethod = 0;
 | 
			
		||||
    half_stencil = 0;
 | 
			
		||||
    method = param->method;
 | 
			
		||||
    if(method == halfShell || method == eightShell){ 
 | 
			
		||||
        param->half_neigh = 1;
 | 
			
		||||
        shellMethod = 1;
 | 
			
		||||
    }
 | 
			
		||||
    if(method == halfStencil){
 | 
			
		||||
        param->half_neigh = 0;
 | 
			
		||||
        half_stencil = 1;
 | 
			
		||||
    }
 | 
			
		||||
    me = 0;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD,&me);
 | 
			
		||||
    neighbor->half_neigh = param->half_neigh;
 | 
			
		||||
    neighbor->Nshell = 0;  
 | 
			
		||||
    neighbor->numNeighShell = NULL;
 | 
			
		||||
    neighbor->neighshell = NULL;
 | 
			
		||||
    neighbor->listshell = NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void setupNeighbor(Parameter* param) {
 | 
			
		||||
@@ -64,7 +95,6 @@ void setupNeighbor(Parameter* param) {
 | 
			
		||||
        yprd = param->yprd;
 | 
			
		||||
        zprd = param->zprd;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // TODO: update lo and hi for standard case and use them here instead
 | 
			
		||||
    MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
 | 
			
		||||
    MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
 | 
			
		||||
@@ -93,54 +123,48 @@ void setupNeighbor(Parameter* param) {
 | 
			
		||||
        bininvy = 1.0 / binsizey;
 | 
			
		||||
        bininvz = 1.0 / binsizez;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    coord = xlo - cutneigh - SMALL * xprd;
 | 
			
		||||
    mbinxlo = (int) (coord * bininvx);
 | 
			
		||||
    if (coord < 0.0) { mbinxlo = mbinxlo - 1; }
 | 
			
		||||
    coord = xhi + cutneigh + SMALL * xprd;
 | 
			
		||||
    mbinxhi = (int) (coord * bininvx);
 | 
			
		||||
 | 
			
		||||
    coord = ylo - cutneigh - SMALL * yprd;
 | 
			
		||||
    mbinylo = (int) (coord * bininvy);
 | 
			
		||||
    if (coord < 0.0) { mbinylo = mbinylo - 1; }
 | 
			
		||||
    coord = yhi + cutneigh + SMALL * yprd;
 | 
			
		||||
    mbinyhi = (int) (coord * bininvy);
 | 
			
		||||
 | 
			
		||||
    coord = zlo - cutneigh - SMALL * zprd;
 | 
			
		||||
    mbinzlo = (int) (coord * bininvz);
 | 
			
		||||
    if (coord < 0.0) { mbinzlo = mbinzlo - 1; }
 | 
			
		||||
    coord = zhi + cutneigh + SMALL * zprd;
 | 
			
		||||
    mbinzhi = (int) (coord * bininvz);
 | 
			
		||||
 | 
			
		||||
    mbinxlo = mbinxlo - 1;
 | 
			
		||||
    mbinxhi = mbinxhi + 1;
 | 
			
		||||
    mbinx = mbinxhi - mbinxlo + 1;
 | 
			
		||||
 | 
			
		||||
    mbinylo = mbinylo - 1;
 | 
			
		||||
    mbinyhi = mbinyhi + 1;
 | 
			
		||||
    mbiny = mbinyhi - mbinylo + 1;
 | 
			
		||||
 | 
			
		||||
    mbinzlo = mbinzlo - 1;
 | 
			
		||||
    mbinzhi = mbinzhi + 1;
 | 
			
		||||
    mbinz = mbinzhi - mbinzlo + 1;
 | 
			
		||||
    pad_x = (int)(cutneigh*bininvx);
 | 
			
		||||
    while(pad_x * binsizex < FACTOR * cutneigh) pad_x++;
 | 
			
		||||
    pad_y = (int)(cutneigh*bininvy);
 | 
			
		||||
    while(pad_y * binsizey < FACTOR * cutneigh) pad_y++;
 | 
			
		||||
    pad_z = (int)(cutneigh*bininvz);
 | 
			
		||||
    while(pad_z * binsizez < FACTOR * cutneigh) pad_z++;
 | 
			
		||||
 | 
			
		||||
    nextx = (int) (cutneigh * bininvx);
 | 
			
		||||
    if(nextx * binsizex < FACTOR * cutneigh) nextx++;
 | 
			
		||||
    if(nextx * binsizex < FACTOR * cutneigh){
 | 
			
		||||
        nextx++;
 | 
			
		||||
        pad_x++;
 | 
			
		||||
    } 
 | 
			
		||||
    nexty = (int) (cutneigh * bininvy);
 | 
			
		||||
    if(nexty * binsizey < FACTOR * cutneigh) nexty++;
 | 
			
		||||
    if(nexty * binsizey < FACTOR * cutneigh){
 | 
			
		||||
        nexty++;
 | 
			
		||||
        pad_y++;
 | 
			
		||||
    } 
 | 
			
		||||
    nextz = (int) (cutneigh * bininvz);
 | 
			
		||||
    if(nextz * binsizez < FACTOR * cutneigh) nextz++;
 | 
			
		||||
    if(nextz * binsizez < FACTOR * cutneigh){
 | 
			
		||||
        nextz++;
 | 
			
		||||
        pad_z++;
 | 
			
		||||
    } 
 | 
			
		||||
 | 
			
		||||
    mbinx = nbinx+4*pad_x;
 | 
			
		||||
    mbiny = nbiny+4*pad_y;
 | 
			
		||||
    mbinz = nbinz+4*pad_z;
 | 
			
		||||
 | 
			
		||||
    if (stencil) { free(stencil); }
 | 
			
		||||
    stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
 | 
			
		||||
    nstencil = 0;
 | 
			
		||||
 
 | 
			
		||||
    int kstart = -nextz;
 | 
			
		||||
 | 
			
		||||
    int jstart = -nexty; 
 | 
			
		||||
    int istart = -nextx;
 | 
			
		||||
    int ibin = 0;
 | 
			
		||||
    for(int k = kstart; k <= nextz; k++) {
 | 
			
		||||
        for(int j = -nexty; j <= nexty; j++) {
 | 
			
		||||
            for(int i = -nextx; i <= nextx; i++) {
 | 
			
		||||
                if(bindist(i, j, k) < cutneighsq) {
 | 
			
		||||
                    stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
 | 
			
		||||
        for(int j = jstart; j <= nexty; j++) {
 | 
			
		||||
            for(int i = istart; i <= nextx; i++) {
 | 
			
		||||
                if(bindist(i, j, k) < cutneighsq) {     
 | 
			
		||||
                    int jbin = k * mbiny * mbinx + j * mbinx + i;
 | 
			
		||||
                    if(ibin>jbin && half_stencil) continue;                  
 | 
			
		||||
                    stencil[nstencil++] = jbin;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
@@ -154,8 +178,7 @@ void setupNeighbor(Parameter* param) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
    int nall = atom->Nlocal + atom->Nghost;
 | 
			
		||||
 | 
			
		||||
    int nall = atom->Nlocal + atom->Nghost;  
 | 
			
		||||
    /* extend atom arrays if necessary */
 | 
			
		||||
    if(nall > nmax) {
 | 
			
		||||
        nmax = nall;
 | 
			
		||||
@@ -164,16 +187,13 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
        neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
 | 
			
		||||
        neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /* bin local & ghost atoms */
 | 
			
		||||
    binatoms(atom);
 | 
			
		||||
    int resize = 1;
 | 
			
		||||
 | 
			
		||||
    /* loop over each atom, storing neighbors */
 | 
			
		||||
    while(resize) {
 | 
			
		||||
        int new_maxneighs = neighbor->maxneighs;
 | 
			
		||||
        resize = 0;
 | 
			
		||||
 | 
			
		||||
        for(int i = 0; i < atom->Nlocal; i++) {
 | 
			
		||||
            int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
 | 
			
		||||
            int n = 0;
 | 
			
		||||
@@ -184,21 +204,22 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
            #ifdef EXPLICIT_TYPES
 | 
			
		||||
            int type_i = atom->type[i];
 | 
			
		||||
            #endif
 | 
			
		||||
 | 
			
		||||
            for(int k = 0; k < nstencil; k++) {
 | 
			
		||||
                int jbin = ibin + stencil[k];
 | 
			
		||||
                int* loc_bin = &bins[jbin * atoms_per_bin];
 | 
			
		||||
 | 
			
		||||
                for(int m = 0; m < bincount[jbin]; m++) {
 | 
			
		||||
                for(int m = 0; m < bincount[jbin]; m++) {    
 | 
			
		||||
                    int j = loc_bin[m];
 | 
			
		||||
                    if((j == i) || (neighbor->half_neigh && (j < i))) {
 | 
			
		||||
                        continue;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    
 | 
			
		||||
                    if((j==i) || (neighbor->half_neigh && (j<i))) 
 | 
			
		||||
                        continue;              
 | 
			
		||||
                    if(half_stencil && ibin==jbin  && !interaction(atom,i,j))
 | 
			
		||||
                        continue;          
 | 
			
		||||
                
 | 
			
		||||
                    MD_FLOAT delx = xtmp - atom_x(j);
 | 
			
		||||
                    MD_FLOAT dely = ytmp - atom_y(j);
 | 
			
		||||
                    MD_FLOAT dely = ytmp - atom_y(j);  
 | 
			
		||||
                    MD_FLOAT delz = ztmp - atom_z(j);
 | 
			
		||||
                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
 | 
			
		||||
 | 
			
		||||
                    #ifdef EXPLICIT_TYPES
 | 
			
		||||
                    int type_j = atom->type[j];
 | 
			
		||||
                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
 | 
			
		||||
@@ -210,8 +231,8 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            neighbor->numneigh[i] = n;
 | 
			
		||||
            
 | 
			
		||||
            if(n >= neighbor->maxneighs) {
 | 
			
		||||
                resize = 1;
 | 
			
		||||
 | 
			
		||||
@@ -220,14 +241,15 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(resize) {
 | 
			
		||||
            printf("RESIZE %d\n", neighbor->maxneighs);
 | 
			
		||||
            printf("RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
 | 
			
		||||
            neighbor->maxneighs = new_maxneighs * 1.2;
 | 
			
		||||
            free(neighbor->neighbors);
 | 
			
		||||
            neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(method == eightShell) neighborGhost(atom, neighbor);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* internal subroutines */
 | 
			
		||||
@@ -257,44 +279,28 @@ MD_FLOAT bindist(int i, int j, int k) {
 | 
			
		||||
    } else {
 | 
			
		||||
        delz = (k + 1) * binsizez;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return (delx * delx + dely * dely + delz * delz);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
 | 
			
		||||
    int ix, iy, iz;
 | 
			
		||||
 | 
			
		||||
    if(xin >= xprd) {
 | 
			
		||||
        ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
 | 
			
		||||
    } else if(xin >= 0.0) {
 | 
			
		||||
        ix = (int)(xin * bininvx) - mbinxlo;
 | 
			
		||||
    } else {
 | 
			
		||||
        ix = (int)(xin * bininvx) - mbinxlo - 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(yin >= yprd) {
 | 
			
		||||
        iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
 | 
			
		||||
    } else if(yin >= 0.0) {
 | 
			
		||||
        iy = (int)(yin * bininvy) - mbinylo;
 | 
			
		||||
    } else {
 | 
			
		||||
        iy = (int)(yin * bininvy) - mbinylo - 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(zin >= zprd) {
 | 
			
		||||
        iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
 | 
			
		||||
    } else if(zin >= 0.0) {
 | 
			
		||||
        iz = (int)(zin * bininvz) - mbinzlo;
 | 
			
		||||
    } else {
 | 
			
		||||
        iz = (int)(zin * bininvz) - mbinzlo - 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
 | 
			
		||||
   int ix, iy, iz;
 | 
			
		||||
   MD_FLOAT eps = 1e-9; 
 | 
			
		||||
   MD_FLOAT xlo=0.0; MD_FLOAT ylo=0.0; MD_FLOAT zlo=0.0;
 | 
			
		||||
   xlo = fabs(xlo - pad_x*binsizex)+eps;
 | 
			
		||||
   ylo = fabs(ylo - pad_y*binsizey)+eps;
 | 
			
		||||
   zlo = fabs(zlo - pad_z*binsizez)+eps;
 | 
			
		||||
   ix = (int) ((xin + xlo)*bininvx);
 | 
			
		||||
   iy = (int) ((yin + ylo)*bininvy);
 | 
			
		||||
   iz = (int) ((zin + zlo)*bininvz);
 | 
			
		||||
    
 | 
			
		||||
    return (iz * mbiny * mbinx + iy * mbinx + ix);
 | 
			
		||||
    //return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void binatoms(Atom *atom) {
 | 
			
		||||
void binatoms(Atom *atom) {    
 | 
			
		||||
    int nall = atom->Nlocal + atom->Nghost;
 | 
			
		||||
    int resize = 1;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    while(resize > 0) {
 | 
			
		||||
        resize = 0;
 | 
			
		||||
 | 
			
		||||
@@ -304,7 +310,7 @@ void binatoms(Atom *atom) {
 | 
			
		||||
 | 
			
		||||
        for(int i = 0; i < nall; i++) {
 | 
			
		||||
            int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
 | 
			
		||||
 | 
			
		||||
            if(shellMethod && !ghostZone(atom, i)) continue; 
 | 
			
		||||
            if(bincount[ibin] < atoms_per_bin) {
 | 
			
		||||
                int ac = bincount[ibin]++;
 | 
			
		||||
                bins[ibin * atoms_per_bin + ac] = i;
 | 
			
		||||
@@ -325,11 +331,9 @@ void sortAtom(Atom* atom) {
 | 
			
		||||
    binatoms(atom);
 | 
			
		||||
    int Nmax = atom->Nmax;
 | 
			
		||||
    int* binpos = bincount;
 | 
			
		||||
 | 
			
		||||
    for(int i = 1; i < mbins; i++) {
 | 
			
		||||
        binpos[i] += binpos[i - 1];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef AOS
 | 
			
		||||
    MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
			
		||||
    MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 | 
			
		||||
@@ -367,7 +371,6 @@ void sortAtom(Atom* atom) {
 | 
			
		||||
            #endif
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    free(atom->x);
 | 
			
		||||
    free(atom->vx);
 | 
			
		||||
    atom->x = new_x;
 | 
			
		||||
@@ -383,3 +386,158 @@ void sortAtom(Atom* atom) {
 | 
			
		||||
    atom->vz = new_vz;
 | 
			
		||||
    #endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* internal subroutines 
 | 
			
		||||
Added with MPI*/
 | 
			
		||||
 | 
			
		||||
/*
 * Zone id of atom i for the shell-based communication methods.
 *
 * Atoms with an index below atom->Nlocal always yield 1. For all other
 * (ghost) atoms the result is delegated to halfZone() or eightZone(),
 * depending on the file-level `method`; any other method yields 0.
 */
static int ghostZone(Atom* atom, int i){
    if(i < atom->Nlocal) {
        return 1;
    }

    if(method == halfShell) {
        return halfZone(atom, i);
    }

    if(method == eightShell) {
        return eightZone(atom, i);
    }

    return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Zone id (0..7) of atom i for the eight-shell method.
 *
 * A 3-bit code is assembled from the tests BigOrEqual(coordinate, hi) against
 * the upper bounds of the local box (bit 0: x, bit 1: y, bit 2: z) and then
 * translated through a fixed permutation table.
 */
static int eightZone(Atom* atom, int i)
{
    // Raw code -> zone id: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
    static const int remap[8] = {0, 1, 2, 6, 3, 5, 4, 7};
    const MD_FLOAT *upper = atom->mybox.hi;
    int code = 0;

    if(BigOrEqual(atom_x(i), upper[_x])) { code |= 1; }
    if(BigOrEqual(atom_y(i), upper[_y])) { code |= 2; }
    if(BigOrEqual(atom_z(i), upper[_z])) { code |= 4; }

    return remap[code];
}
 | 
			
		||||
 | 
			
		||||
/*
 * Half-shell filter for atom i: returns 0 when the atom falls in one of the
 * three excluded regions of the local box (atom->mybox), 1 otherwise.
 *
 * Excluded regions:
 *   - x below lo[x] while y and z are both below their hi bounds
 *   - y below lo[y] while z is below its hi bound
 *   - z below lo[z]
 */
static int halfZone(Atom* atom, int i)
{
    const MD_FLOAT *boxHi = atom->mybox.hi;
    const MD_FLOAT *boxLo = atom->mybox.lo;
    const MD_FLOAT px = atom_x(i);
    const MD_FLOAT py = atom_y(i);
    const MD_FLOAT pz = atom_z(i);

    const int excludedX = (px < boxLo[_x]) && (py < boxHi[_y]) && (pz < boxHi[_z]);
    const int excludedY = (py < boxLo[_y]) && (pz < boxHi[_z]);
    const int excludedZ = (pz < boxLo[_z]);

    return (excludedX || excludedY || excludedZ) ? 0 : 1;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Build neighbor lists for the ghost atoms that ghostZone() places in zones
 * 1-3 (called at the end of buildNeighbor_cpu when method == eightShell).
 *
 * Results are stored in `neighbor`:
 *   Nshell        - number of selected ghost atoms
 *   listshell     - their atom indices, grouped by zone (1, then 2, then 3)
 *   numNeighShell - neighbor count of each listed atom
 *   neighshell    - neighbor indices, neighbor->maxneighs slots per listed atom
 *
 * Whenever one atom fills or exceeds its row, neighbor->maxneighs is enlarged,
 * neighshell is reallocated and the whole search is restarted.
 *
 * NOTE(review): maxneighs is also the row stride buildNeighbor_cpu used when it
 * filled neighbor->neighbors; growing it here does not rebuild that array.
 * Confirm that the consumers of neighbor->neighbors cope with this.
 */
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
    int Nshell = 0;
    int Nlocal = atom->Nlocal;
    int Nghost = atom->Nghost;

    free(neighbor->listshell);      // free(NULL) is a no-op, no guard needed
    neighbor->listshell = (int*) malloc(Nghost * sizeof(int));
    int* listzone = (int*) malloc(8 * Nghost * sizeof(int));
    int countAtoms[8] = {0,0,0,0,0,0,0,0};

    // Bucket every ghost atom by zone; each zone owns a slice of Nghost entries
    for(int i = Nlocal; i < Nlocal + Nghost; i++) {
        int izone = ghostZone(atom, i);
        int *list = &listzone[Nghost * izone];
        list[countAtoms[izone]] = i;
        countAtoms[izone]++;
    }

    // Only atoms of zones 1..3 receive their own neighbor list
    for(int zone = 1; zone <= 3; zone++) {
        int *list = &listzone[Nghost * zone];
        for(int n = 0; n < countAtoms[zone]; n++) {
            neighbor->listshell[Nshell++] = list[n];
        }
    }

    neighbor->Nshell = Nshell;
    free(neighbor->numNeighShell);
    free(neighbor->neighshell);
    neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
    neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
    int resize = 1;

    while(resize) {
        resize = 0;

        for(int i = 0; i < Nshell; i++) {
            int *neighshell = &(neighbor->neighshell[i * neighbor->maxneighs]);
            int n = 0;
            int iatom = neighbor->listshell[i];
            int izone = ghostZone(atom, iatom);
            MD_FLOAT xtmp = atom_x(iatom);
            MD_FLOAT ytmp = atom_y(iatom);
            MD_FLOAT ztmp = atom_z(iatom);
            int ibin = coord2bin(xtmp, ytmp, ztmp);

            #ifdef EXPLICIT_TYPES
            int type_i = atom->type[iatom];
            #endif

            for(int k = 0; k < nstencil; k++) {
                int jbin = ibin + stencil[k];
                int* loc_bin = &bins[jbin * atoms_per_bin];

                for(int m = 0; m < bincount[jbin]; m++) {
                    int jatom = loc_bin[m];
                    int jzone = ghostZone(atom, jatom);

                    // Pair filter: partner must be in a higher zone, and some
                    // zone combinations are excluded explicitly
                    if(jzone <= izone) continue;
                    if(izone == 1 && (jzone==5||jzone==6||jzone==7)) continue;
                    if(izone == 2 && (jzone==4||jzone==6||jzone==7)) continue;
                    if(izone == 3 && (jzone==4||jzone==5||jzone==7)) continue;

                    MD_FLOAT delx = xtmp - atom_x(jatom);
                    MD_FLOAT dely = ytmp - atom_y(jatom);
                    MD_FLOAT delz = ztmp - atom_z(jatom);
                    MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;

                    #ifdef EXPLICIT_TYPES
                    int type_j = atom->type[jatom];
                    const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
                    #else
                    const MD_FLOAT cutoff = cutneighsq;
                    #endif

                    if(rsq <= cutoff) {
                        // Keep counting so the resize below sees the required
                        // capacity, but never write past this atom's row: the
                        // buffer holds exactly Nshell * maxneighs entries.
                        if(n < neighbor->maxneighs) {
                            neighshell[n] = jatom;
                        }
                        n++;
                    }
                }
            }

            neighbor->numNeighShell[i] = n;

            if(n >= neighbor->maxneighs) {
                // Row is full or overflowed: enlarge and redo all shell atoms
                resize = 1;
                neighbor->maxneighs = n * 1.2;
                break;
            }
        }

        if(resize) {
            free(neighbor->neighshell);
            neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
        }
    }

    free(listzone);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Decide whether the pair (i, j) is kept; returns 1 to keep it, 0 to drop it.
 *
 * If j is a local atom (j < atom->Nlocal) the pair is kept only when i < j.
 * Otherwise the positions are compared coordinate by coordinate: keep when
 * z(j) > z(i); on equal z keep when y(j) < y(i); on equal z and y keep when
 * x(j) < x(i).
 *
 * NOTE(review): z is compared with '>' while y and x use '<' - confirm this
 * asymmetry is intended.
 */
static inline int interaction(Atom* atom, int i, int j) {
    if(j < atom->Nlocal) {
        return i < j;
    }

    // From here on j is not a local atom
    if(atom_z(j) > atom_z(i)) {
        return 1;
    }
    if(!Equal(atom_z(j), atom_z(i))) {
        return 0;
    }

    if(atom_y(j) < atom_y(i)) {
        return 1;
    }
    if(!Equal(atom_y(j), atom_y(i))) {
        return 0;
    }

    return atom_x(j) < atom_x(i);
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										173
									
								
								lammps/vtk.c
									
									
									
									
									
								
							
							
						
						
									
										173
									
								
								lammps/vtk.c
									
									
									
									
									
								
							@@ -6,8 +6,12 @@
 | 
			
		||||
 */
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <vtk.h>
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
#include <atom.h>
 | 
			
		||||
static MPI_File _fh; 
 | 
			
		||||
static inline void flushBuffer(char*); 
 | 
			
		||||
 | 
			
		||||
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
			
		||||
    char timestep_filename[128];
 | 
			
		||||
@@ -18,12 +22,12 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
			
		||||
        fprintf(stderr, "Could not open VTK file for writing!\n");
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fprintf(fp, "# vtk DataFile Version 2.0\n");
 | 
			
		||||
    fprintf(fp, "Particle data\n");
 | 
			
		||||
    fprintf(fp, "ASCII\n");
 | 
			
		||||
    fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
 | 
			
		||||
    fprintf(fp, "POINTS %d double\n", atom->Nlocal);
 | 
			
		||||
 | 
			
		||||
    for(int i = 0; i < atom->Nlocal; ++i) {
 | 
			
		||||
        fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
 | 
			
		||||
    }
 | 
			
		||||
@@ -48,3 +52,168 @@ int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
 | 
			
		||||
    fclose(fp);
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Open "<filename>_<timestep>.vtk" through MPI-IO on MPI_COMM_WORLD and keep
 * the handle in the file-level _fh. Rank 0 (comm->myproc == 0) then hands the
 * legacy VTK header, including the POINTS line with atom->Natoms, to
 * flushBuffer().
 *
 * Returns 0 on success and -1 if the file could not be opened; in the latter
 * case _fh is left as MPI_FILE_NULL.
 */
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
{
    char timestep_filename[128];
    snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);

    // Check the return code as well: the handle is not guaranteed to be set
    // to MPI_FILE_NULL when the open fails.
    int err = MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
    if(err != MPI_SUCCESS || _fh == MPI_FILE_NULL) {
        _fh = MPI_FILE_NULL;
        if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
        return -1;
    }

    if(comm->myproc == 0) {
        // Format the header in one bounded call; feeding msg back into
        // sprintf() as a "%s" argument copies between overlapping objects,
        // which is undefined behaviour.
        char msg[256];
        snprintf(msg, sizeof msg,
                 "# vtk DataFile Version 2.0\n"
                 "Particle data\n"
                 "ASCII\n"
                 "DATASET UNSTRUCTURED_GRID\n"
                 "POINTS %d double\n",
                 atom->Natoms);
        flushBuffer(msg);
    }

    return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Write the atom coordinates of all ranks, followed by the CELLS, CELL_TYPES
 * and POINT_DATA sections, to the file previously opened by vtkOpen().
 *
 * Every rank formats its Nlocal positions into a text buffer; the buffers are
 * written with one collective MPI_File_write_all, each rank's text placed
 * after the text of the lower ranks. Rank 0 then appends the remaining
 * sections for atom->Natoms points.
 *
 * The function contains collective MPI calls and therefore has to be called
 * by every rank of MPI_COMM_WORLD.
 *
 * param is currently unused (see the TODO below).
 *
 * Returns 0 on success, -1 if the file is not open or rank 0 cannot allocate
 * the trailer buffer. If the coordinate buffer cannot be allocated the run is
 * aborted with MPI_Abort, because returning early would leave the other ranks
 * blocked in the collective calls.
 */
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
{
    if (_fh == MPI_FILE_NULL) {
        if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
        return -1;
    }

    // Initial guess: about 25 characters per "%.4f %.4f %.4f\n" line plus slack.
    int sizebuff = 25 * atom->Nlocal + 100;
    int mysize = 0;     // number of characters currently stored in msg
    char* msg = (char*) malloc(sizebuff);

    if(msg == NULL) {
        fprintf(stderr, "vtkVector: out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
        return -1;
    }
    msg[0] = '\0';

    for(int i = 0; i < atom->Nlocal; i++) {
        for(;;) {
            int avail = sizebuff - mysize;
            //TODO: do not forget to add param->xlo, param->ylo, param->zlo
            // Append at the current end instead of re-printing the buffer
            // into itself (undefined behaviour and quadratic cost).
            int n = snprintf(msg + mysize, avail, "%.4f %.4f %.4f\n",
                             atom_x(i), atom_y(i), atom_z(i));

            if(n >= 0 && n < avail) {
                mysize += n;
                break;
            }

            // The line did not fit: grow by 1.5x, or to the exact need if larger.
            char* grown = NULL;
            int newsize = sizebuff + sizebuff / 2;
            if(n >= 0) {
                if(newsize < mysize + n + 1) newsize = mysize + n + 1;
                grown = (char*) realloc(msg, newsize);
            }
            if(grown == NULL) {
                fprintf(stderr, "vtkVector: could not format atom positions\n");
                free(msg);
                MPI_Abort(MPI_COMM_WORLD, 1);
                return -1;
            }
            msg = grown;
            sizebuff = newsize;
        }
    }

    // Exchange the text lengths so each rank knows where its part starts.
    int gatherSize[comm->numproc];
    MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);

    int offset = 0;
    for(int i = 0; i < comm->myproc; i++)
        offset += gatherSize[i];

    int globalSize = 0;
    for(int i = 0; i < comm->numproc; i++)
        globalSize += gatherSize[i];

    MPI_Offset displ;
    MPI_Datatype FileType;
    int GlobalSize[] = {globalSize};
    int LocalSize[]  = {mysize};
    int Start[] = {offset};

    if(LocalSize[0] > 0) {
        MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
    } else {
        // A rank without text still takes part in the collective write,
        // using an empty file type.
        MPI_Type_vector(0, 0, 0, MPI_CHAR, &FileType);
    }
    MPI_Type_commit(&FileType);

    // The coordinate block starts at the current end of the file.
    MPI_File_get_size(_fh, &displ);
    MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
    MPI_File_write_all(_fh, msg, mysize, MPI_CHAR, MPI_STATUS_IGNORE);
    MPI_Barrier(MPI_COMM_WORLD);
    // Restore the default byte view so flushBuffer() offsets are absolute.
    MPI_File_set_view(_fh, 0, MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);

    // The view no longer refers to FileType and the blocking write is done,
    // so both resources can be released.
    MPI_Type_free(&FileType);
    free(msg);

    if(comm->myproc == 0) {
        // The trailer has Natoms lines per section, so it needs its own buffer:
        // the coordinate buffer was sized for Nlocal atoms only. The longest
        // line is "1 <int>\n" (under 16 characters); 128 bytes cover the
        // section headers and the terminating NUL.
        size_t cap = 16 * (size_t) atom->Natoms + 128;
        char* trailer = (char*) malloc(cap);
        size_t len;

        if(trailer == NULL) {
            fprintf(stderr, "vtkVector: out of memory\n");
            return -1;
        }

        // One single-point cell per atom.
        len = (size_t) snprintf(trailer, cap, "\n\nCELLS %d %d\n", atom->Natoms, atom->Natoms * 2);
        for(int i = 0; i < atom->Natoms; i++)
            len += (size_t) snprintf(trailer + len, cap - len, "1 %d\n", i);
        flushBuffer(trailer);

        len = (size_t) snprintf(trailer, cap, "\n\nCELL_TYPES %d\n", atom->Natoms);
        for(int i = 0; i < atom->Natoms; i++)
            len += (size_t) snprintf(trailer + len, cap - len, "1\n");
        flushBuffer(trailer);

        len = (size_t) snprintf(trailer, cap,
                                "\n\nPOINT_DATA %d\nSCALARS mass double\nLOOKUP_TABLE default\n",
                                atom->Natoms);
        for(int i = 0; i < atom->Natoms; i++)
            len += (size_t) snprintf(trailer + len, cap - len, "1.0\n");
        snprintf(trailer + len, cap - len, "\n\n");
        flushBuffer(trailer);

        free(trailer);
    }

    return 0;
}
 | 
			
		||||
 | 
			
		||||
void vtkClose()
 | 
			
		||||
{
 | 
			
		||||
    MPI_File_close(&_fh);
 | 
			
		||||
    _fh=MPI_FILE_NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Dump the ghost atoms of one rank to "<filename>_<timestep>_ghost<me>.vtk"
 * as a VTK legacy unstructured grid with one single-point cell per atom.
 *
 * Ghost atoms are read from indices Nlocal .. Nlocal+Nghost-1.
 *
 * filename  base name of the output file
 * atom      atom data providing Nlocal, Nghost and the positions
 * timestep  number inserted into the file name
 * me        number inserted into the file name after "ghost"
 *
 * Returns 0 on success, -1 if the file cannot be opened.
 */
int printGhost(const char* filename, Atom* atom, int timestep, int me) {
    char timestep_filename[128];
    snprintf(timestep_filename, sizeof timestep_filename, "%s_%d_ghost%i.vtk", filename, timestep,me);
    FILE* fp = fopen(timestep_filename, "wb");

    if(fp == NULL) {
        fprintf(stderr, "Could not open VTK file for writing!\n");
        return -1;
    }

    const int first  = atom->Nlocal;    // index of the first ghost atom
    const int nghost = atom->Nghost;

    fprintf(fp, "# vtk DataFile Version 2.0\n");
    fprintf(fp, "Particle data\n");
    fprintf(fp, "ASCII\n");
    fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
    fprintf(fp, "POINTS %d double\n", nghost);

    for(int i = first; i < first + nghost; ++i) {
        fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
    }

    // The file holds nghost points numbered 0..nghost-1, so the cell count and
    // the connectivity must refer to that range, not to Nlocal or to the
    // positions of the ghosts inside the atom arrays.
    fprintf(fp, "\n\n");
    fprintf(fp, "CELLS %d %d\n", nghost, nghost * 2);
    for(int c = 0; c < nghost; ++c) {
        fprintf(fp, "1 %d\n", c);
    }

    fprintf(fp, "\n\n");
    fprintf(fp, "CELL_TYPES %d\n", nghost);
    for(int c = 0; c < nghost; ++c) {
        fprintf(fp, "1\n");
    }

    fprintf(fp, "\n\n");
    fprintf(fp, "POINT_DATA %d\n", nghost);
    fprintf(fp, "SCALARS mass double\n");
    fprintf(fp, "LOOKUP_TABLE default\n");
    for(int c = 0; c < nghost; ++c) {
        fprintf(fp, "1.0\n");
    }

    fprintf(fp, "\n\n");
    fclose(fp);
    return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Write the atoms of the given timestep to a VTK file.
 *
 * With a single process the serial writer write_atoms_to_vtk_file() is used;
 * otherwise the file is produced by vtkOpen(), vtkVector() and vtkClose().
 * Return values of the writers are not inspected.
 */
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
{
    if(comm->numproc != 1) {
        vtkOpen(filename, comm, atom, timestep);
        vtkVector(comm, atom, param);
        vtkClose();
        // Debug aid (disabled): printGhost(filename, atom, timestep, comm->myproc);
    } else {
        write_atoms_to_vtk_file(filename, atom, timestep);
    }
}
 | 
			
		||||
 | 
			
		||||
static inline void flushBuffer(char* msg){
 | 
			
		||||
    MPI_Offset displ; 
 | 
			
		||||
    MPI_File_get_size(_fh, &displ);
 | 
			
		||||
    MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user