diff --git a/lammps/atom.c b/lammps/atom.c index 79b8f64..d6fac51 100644 --- a/lammps/atom.c +++ b/lammps/atom.c @@ -440,16 +440,17 @@ void growAtom(Atom *atom) { #ifdef AOS atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); + atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); #else atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); #endif atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); - atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); - atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); - atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int)); } diff --git a/lammps/force_eam.c b/lammps/force_eam.c index b686abf..539d488 100644 --- a/lammps/force_eam.c +++ b/lammps/force_eam.c @@ -41,7 +41,7 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo int Nlocal = atom->Nlocal; int* neighs; - MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp; + int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp; MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline; MD_FLOAT rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; MD_FLOAT rdrho = eam->rdrho; int nrho = eam->nrho; int nrho_tot = eam->nrho_tot; @@ -200,9 +200,9 @@ double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbo } } - fx[i] = fix; - fy[i] = fiy; - fz[i] = fiz; + atom_fx(i) = fix; + atom_fy(i) = fiy; + atom_fz(i) = fiz; addStat(stats->total_force_neighs, numneighs); addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); } diff --git a/lammps/force_lj.c b/lammps/force_lj.c index 68ae157..82fe953 100644 --- a/lammps/force_lj.c +++ b/lammps/force_lj.c @@ -31,9 +31,6 @@ double computeForceLJFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { int Nlocal = atom->Nlocal; int* neighs; - MD_FLOAT* fx = atom->fx; - MD_FLOAT* fy = atom->fy; - MD_FLOAT* fz = atom->fz; #ifndef EXPLICIT_TYPES MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; @@ -41,9 +38,9 @@ double computeForceLJFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, #endif for(int i = 0; i < Nlocal; i++) { - fx[i] = 0.0; - fy[i] = 0.0; - fz[i] = 0.0; + atom_fx(i) = 0.0; + atom_fy(i) = 0.0; + atom_fz(i) = 0.0; } double S = getTimeStamp(); @@ -94,9 +91,9 @@ double computeForceLJFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, } } - fx[i] += fix; - fy[i] += fiy; - fz[i] += fiz; + atom_fx(i) += fix; + atom_fy(i) += fiy; + atom_fz(i) += fiz; addStat(stats->total_force_neighs, numneighs); addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); @@ -110,9 +107,6 @@ double computeForceLJFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { int Nlocal = atom->Nlocal; int* neighs; - MD_FLOAT* fx = atom->fx; - MD_FLOAT* fy = atom->fy; - MD_FLOAT* fz = atom->fz; #ifndef EXPLICIT_TYPES MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT sigma6 = param->sigma6; @@ -120,9 +114,9 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, #endif for(int i = 0; i < Nlocal; i++) { - fx[i] = 0.0; - fy[i] = 0.0; - fz[i] = 0.0; + atom_fx(i) = 0.0; + atom_fy(i) = 0.0; + atom_fz(i) = 0.0; } double S = getTimeStamp(); @@ -167,17 +161,16 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, // We do not need to update forces for ghost atoms if(j < Nlocal) { - // TODO: Experiment with AoS layout for forces - fx[j] -= delx * force; - fy[j] -= dely * force; - fz[j] -= delz * force; + atom_fx(j) -= delx * force; + atom_fy(j) -= dely * force; + atom_fz(j) -= delz * force; } } } - fx[i] += fix; - fy[i] += fiy; - fz[i] += fiz; + atom_fx(i) += fix; + atom_fy(i) += fiy; + atom_fz(i) += fiz; addStat(stats->total_force_neighs, numneighs); addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH); diff --git a/lammps/includes/atom.h b/lammps/includes/atom.h index 159a497..cf161c9 100644 --- a/lammps/includes/atom.h +++ b/lammps/includes/atom.h @@ -52,11 +52,17 @@ extern void growAtom(Atom*); #define atom_x(i) atom->x[(i) * 3 + 0] #define atom_y(i) atom->x[(i) * 3 + 1] #define atom_z(i) atom->x[(i) * 3 + 2] +#define atom_fx(i) atom->fx[(i) * 3 + 0] +#define atom_fy(i) atom->fx[(i) * 3 + 1] +#define atom_fz(i) atom->fx[(i) * 3 + 2] #else #define POS_DATA_LAYOUT "SoA" #define atom_x(i) atom->x[i] #define atom_y(i) atom->y[i] #define atom_z(i) atom->z[i] +#define atom_fx(i) atom->fx[i] +#define atom_fy(i) atom->fy[i] +#define atom_fz(i) atom->fz[i] #endif #endif diff --git a/lammps/main.c b/lammps/main.c index 9347369..3b7b63c 100644 --- a/lammps/main.c +++ b/lammps/main.c @@ -93,13 +93,12 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) { } void initialIntegrate(Parameter *param, Atom *atom) { - MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { - vx[i] += param->dtforce * fx[i]; - vy[i] += param->dtforce * fy[i]; - vz[i] += param->dtforce * fz[i]; + vx[i] += param->dtforce * atom_fx(i); + vy[i] += param->dtforce * atom_fy(i); + vz[i] += param->dtforce * atom_fz(i); atom_x(i) = atom_x(i) + param->dt * vx[i]; atom_y(i) = atom_y(i) + param->dt * vy[i]; atom_z(i) = atom_z(i) + param->dt * vz[i]; @@ -111,9 +110,9 @@ void finalIntegrate(Parameter *param, Atom *atom) { MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { - vx[i] += param->dtforce * fx[i]; - vy[i] += param->dtforce * fy[i]; - vz[i] += param->dtforce * fz[i]; + vx[i] += param->dtforce * atom_fx(i); + vy[i] += param->dtforce * atom_fy(i); + vz[i] += param->dtforce * atom_fz(i); } } diff --git a/lammps/tracing.c b/lammps/tracing.c index d91a97a..a6d07cf 100644 --- a/lammps/tracing.c +++ b/lammps/tracing.c @@ -30,7 +30,6 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest INDEX_TRACER_INIT; int Nlocal = atom->Nlocal; int* neighs; - MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs); for(int i = 0; i < Nlocal; i++) { @@ -60,12 +59,12 @@ void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timest #endif } - MEM_TRACE(fx[i], 'R'); - MEM_TRACE(fx[i], 'W'); - MEM_TRACE(fy[i], 'R'); - MEM_TRACE(fy[i], 'W'); - MEM_TRACE(fz[i], 'R'); - MEM_TRACE(fz[i], 'W'); + MEM_TRACE(atom_fx(i), 'R'); + MEM_TRACE(atom_fx(i), 'W'); + MEM_TRACE(atom_fy(i), 'R'); + MEM_TRACE(atom_fy(i), 'W'); + MEM_TRACE(atom_fz(i), 'R'); + MEM_TRACE(atom_fz(i), 'W'); } INDEX_TRACER_END;