Add a version that iterates the innermost loop multiple times
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
48
src/force.c
48
src/force.c
@@ -27,12 +27,12 @@
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
|
||||
double computeForce(
|
||||
Parameter *param,
|
||||
Atom *atom,
|
||||
Neighbor *neighbor,
|
||||
int profile)
|
||||
{
|
||||
// Number of times to execute the innermost loop
|
||||
#ifndef INTERNAL_LOOP_NTIMES
|
||||
#define INTERNAL_LOOP_NTIMES 1
|
||||
#endif
|
||||
|
||||
double computeForce(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
int Nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
@@ -47,10 +47,6 @@ double computeForce(
|
||||
fz[i] = 0.0;
|
||||
}
|
||||
|
||||
if(profile) {
|
||||
// LIKWID_MARKER_START("force");
|
||||
}
|
||||
|
||||
#pragma omp parallel for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
@@ -64,20 +60,22 @@ double computeForce(
|
||||
|
||||
// printf("%d: %d\n", i, numneighs);
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
for(int n = 0; n < INTERNAL_LOOP_NTIMES; n++) {
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,9 +84,5 @@ double computeForce(
|
||||
fz[i] += fiz;
|
||||
}
|
||||
|
||||
if(profile) {
|
||||
// LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
@@ -16,7 +16,7 @@
|
||||
#define LATTICE_DISTANCE 10.0
|
||||
#define NEIGH_DISTANCE 1.0
|
||||
|
||||
extern double computeForce( Parameter*, Atom*, Neighbor*, int);
|
||||
extern double computeForce( Parameter*, Atom*, Neighbor*);
|
||||
|
||||
void init(Parameter *param) {
|
||||
param->epsilon = 1.0;
|
||||
@@ -188,19 +188,19 @@ int main(int argc, const char *argv[]) {
|
||||
DEBUG("Building neighbor lists...\n");
|
||||
buildNeighbor(atom, &neighbor);
|
||||
DEBUG("Computing forces...\n");
|
||||
computeForce(&param, atom, &neighbor, 0);
|
||||
computeForce(&param, atom, &neighbor);
|
||||
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("force");
|
||||
for(int i = 0; i < param.ntimes; i++) {
|
||||
computeForce(&param, atom, &neighbor, 1);
|
||||
computeForce(&param, atom, &neighbor);
|
||||
}
|
||||
LIKWID_MARKER_STOP("force");
|
||||
E = getTimeStamp();
|
||||
double T_accum = E-S;
|
||||
const double atoms_updates_per_sec = atom->Nlocal * param.ntimes / T_accum;
|
||||
const double cycles_per_atom = T_accum * freq / (atom->Nlocal * param.ntimes);
|
||||
const double atoms_updates_per_sec = (double)(atom->Nlocal * INTERNAL_LOOP_NTIMES * param.ntimes) / T_accum;
|
||||
const double cycles_per_atom = T_accum * freq / (double)(atom->Nlocal * param.ntimes * INTERNAL_LOOP_NTIMES);
|
||||
const double cycles_per_neigh = cycles_per_atom / (double)(atoms_per_unit_cell - 1);
|
||||
|
||||
if(!csv) {
|
||||
|
@@ -47,7 +47,7 @@ typedef enum {
|
||||
NUMTIMER
|
||||
} timertype;
|
||||
|
||||
extern double computeForce( Parameter*, Atom*, Neighbor*, int);
|
||||
extern double computeForce( Parameter*, Atom*, Neighbor*);
|
||||
|
||||
void init(Parameter *param)
|
||||
{
|
||||
@@ -205,7 +205,7 @@ int main (int argc, char** argv)
|
||||
|
||||
setup(&param, &atom, &neighbor);
|
||||
computeThermo(0, &param, &atom);
|
||||
computeForce(&param, &atom, &neighbor, 1);
|
||||
computeForce(&param, &atom, &neighbor);
|
||||
|
||||
timer[FORCE] = 0.0;
|
||||
timer[NEIGH] = 0.0;
|
||||
@@ -221,7 +221,7 @@ int main (int argc, char** argv)
|
||||
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
|
||||
}
|
||||
|
||||
timer[FORCE] += computeForce(&param, &atom, &neighbor, 1);
|
||||
timer[FORCE] += computeForce(&param, &atom, &neighbor);
|
||||
finalIntegrate(&param, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
|
Reference in New Issue
Block a user