Merge master branch into stub
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
commit
78e6e5c773
2
Makefile
2
Makefile
@ -68,5 +68,5 @@ clean:
|
||||
|
||||
distclean: clean
|
||||
@echo "===> DIST CLEAN"
|
||||
@rm -f $(TARGET)
|
||||
@rm -f $(TARGET)*
|
||||
@rm -f tags
|
||||
|
@ -1,9 +1,9 @@
|
||||
# Supported: GCC, CLANG, ICC
|
||||
TAG ?= GCC
|
||||
TAG ?= ICC
|
||||
# SP or DP
|
||||
DATA_TYPE ?= DP
|
||||
# AOS or SOA
|
||||
DATA_LAYOUT ?= SOA
|
||||
DATA_LAYOUT ?= AOS
|
||||
|
||||
#Feature options
|
||||
OPTIONS += -DALIGNMENT=64 -DLIKWID_PERFMON
|
||||
|
@ -10,8 +10,6 @@ ANSI_CFLAGS += -Wextra
|
||||
CFLAGS = -O3 -march=znver1 -ffast-math -funroll-loops # -fopenmp
|
||||
ASFLAGS = -masm=intel
|
||||
LFLAGS =
|
||||
DEFINES = -D_GNU_SOURCE
|
||||
#INCLUDES =
|
||||
#LIBS = -lm
|
||||
DEFINES = -D_GNU_SOURCE -DLIKWID_PERFMON
|
||||
INCLUDES = $(LIKWID_INC)
|
||||
LIBS = -lm $(LIKWID_LIB) -llikwid
|
||||
|
@ -3,15 +3,15 @@ LINKER = $(CC)
|
||||
|
||||
OPENMP = #-qopenmp
|
||||
PROFILE = #-profile-functions -g -pg
|
||||
# OPTS = -fast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
|
||||
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
|
||||
#OPTS = -fast -xCORE-AVX2 $(PROFILE)
|
||||
#OPTS = -fast -xAVX $(PROFILE)
|
||||
#OPTS = -fast -xSSE4.2 $(PROFILE)
|
||||
#OPTS = -fast -no-vec $(PROFILE)
|
||||
OPTS = -fast -xHost $(PROFILE)
|
||||
#OPTS = -fast -xHost $(PROFILE)
|
||||
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
|
||||
ASFLAGS = -masm=intel
|
||||
ASFLAGS = #-masm=intel
|
||||
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
|
||||
DEFINES = -D_GNU_SOURCE # -DALIGNMENT=64 -DLIKWID_PERFMON -DPRECISION=1
|
||||
DEFINES = -D_GNU_SOURCE #-DLIKWID_PERFMON
|
||||
INCLUDES = #$(LIKWID_INC)
|
||||
LIBS = -lm #$(LIKWID_LIB) -llikwid
|
||||
|
65
src/force.c
65
src/force.c
@ -40,9 +40,8 @@ double computeForce(
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
||||
MD_FLOAT S, E;
|
||||
double S, E;
|
||||
|
||||
S = getTimeStamp();
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
fx[i] = 0.0;
|
||||
fy[i] = 0.0;
|
||||
@ -50,49 +49,47 @@ double computeForce(
|
||||
}
|
||||
|
||||
if(profile) {
|
||||
LIKWID_MARKER_START("force");
|
||||
// LIKWID_MARKER_START("force");
|
||||
}
|
||||
|
||||
for(int t = 0; t < ntimes; t++) {
|
||||
#pragma omp parallel for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
// printf("%d: %d\n", i, numneighs);
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
|
||||
fx[i] += fix;
|
||||
fy[i] += fiy;
|
||||
fz[i] += fiz;
|
||||
}
|
||||
|
||||
fx[i] += fix;
|
||||
fy[i] += fiy;
|
||||
fz[i] += fiz;
|
||||
}
|
||||
|
||||
if(profile) {
|
||||
LIKWID_MARKER_STOP("force");
|
||||
// LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
return 0.0;
|
||||
}
|
||||
|
@ -158,11 +158,17 @@ int main(int argc, const char *argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0;
|
||||
const double estim_volume = (double)
|
||||
(atom->Nlocal * 6 * sizeof(MD_FLOAT) +
|
||||
atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0;
|
||||
printf("System size (unit cells): %dx%dx%d\n", param.nx, param.ny, param.nz);
|
||||
printf("Atoms per unit cell: %d\n", atoms_per_unit_cell);
|
||||
printf("Total number of atoms: %d\n", atom->Nlocal);
|
||||
printf("Estimated memory volume (kB): %.4f\n", estim_volume);
|
||||
printf("Estimated total data volume (kB): %.4f\n", estim_volume );
|
||||
printf("Estimated atom data volume (kB): %.4f\n",
|
||||
(double)(atom->Nlocal * 3 * sizeof(MD_FLOAT) / 1000.0));
|
||||
printf("Estimated neighborlist data volume (kB): %.4f\n",
|
||||
(double)(atom->Nlocal * (atoms_per_unit_cell - 1 + 2) * sizeof(int)) / 1000.0);
|
||||
|
||||
DEBUG("Initializing neighbor lists...\n");
|
||||
initNeighbor(&neighbor, &param);
|
||||
@ -171,10 +177,20 @@ int main(int argc, const char *argv[]) {
|
||||
DEBUG("Building neighbor lists...\n");
|
||||
buildNeighbor(atom, &neighbor);
|
||||
DEBUG("Computing forces...\n");
|
||||
computeForce(&param, atom, &neighbor, 0, 1);
|
||||
computeForce(&param, atom, &neighbor, 0);
|
||||
|
||||
double T_accum = computeForce(&param, atom, &neighbor, 1, param.ntimes);
|
||||
printf("Total time: %.4f, Time/force: %.4f\n", T_accum, T_accum / param.ntimes);
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("force");
|
||||
for(int i = 0; i < param.ntimes; i++) {
|
||||
computeForce(&param, atom, &neighbor, 1);
|
||||
}
|
||||
LIKWID_MARKER_STOP("force");
|
||||
E = getTimeStamp();
|
||||
double T_accum = E-S;
|
||||
|
||||
printf("Total time: %.4f, Mega atom updates/s: %.4f\n",
|
||||
T_accum, atom->Nlocal * param.ntimes/T_accum/1.E6);
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user