/*
 * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
 * All rights reserved. This file is part of MD-Bench.
 * Use of this source code is governed by a LGPL-3.0
 * license that can be found in the LICENSE file.
 */
|
2022-08-16 18:36:47 +02:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
//---
|
|
|
|
#include <atom.h>
|
2021-03-25 06:49:28 +01:00
|
|
|
#include <likwid-marker.h>
|
|
|
|
#include <neighbor.h>
|
|
|
|
#include <parameter.h>
|
2021-12-01 00:07:45 +01:00
|
|
|
#include <stats.h>
|
2022-08-16 18:36:47 +02:00
|
|
|
#include <timing.h>
|
2021-03-25 06:49:28 +01:00
|
|
|
|
2022-11-15 00:55:46 +01:00
|
|
|
#ifdef __SIMD_KERNEL__
|
2022-08-17 17:56:31 +02:00
|
|
|
#include <simd.h>
|
2022-08-16 18:36:47 +02:00
|
|
|
#endif
|
2022-04-05 02:57:23 +02:00
|
|
|
|
|
|
|
/*
 * Lennard-Jones force kernel (plain C) working on a full neighbor list.
 *
 * The force components of the first atom->Nlocal atoms are reset to zero.
 * Afterwards, for every local atom i, the contributions of all neighbors
 * listed in neighbor->neighbors[i * maxneighs .. + numneigh[i]) whose squared
 * distance is below the cutoff are accumulated and added to atom_fx/fy/fz(i).
 * Only the forces of atom i are written inside the parallel loop.
 *
 * param:    provides cutforce, sigma6 and epsilon when EXPLICIT_TYPES is not
 *           defined (unused otherwise).
 * atom:     positions are read, forces are written; with EXPLICIT_TYPES the
 *           per-type-pair tables cutforcesq/sigma6/epsilon are read as well.
 * neighbor: neighbor lists (neighbors, numneigh, maxneighs).
 * stats:    counters updated through addStat.
 *
 * Returns the difference of the two getTimeStamp() values taken directly
 * before and after the parallel force loop (the zeroing loop is not timed).
 */
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    int Nlocal = atom->Nlocal;
#ifndef EXPLICIT_TYPES
    // Single parameter set shared by all atom pairs
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;
#endif
    // Literals stored as MD_FLOAT so the force expression only has MD_FLOAT operands
    const MD_FLOAT num1 = 1.0;
    const MD_FLOAT num48 = 48.0;
    const MD_FLOAT num05 = 0.5;

    for(int i = 0; i < Nlocal; i++) {
        atom_fx(i) = 0.0;
        atom_fy(i) = 0.0;
        atom_fz(i) = 0.0;
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
    LIKWID_MARKER_START("force");

    #pragma omp for
    for(int i = 0; i < Nlocal; i++) {
        // Declared inside the loop body so that every thread owns its pointer;
        // a function-scope declaration would be shared in the parallel region
        // and raced on by all threads.
        const int *neighs = &neighbor->neighbors[i * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[i];
        MD_FLOAT xtmp = atom_x(i);
        MD_FLOAT ytmp = atom_y(i);
        MD_FLOAT ztmp = atom_z(i);
        MD_FLOAT fix = 0;
        MD_FLOAT fiy = 0;
        MD_FLOAT fiz = 0;

#ifdef EXPLICIT_TYPES
        const int type_i = atom->type[i];
#endif

        for(int k = 0; k < numneighs; k++) {
            int j = neighs[k];
            MD_FLOAT delx = xtmp - atom_x(j);
            MD_FLOAT dely = ytmp - atom_y(j);
            MD_FLOAT delz = ztmp - atom_z(j);
            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;

#ifdef EXPLICIT_TYPES
            // Look up the parameters of the (type_i, type_j) pair
            const int type_j = atom->type[j];
            const int type_ij = type_i * atom->ntypes + type_j;
            const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
            const MD_FLOAT sigma6 = atom->sigma6[type_ij];
            const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif

            if(rsq < cutforcesq) {
                MD_FLOAT sr2 = num1 / rsq;
                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
                MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
                fix += delx * force;
                fiy += dely * force;
                fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
                // Reference build additionally counts pairs inside/outside the cutoff
                addStat(stats->atoms_within_cutoff, 1);
            } else {
                addStat(stats->atoms_outside_cutoff, 1);
#endif
            }
        }

        atom_fx(i) += fix;
        atom_fy(i) += fiy;
        atom_fz(i) += fiz;

        // NOTE(review): addStat is invoked from inside the parallel region; if it
        // expands to a plain increment the counters are updated concurrently -
        // confirm against its definition.
        addStat(stats->total_force_neighs, numneighs);
        // Number of VECTOR_WIDTH-sized chunks needed for this neighbor list (rounded up)
        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
    }

    LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    return E-S;
}
|
2022-03-10 16:30:37 +01:00
|
|
|
|
2022-03-18 01:28:11 +01:00
|
|
|
/*
 * Lennard-Jones force kernel working on a half neighbor list.
 *
 * The force components of the first atom->Nlocal atoms are reset to zero.
 * For every local atom i and every listed neighbor j within the cutoff, the
 * pair force is added to atom i and subtracted from atom j, the latter only
 * when j is a local atom (j < Nlocal).
 *
 * param:    provides cutforce, sigma6 and epsilon when EXPLICIT_TYPES is not
 *           defined (unused otherwise).
 * atom:     positions are read, forces are written; with EXPLICIT_TYPES the
 *           per-type-pair tables cutforcesq/sigma6/epsilon are read as well.
 * neighbor: neighbor lists (neighbors, numneigh, maxneighs).
 * stats:    counters updated through addStat.
 *
 * Returns the difference of the two getTimeStamp() values taken directly
 * before and after the parallel force loop (the zeroing loop is not timed).
 */
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    int Nlocal = atom->Nlocal;
#ifndef EXPLICIT_TYPES
    // Single parameter set shared by all atom pairs
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;
#endif
    // Literals stored as MD_FLOAT so the force expression only has MD_FLOAT operands
    const MD_FLOAT num1 = 1.0;
    const MD_FLOAT num48 = 48.0;
    const MD_FLOAT num05 = 0.5;

    for(int i = 0; i < Nlocal; i++) {
        atom_fx(i) = 0.0;
        atom_fy(i) = 0.0;
        atom_fz(i) = 0.0;
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
    LIKWID_MARKER_START("forceLJ-halfneigh");

    #pragma omp for
    for(int i = 0; i < Nlocal; i++) {
        // Declared inside the loop body so that every thread owns its pointer;
        // a function-scope declaration would be shared in the parallel region
        // and raced on by all threads.
        const int *neighs = &neighbor->neighbors[i * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[i];
        MD_FLOAT xtmp = atom_x(i);
        MD_FLOAT ytmp = atom_y(i);
        MD_FLOAT ztmp = atom_z(i);
        MD_FLOAT fix = 0;
        MD_FLOAT fiy = 0;
        MD_FLOAT fiz = 0;

#ifdef EXPLICIT_TYPES
        const int type_i = atom->type[i];
#endif

        // Pragma required to vectorize the inner loop
#ifdef ENABLE_OMP_SIMD
        #pragma omp simd reduction(+: fix,fiy,fiz)
#endif
        for(int k = 0; k < numneighs; k++) {
            int j = neighs[k];
            MD_FLOAT delx = xtmp - atom_x(j);
            MD_FLOAT dely = ytmp - atom_y(j);
            MD_FLOAT delz = ztmp - atom_z(j);
            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;

#ifdef EXPLICIT_TYPES
            // Look up the parameters of the (type_i, type_j) pair
            const int type_j = atom->type[j];
            const int type_ij = type_i * atom->ntypes + type_j;
            const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
            const MD_FLOAT sigma6 = atom->sigma6[type_ij];
            const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif

            if(rsq < cutforcesq) {
                MD_FLOAT sr2 = num1 / rsq;
                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
                MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
                fix += delx * force;
                fiy += dely * force;
                fiz += delz * force;

                // We do not need to update forces for ghost atoms
                // NOTE(review): these updates (and the atom_f*(i) updates below) are
                // not synchronized; when the loop runs with more than one OpenMP
                // thread, different threads can modify the force of the same atom.
                // Needs atomic updates or per-thread force buffers - left unchanged
                // here because either choice alters the measured kernel.
                if(j < Nlocal) {
                    atom_fx(j) -= delx * force;
                    atom_fy(j) -= dely * force;
                    atom_fz(j) -= delz * force;
                }
            }
        }

        atom_fx(i) += fix;
        atom_fy(i) += fiy;
        atom_fz(i) += fiz;

        // NOTE(review): addStat is invoked from inside the parallel region; if it
        // expands to a plain increment the counters are updated concurrently -
        // confirm against its definition.
        addStat(stats->total_force_neighs, numneighs);
        // Number of VECTOR_WIDTH-sized chunks needed for this neighbor list (rounded up)
        addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
    }

    LIKWID_MARKER_STOP("forceLJ-halfneigh");
    }

    double E = getTimeStamp();
    return E-S;
}
|
2022-04-05 02:57:23 +02:00
|
|
|
|
|
|
|
/*
 * Lennard-Jones force kernel on a full neighbor list, written with the
 * project's SIMD wrapper functions.
 *
 * The force components of the first atom->Nlocal atoms are reset to zero.
 * If __SIMD_KERNEL__ is not defined, an error is printed to stderr and the
 * process exits with status -1. Otherwise each neighbor list is processed in
 * chunks of VECTOR_WIDTH entries; lanes beyond the end of the list and lanes
 * outside the cutoff are masked out of the accumulation.
 *
 * This kernel always takes cutforce, sigma6 and epsilon from param; it has no
 * EXPLICIT_TYPES code path.
 *
 * param:    provides cutforce, sigma6 and epsilon.
 * atom:     positions are read, forces of local atoms are written.
 * neighbor: neighbor lists (neighbors, numneigh, maxneighs).
 * stats:    not used by this kernel.
 *
 * Returns the difference of the two getTimeStamp() values taken directly
 * before and after the force computation (the zeroing loop is not timed).
 */
double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    int Nlocal = atom->Nlocal;
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;

    for(int i = 0; i < Nlocal; i++) {
        atom_fx(i) = 0.0;
        atom_fy(i) = 0.0;
        atom_fz(i) = 0.0;
    }

    double S = getTimeStamp();

#ifndef __SIMD_KERNEL__
    fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
    exit(-1);
#else
    // Scalar parameters replicated across all SIMD lanes
    MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
    MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
    MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);

    #pragma omp parallel
    {
    LIKWID_MARKER_START("force");

    #pragma omp for
    for(int i = 0; i < Nlocal; i++) {
        // Declared inside the loop body so that every thread owns its pointer;
        // a function-scope declaration would be shared in the parallel region
        // and raced on by all threads.
        const int *neighs = &neighbor->neighbors[i * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[i];
        MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
        MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
        MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
        MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
        MD_SIMD_FLOAT fix = simd_zero();
        MD_SIMD_FLOAT fiy = simd_zero();
        MD_SIMD_FLOAT fiz = simd_zero();

        for(int k = 0; k < numneighs; k += VECTOR_WIDTH) {
            // If the last iteration of this loop is separated from the rest, this mask can be set only there
            MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(simd_int_add(simd_int_broadcast(k), simd_int_seq()), numneighs_vec);
            MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
            // Differences use simd_sub like the rest of the kernel instead of
            // relying on operator support for MD_SIMD_FLOAT.
#ifdef AOS
            // Interleaved x/y/z storage: component c of atom j sits at index 3 * j + c
            MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
            MD_SIMD_FLOAT delx = simd_sub(xtmp, simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT)));
            MD_SIMD_FLOAT dely = simd_sub(ytmp, simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT)));
            MD_SIMD_FLOAT delz = simd_sub(ztmp, simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT)));
#else
            MD_SIMD_FLOAT delx = simd_sub(xtmp, simd_gather(j, atom->x, sizeof(MD_FLOAT)));
            MD_SIMD_FLOAT dely = simd_sub(ytmp, simd_gather(j, atom->y, sizeof(MD_FLOAT)));
            MD_SIMD_FLOAT delz = simd_sub(ztmp, simd_gather(j, atom->z, sizeof(MD_FLOAT)));
#endif
            MD_SIMD_FLOAT rsq = simd_fma(delx, delx, simd_fma(dely, dely, simd_mul(delz, delz)));
            // A lane contributes only if it maps to a real neighbor AND lies within the cutoff
            MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs, simd_mask_cond_lt(rsq, cutforcesq_vec));
            MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
            MD_SIMD_FLOAT sr6 = simd_mul(sr2, simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
            MD_SIMD_FLOAT force = simd_mul(c48_vec, simd_mul(sr6, simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));

            fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
            fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
            fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
        }

        // Collapse the per-lane partial sums into the scalar force of atom i
        atom_fx(i) += simd_h_reduce_sum(fix);
        atom_fy(i) += simd_h_reduce_sum(fiy);
        atom_fz(i) += simd_h_reduce_sum(fiz);
    }

    LIKWID_MARKER_STOP("force");
    }
#endif

    double E = getTimeStamp();
    return E-S;
}
|