2021-03-25 06:49:28 +01:00
|
|
|
/*
|
2022-09-05 10:39:42 +02:00
|
|
|
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
|
|
|
* All rights reserved. This file is part of MD-Bench.
|
|
|
|
* Use of this source code is governed by a LGPL-3.0
|
|
|
|
* license that can be found in the LICENSE file.
|
2021-03-25 06:49:28 +01:00
|
|
|
*/
|
2022-01-31 17:49:22 +01:00
|
|
|
#include <stdio.h>
|
2021-03-25 06:49:28 +01:00
|
|
|
|
2022-01-31 17:49:22 +01:00
|
|
|
#include <likwid-marker.h>
|
2021-03-25 06:49:28 +01:00
|
|
|
#include <timing.h>
|
|
|
|
#include <neighbor.h>
|
|
|
|
#include <parameter.h>
|
|
|
|
#include <atom.h>
|
2021-12-01 00:07:45 +01:00
|
|
|
#include <stats.h>
|
2022-02-01 20:16:04 +01:00
|
|
|
#include <util.h>
|
2022-02-02 18:00:44 +01:00
|
|
|
#include <simd.h>
|
2022-02-01 20:16:04 +01:00
|
|
|
|
2021-03-25 06:49:28 +01:00
|
|
|
|
2023-03-28 22:30:30 +02:00
|
|
|
/*
|
2023-03-23 00:58:25 +01:00
|
|
|
static inline void gmx_load_simd_2xnn_interactions(
|
|
|
|
int excl,
|
|
|
|
MD_SIMD_BITMASK filter0, MD_SIMD_BITMASK filter2,
|
|
|
|
MD_SIMD_MASK *interact0, MD_SIMD_MASK *interact2) {
|
|
|
|
|
|
|
|
//SimdInt32 mask_pr_S(excl);
|
|
|
|
MD_SIMD_INT32 mask_pr_S = simd_int32_broadcast(excl);
|
2023-03-23 02:17:27 +01:00
|
|
|
*interact0 = cvtIB2B(simd_test_bits(mask_pr_S & filter0));
|
|
|
|
*interact2 = cvtIB2B(simd_test_bits(mask_pr_S & filter2));
|
2023-03-23 00:58:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void gmx_load_simd_4xn_interactions(
|
|
|
|
int excl,
|
|
|
|
MD_SIMD_BITMASK filter0, MD_SIMD_BITMASK filter1, MD_SIMD_BITMASK filter2, MD_SIMD_BITMASK filter3,
|
|
|
|
MD_SIMD_MASK *interact0, MD_SIMD_MASK *interact1, MD_SIMD_MASK *interact2, MD_SIMD_MASK *interact3) {
|
|
|
|
|
|
|
|
//SimdInt32 mask_pr_S(excl);
|
|
|
|
MD_SIMD_INT32 mask_pr_S = simd_int32_broadcast(excl);
|
|
|
|
*interact0 = cvtIB2B(simd_test_bits(mask_pr_S & filter0));
|
|
|
|
*interact1 = cvtIB2B(simd_test_bits(mask_pr_S & filter1));
|
|
|
|
*interact2 = cvtIB2B(simd_test_bits(mask_pr_S & filter2));
|
|
|
|
*interact3 = cvtIB2B(simd_test_bits(mask_pr_S & filter3));
|
|
|
|
}
|
2023-03-28 22:30:30 +02:00
|
|
|
*/
|
2023-03-23 00:58:25 +01:00
|
|
|
|
2022-02-02 18:00:44 +01:00
|
|
|
/*
 * Scalar reference implementation of the Lennard-Jones force kernel on
 * atom clusters.
 *
 * First clears the force entries of all local i-clusters, then, for every
 * i-cluster, walks its neighbor j-clusters and accumulates the pair force
 * for each (cii, cjj) atom pair that passes the exclusion condition and
 * whose squared distance is below cutforce^2. When neighbor->half_neigh is
 * set, the opposite force is additionally subtracted from the j-atom.
 *
 * param    - supplies cutforce, sigma6 and epsilon
 * atom     - cluster positions (cl_x) are read, cluster forces (cl_f) written
 * neighbor - supplies neighbors, numneigh, maxneighs and half_neigh
 * stats    - counters updated through addStat()
 *
 * Returns the difference between the getTimeStamp() readings taken right
 * before and right after the parallel force loop (force clearing excluded).
 */
double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    DEBUG_MESSAGE("computeForceLJ begin\n");
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;

    /* Reset the forces of the atoms actually present in each local cluster. */
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
            ci_f[CL_X_OFFSET + cii] = 0.0;
            ci_f[CL_Y_OFFSET + cii] = 0.0;
            ci_f[CL_Z_OFFSET + cii] = 0.0;
        }
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
        LIKWID_MARKER_START("force");

        #pragma omp for schedule(runtime)
        for(int ci = 0; ci < atom->Nclusters_local; ci++) {
            int ci_cj0 = CJ0_FROM_CI(ci);
            #if CLUSTER_M > CLUSTER_N
            /* Only needed when one i-cluster spans two j-clusters. */
            int ci_cj1 = CJ1_FROM_CI(ci);
            #endif
            int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
            MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
            MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
            /*
             * The neighbor-list pointer must be private to the iteration:
             * a variable declared outside the parallel region is shared
             * between threads, so assigning it here would be a data race.
             */
            int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
            int numneighs = neighbor->numneigh[ci];

            for(int k = 0; k < numneighs; k++) {
                int cj = neighs[k];
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
                int any = 0;
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
                MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];

                for(int cii = 0; cii < CLUSTER_M; cii++) {
                    MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
                    MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
                    MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii];
                    MD_FLOAT fix = 0;
                    MD_FLOAT fiy = 0;
                    MD_FLOAT fiz = 0;

                    for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
                        /*
                         * cond is zero for pairs that must be skipped when the
                         * j-cluster overlaps the i-cluster itself: the atom's
                         * self-pair for full lists, and additionally one of
                         * the two orderings of each pair for half lists.
                         */
                        int cond;
                        #if CLUSTER_M == CLUSTER_N
                        cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) :
                                                      (ci_cj0 != cj || cii != cjj);
                        #elif CLUSTER_M < CLUSTER_N
                        cond = neighbor->half_neigh ? (ci_cj0 != cj || cii + CLUSTER_M * (ci & 0x1) < cjj) :
                                                      (ci_cj0 != cj || cii + CLUSTER_M * (ci & 0x1) != cjj);
                        #else
                        cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
                                                      (ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
                        #endif
                        if(cond) {
                            MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
                            MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
                            MD_FLOAT delz = ztmp - cj_x[CL_Z_OFFSET + cjj];
                            MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
                            if(rsq < cutforcesq) {
                                MD_FLOAT sr2 = 1.0 / rsq;
                                MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
                                MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;

                                if(neighbor->half_neigh) {
                                    /*
                                     * NOTE(review): with more than one thread, several
                                     * iterations may update the same j-cluster force
                                     * without synchronisation - confirm whether half
                                     * lists are meant to run multi-threaded here.
                                     */
                                    cj_f[CL_X_OFFSET + cjj] -= delx * force;
                                    cj_f[CL_Y_OFFSET + cjj] -= dely * force;
                                    cj_f[CL_Z_OFFSET + cjj] -= delz * force;
                                }

                                fix += delx * force;
                                fiy += dely * force;
                                fiz += delz * force;
                                any = 1;
                                addStat(stats->atoms_within_cutoff, 1);
                            } else {
                                addStat(stats->atoms_outside_cutoff, 1);
                            }
                        }
                    }

                    /*
                     * NOTE(review): this counter is updated once per i-atom
                     * (inside the cii loop), not once per cluster pair.
                     */
                    if(any != 0) {
                        addStat(stats->clusters_within_cutoff, 1);
                    } else {
                        addStat(stats->clusters_outside_cutoff, 1);
                    }

                    ci_f[CL_X_OFFSET + cii] += fix;
                    ci_f[CL_Y_OFFSET + cii] += fiy;
                    ci_f[CL_Z_OFFSET + cii] += fiz;
                }
            }

            addStat(stats->calculated_forces, 1);
            addStat(stats->num_neighs, numneighs);
            addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
        }

        LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    DEBUG_MESSAGE("computeForceLJ end\n");
    return E-S;
}
|
2022-02-02 18:00:44 +01:00
|
|
|
|
2022-03-22 23:47:05 +01:00
|
|
|
/*
 * SIMD Lennard-Jones force kernel in the "2xNN" layout for half neighbor
 * lists: each SIMD register holds two i-atoms (loaded with
 * simd_load_h_dual) against the j-cluster duplicated into both halves
 * (simd_load_h_duplicate).
 *
 * For every i-cluster the neighbor list is processed in two parts:
 *   - entries [0, numneigh_masked): the cutoff mask is combined with an
 *     exclusion mask looked up in atom->masks_2xnn_hn, selected by whether
 *     the j-cluster coincides with the i-cluster's own j-cluster index;
 *   - entries [numneigh_masked, numneigh): only the cutoff mask is applied.
 * In both parts the computed force is added to the i-atoms and subtracted
 * from the j-atoms through simd_h_decr3().
 *
 * param    - supplies cutforce, sigma6 and epsilon
 * atom     - cluster positions (cl_x) are read, cluster forces (cl_f) written
 * neighbor - supplies neighbors, numneigh, numneigh_masked and maxneighs
 * stats    - counters updated through addStat()
 *
 * Returns the difference between the getTimeStamp() readings taken right
 * before and right after the parallel force loop (force clearing excluded).
 */
double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    DEBUG_MESSAGE("computeForceLJ_2xnn begin\n");
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;
    MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
    MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
    MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);

    /* Reset the forces of the atoms actually present in each local cluster. */
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
            ci_f[CL_X_OFFSET + cii] = 0.0;
            ci_f[CL_Y_OFFSET + cii] = 0.0;
            ci_f[CL_Z_OFFSET + cii] = 0.0;
        }
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
        LIKWID_MARKER_START("force");

        #pragma omp for schedule(runtime)
        for(int ci = 0; ci < atom->Nclusters_local; ci++) {
            int ci_cj0 = CJ0_FROM_CI(ci);
            #if CLUSTER_M > CLUSTER_N
            int ci_cj1 = CJ1_FROM_CI(ci);
            #endif
            int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
            MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
            MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
            /*
             * The neighbor-list pointer must be private to the iteration:
             * a variable declared outside the parallel region is shared
             * between threads, so assigning it here would be a data race.
             */
            int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
            int numneighs = neighbor->numneigh[ci];
            int numneighs_masked = neighbor->numneigh_masked[ci];

            MD_SIMD_FLOAT xi0_tmp = simd_load_h_dual(&ci_x[CL_X_OFFSET + 0]);
            MD_SIMD_FLOAT xi2_tmp = simd_load_h_dual(&ci_x[CL_X_OFFSET + 2]);
            MD_SIMD_FLOAT yi0_tmp = simd_load_h_dual(&ci_x[CL_Y_OFFSET + 0]);
            MD_SIMD_FLOAT yi2_tmp = simd_load_h_dual(&ci_x[CL_Y_OFFSET + 2]);
            MD_SIMD_FLOAT zi0_tmp = simd_load_h_dual(&ci_x[CL_Z_OFFSET + 0]);
            MD_SIMD_FLOAT zi2_tmp = simd_load_h_dual(&ci_x[CL_Z_OFFSET + 2]);
            MD_SIMD_FLOAT fix0 = simd_zero();
            MD_SIMD_FLOAT fiy0 = simd_zero();
            MD_SIMD_FLOAT fiz0 = simd_zero();
            MD_SIMD_FLOAT fix2 = simd_zero();
            MD_SIMD_FLOAT fiy2 = simd_zero();
            MD_SIMD_FLOAT fiz2 = simd_zero();

            /* Part 1: neighbors that need the exclusion mask. */
            for(int k = 0; k < numneighs_masked; k++) {
                int cj = neighs[k];
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
                MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];

                MD_SIMD_FLOAT xj_tmp = simd_load_h_duplicate(&cj_x[CL_X_OFFSET]);
                MD_SIMD_FLOAT yj_tmp = simd_load_h_duplicate(&cj_x[CL_Y_OFFSET]);
                MD_SIMD_FLOAT zj_tmp = simd_load_h_duplicate(&cj_x[CL_Z_OFFSET]);
                MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
                MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
                MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
                MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
                MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
                MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
                MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
                MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));

                /* Pick the exclusion masks for this (ci, cj) combination. */
                #if CLUSTER_M == CLUSTER_N
                unsigned int cond0 = (unsigned int)(cj == ci_cj0);
                MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_2xnn_hn[cond0 * 2 + 0]);
                MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_2xnn_hn[cond0 * 2 + 1]);
                #else
                #if CLUSTER_M < CLUSTER_N
                unsigned int cond0 = (unsigned int)((cj << 1) + 0 == ci);
                unsigned int cond1 = (unsigned int)((cj << 1) + 1 == ci);
                #else
                unsigned int cond0 = (unsigned int)(cj == ci_cj0);
                unsigned int cond1 = (unsigned int)(cj == ci_cj1);
                #endif
                MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_2xnn_hn[cond0 * 4 + cond1 * 2 + 0]);
                MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_2xnn_hn[cond0 * 4 + cond1 * 2 + 1]);
                #endif

                MD_SIMD_MASK cutoff_mask0 = simd_mask_cond_lt(rsq0, cutforcesq_vec);
                MD_SIMD_MASK cutoff_mask2 = simd_mask_cond_lt(rsq2, cutforcesq_vec);
                cutoff_mask0 = simd_mask_and(cutoff_mask0, excl_mask0);
                cutoff_mask2 = simd_mask_and(cutoff_mask2, excl_mask2);

                MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
                MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
                MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
                MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
                MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
                MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;

                MD_SIMD_FLOAT tx0 = select_by_mask(delx0 * force0, cutoff_mask0);
                MD_SIMD_FLOAT ty0 = select_by_mask(dely0 * force0, cutoff_mask0);
                MD_SIMD_FLOAT tz0 = select_by_mask(delz0 * force0, cutoff_mask0);
                MD_SIMD_FLOAT tx2 = select_by_mask(delx2 * force2, cutoff_mask2);
                MD_SIMD_FLOAT ty2 = select_by_mask(dely2 * force2, cutoff_mask2);
                MD_SIMD_FLOAT tz2 = select_by_mask(delz2 * force2, cutoff_mask2);

                fix0 += tx0;
                fiy0 += ty0;
                fiz0 += tz0;
                fix2 += tx2;
                fiy2 += ty2;
                fiz2 += tz2;

                /*
                 * Apply the opposite force to the j-cluster.
                 * NOTE(review): with more than one thread, several iterations
                 * may update the same j-cluster force without synchronisation
                 * - confirm how half lists are meant to run multi-threaded.
                 */
                #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
                if(cj < CJ1_FROM_CI(atom->Nlocal)) {
                    simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
                }
                #else
                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
                #endif
            }

            /* Part 2: remaining neighbors, cutoff check only. */
            for(int k = numneighs_masked; k < numneighs; k++) {
                int cj = neighs[k];
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
                MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];

                MD_SIMD_FLOAT xj_tmp = simd_load_h_duplicate(&cj_x[CL_X_OFFSET]);
                MD_SIMD_FLOAT yj_tmp = simd_load_h_duplicate(&cj_x[CL_Y_OFFSET]);
                MD_SIMD_FLOAT zj_tmp = simd_load_h_duplicate(&cj_x[CL_Z_OFFSET]);
                MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
                MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
                MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
                MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
                MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
                MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
                MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
                MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));

                MD_SIMD_MASK cutoff_mask0 = simd_mask_cond_lt(rsq0, cutforcesq_vec);
                MD_SIMD_MASK cutoff_mask2 = simd_mask_cond_lt(rsq2, cutforcesq_vec);

                MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
                MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
                MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
                MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
                MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
                MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;

                MD_SIMD_FLOAT tx0 = select_by_mask(delx0 * force0, cutoff_mask0);
                MD_SIMD_FLOAT ty0 = select_by_mask(dely0 * force0, cutoff_mask0);
                MD_SIMD_FLOAT tz0 = select_by_mask(delz0 * force0, cutoff_mask0);
                MD_SIMD_FLOAT tx2 = select_by_mask(delx2 * force2, cutoff_mask2);
                MD_SIMD_FLOAT ty2 = select_by_mask(dely2 * force2, cutoff_mask2);
                MD_SIMD_FLOAT tz2 = select_by_mask(delz2 * force2, cutoff_mask2);

                fix0 += tx0;
                fiy0 += ty0;
                fiz0 += tz0;
                fix2 += tx2;
                fiy2 += ty2;
                fiz2 += tz2;

                #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
                if(cj < CJ1_FROM_CI(atom->Nlocal)) {
                    simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
                }
                #else
                simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
                #endif
            }

            /* Reduce the per-lane partial sums into the i-cluster forces. */
            simd_h_dual_incr_reduced_sum(&ci_f[CL_X_OFFSET], fix0, fix2);
            simd_h_dual_incr_reduced_sum(&ci_f[CL_Y_OFFSET], fiy0, fiy2);
            simd_h_dual_incr_reduced_sum(&ci_f[CL_Z_OFFSET], fiz0, fiz2);

            addStat(stats->calculated_forces, 1);
            addStat(stats->num_neighs, numneighs);
            addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
        }

        LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
    return E-S;
}
|
|
|
|
|
|
|
|
/*
 * SIMD Lennard-Jones force kernel in the "2xNN" layout for full neighbor
 * lists: each SIMD register holds two i-atoms (loaded with
 * simd_load_h_dual) against the j-cluster duplicated into both halves
 * (simd_load_h_duplicate). Only the i-cluster forces are updated.
 *
 * For every i-cluster the neighbor list is processed in two parts:
 *   - entries [0, numneigh_masked): the cutoff mask is combined with an
 *     exclusion mask looked up in atom->masks_2xnn_fn, selected by whether
 *     the j-cluster coincides with the i-cluster's own j-cluster index;
 *   - entries [numneigh_masked, numneigh): only the cutoff mask is applied.
 *
 * param    - supplies cutforce, sigma6 and epsilon
 * atom     - cluster positions (cl_x) are read, cluster forces (cl_f) written
 * neighbor - supplies neighbors, numneigh, numneigh_masked and maxneighs
 * stats    - counters updated through addStat()
 *
 * Returns the difference between the getTimeStamp() readings taken right
 * before and right after the parallel force loop (force clearing excluded).
 */
double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    DEBUG_MESSAGE("computeForceLJ_2xnn begin\n");
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;
    MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
    MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
    MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);

    /* Reset the forces of the atoms actually present in each local cluster. */
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
            ci_f[CL_X_OFFSET + cii] = 0.0;
            ci_f[CL_Y_OFFSET + cii] = 0.0;
            ci_f[CL_Z_OFFSET + cii] = 0.0;
        }
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
        LIKWID_MARKER_START("force");

        #pragma omp for schedule(runtime)
        for(int ci = 0; ci < atom->Nclusters_local; ci++) {
            int ci_cj0 = CJ0_FROM_CI(ci);
            #if CLUSTER_M > CLUSTER_N
            int ci_cj1 = CJ1_FROM_CI(ci);
            #endif
            int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
            MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
            MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
            /*
             * The neighbor-list pointer must be private to the iteration:
             * a variable declared outside the parallel region is shared
             * between threads, so assigning it here would be a data race.
             */
            int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
            int numneighs = neighbor->numneigh[ci];
            int numneighs_masked = neighbor->numneigh_masked[ci];

            MD_SIMD_FLOAT xi0_tmp = simd_load_h_dual(&ci_x[CL_X_OFFSET + 0]);
            MD_SIMD_FLOAT xi2_tmp = simd_load_h_dual(&ci_x[CL_X_OFFSET + 2]);
            MD_SIMD_FLOAT yi0_tmp = simd_load_h_dual(&ci_x[CL_Y_OFFSET + 0]);
            MD_SIMD_FLOAT yi2_tmp = simd_load_h_dual(&ci_x[CL_Y_OFFSET + 2]);
            MD_SIMD_FLOAT zi0_tmp = simd_load_h_dual(&ci_x[CL_Z_OFFSET + 0]);
            MD_SIMD_FLOAT zi2_tmp = simd_load_h_dual(&ci_x[CL_Z_OFFSET + 2]);
            MD_SIMD_FLOAT fix0 = simd_zero();
            MD_SIMD_FLOAT fiy0 = simd_zero();
            MD_SIMD_FLOAT fiz0 = simd_zero();
            MD_SIMD_FLOAT fix2 = simd_zero();
            MD_SIMD_FLOAT fiy2 = simd_zero();
            MD_SIMD_FLOAT fiz2 = simd_zero();

            /* Part 1: neighbors that need the exclusion mask. */
            for(int k = 0; k < numneighs_masked; k++) {
                int cj = neighs[k];
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];

                MD_SIMD_FLOAT xj_tmp = simd_load_h_duplicate(&cj_x[CL_X_OFFSET]);
                MD_SIMD_FLOAT yj_tmp = simd_load_h_duplicate(&cj_x[CL_Y_OFFSET]);
                MD_SIMD_FLOAT zj_tmp = simd_load_h_duplicate(&cj_x[CL_Z_OFFSET]);
                MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
                MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
                MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
                MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
                MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
                MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
                MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
                MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));

                /* Pick the exclusion masks for this (ci, cj) combination. */
                #if CLUSTER_M == CLUSTER_N
                unsigned int cond0 = (unsigned int)(cj == ci_cj0);
                MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_2xnn_fn[cond0 * 2 + 0]);
                MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_2xnn_fn[cond0 * 2 + 1]);
                #else
                #if CLUSTER_M < CLUSTER_N
                unsigned int cond0 = (unsigned int)((cj << 1) + 0 == ci);
                unsigned int cond1 = (unsigned int)((cj << 1) + 1 == ci);
                #else
                unsigned int cond0 = (unsigned int)(cj == ci_cj0);
                unsigned int cond1 = (unsigned int)(cj == ci_cj1);
                #endif
                MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_2xnn_fn[cond0 * 4 + cond1 * 2 + 0]);
                MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_2xnn_fn[cond0 * 4 + cond1 * 2 + 1]);
                #endif

                MD_SIMD_MASK cutoff_mask0 = simd_mask_and(excl_mask0, simd_mask_cond_lt(rsq0, cutforcesq_vec));
                MD_SIMD_MASK cutoff_mask2 = simd_mask_and(excl_mask2, simd_mask_cond_lt(rsq2, cutforcesq_vec));

                MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
                MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
                MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
                MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
                MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
                MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;

                fix0 = simd_masked_add(fix0, simd_mul(delx0, force0), cutoff_mask0);
                fiy0 = simd_masked_add(fiy0, simd_mul(dely0, force0), cutoff_mask0);
                fiz0 = simd_masked_add(fiz0, simd_mul(delz0, force0), cutoff_mask0);
                fix2 = simd_masked_add(fix2, simd_mul(delx2, force2), cutoff_mask2);
                fiy2 = simd_masked_add(fiy2, simd_mul(dely2, force2), cutoff_mask2);
                fiz2 = simd_masked_add(fiz2, simd_mul(delz2, force2), cutoff_mask2);
            }

            /* Part 2: remaining neighbors, cutoff check only. */
            for(int k = numneighs_masked; k < numneighs; k++) {
                int cj = neighs[k];
                int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
                MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];

                MD_SIMD_FLOAT xj_tmp = simd_load_h_duplicate(&cj_x[CL_X_OFFSET]);
                MD_SIMD_FLOAT yj_tmp = simd_load_h_duplicate(&cj_x[CL_Y_OFFSET]);
                MD_SIMD_FLOAT zj_tmp = simd_load_h_duplicate(&cj_x[CL_Z_OFFSET]);
                MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
                MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
                MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
                MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
                MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
                MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
                MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
                MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));

                MD_SIMD_MASK cutoff_mask0 = simd_mask_cond_lt(rsq0, cutforcesq_vec);
                MD_SIMD_MASK cutoff_mask2 = simd_mask_cond_lt(rsq2, cutforcesq_vec);

                MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
                MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
                MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
                MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
                MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
                MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;

                fix0 = simd_masked_add(fix0, simd_mul(delx0, force0), cutoff_mask0);
                fiy0 = simd_masked_add(fiy0, simd_mul(dely0, force0), cutoff_mask0);
                fiz0 = simd_masked_add(fiz0, simd_mul(delz0, force0), cutoff_mask0);
                fix2 = simd_masked_add(fix2, simd_mul(delx2, force2), cutoff_mask2);
                fiy2 = simd_masked_add(fiy2, simd_mul(dely2, force2), cutoff_mask2);
                fiz2 = simd_masked_add(fiz2, simd_mul(delz2, force2), cutoff_mask2);
            }

            /* Reduce the per-lane partial sums into the i-cluster forces. */
            simd_h_dual_incr_reduced_sum(&ci_f[CL_X_OFFSET], fix0, fix2);
            simd_h_dual_incr_reduced_sum(&ci_f[CL_Y_OFFSET], fiy0, fiy2);
            simd_h_dual_incr_reduced_sum(&ci_f[CL_Z_OFFSET], fiz0, fiz2);

            addStat(stats->calculated_forces, 1);
            addStat(stats->num_neighs, numneighs);
            /* Counted as numneighs, without any CLUSTER_M / CLUSTER_N scaling. */
            addStat(stats->force_iters, (long long int)((double)numneighs));
        }

        LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    DEBUG_MESSAGE("computeForceLJ_2xnn end\n");
    return E-S;
}
|
|
|
|
|
2022-03-22 23:47:05 +01:00
|
|
|
/*
 * Entry point for the 2xNN Lennard-Jones kernel: forwards to the
 * half-neighbor-list variant when neighbor->half_neigh is non-zero and to
 * the full-neighbor-list variant otherwise, returning whatever the selected
 * kernel returns.
 */
double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    return neighbor->half_neigh
        ? computeForceLJ_2xnn_half(param, atom, neighbor, stats)
        : computeForceLJ_2xnn_full(param, atom, neighbor, stats);
}
|
|
|
|
|
|
|
|
/*
 * Lennard-Jones force kernel, 4xN cluster layout, half neighbor lists.
 *
 * For every local i-cluster the four i-atoms are broadcast into SIMD
 * registers and paired with each neighboring j-cluster. The pair force is
 *     force = 48 * sr6 * (sr6 - 0.5) * sr2 * epsilon
 * with sr2 = 1 / rsq and sr6 = sr2^3 * sigma6. Contributions are added to the
 * i-cluster forces and subtracted from the j-cluster forces (cj_f).
 *
 * The neighbor list of a cluster is processed in two parts:
 *   - entries [0, numneigh_masked): an exclusion mask taken from
 *     atom->masks_4xn_hn is AND-ed with the cutoff mask;
 *   - entries [numneigh_masked, numneigh): only the cutoff mask is applied.
 *
 * param    : supplies cutforce, sigma6 and epsilon.
 * atom     : cluster coordinates (cl_x), forces (cl_f) and mask tables.
 * neighbor : neighbor lists and per-cluster neighbor counts.
 * stats    : counters updated through addStat().
 *
 * Returns the difference of two getTimeStamp() readings taken immediately
 * before and after the parallel force loop.
 */
double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;
    MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
    MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
    MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);

    /* Reset the force entries of all atoms in the local i-clusters. */
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
            ci_f[CL_X_OFFSET + cii] = 0.0;
            ci_f[CL_Y_OFFSET + cii] = 0.0;
            ci_f[CL_Z_OFFSET + cii] = 0.0;
        }
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
    LIKWID_MARKER_START("force");

    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        #if CLUSTER_M > CLUSTER_N
        int ci_cj1 = CJ1_FROM_CI(ci);
        #endif
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        /*
         * Declared inside the loop body on purpose: a pointer declared outside
         * the parallel region is shared between threads, so concurrent
         * iterations would overwrite each other's neighbor list pointer.
         */
        int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[ci];
        int numneighs_masked = neighbor->numneigh_masked[ci];

        /* One SIMD register per i-atom and coordinate (4 i-atoms per cluster). */
        MD_SIMD_FLOAT xi0_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 0]);
        MD_SIMD_FLOAT xi1_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 1]);
        MD_SIMD_FLOAT xi2_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 2]);
        MD_SIMD_FLOAT xi3_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 3]);
        MD_SIMD_FLOAT yi0_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 0]);
        MD_SIMD_FLOAT yi1_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 1]);
        MD_SIMD_FLOAT yi2_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 2]);
        MD_SIMD_FLOAT yi3_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 3]);
        MD_SIMD_FLOAT zi0_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 0]);
        MD_SIMD_FLOAT zi1_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 1]);
        MD_SIMD_FLOAT zi2_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 2]);
        MD_SIMD_FLOAT zi3_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 3]);
        MD_SIMD_FLOAT fix0 = simd_zero();
        MD_SIMD_FLOAT fiy0 = simd_zero();
        MD_SIMD_FLOAT fiz0 = simd_zero();
        MD_SIMD_FLOAT fix1 = simd_zero();
        MD_SIMD_FLOAT fiy1 = simd_zero();
        MD_SIMD_FLOAT fiz1 = simd_zero();
        MD_SIMD_FLOAT fix2 = simd_zero();
        MD_SIMD_FLOAT fiy2 = simd_zero();
        MD_SIMD_FLOAT fiz2 = simd_zero();
        MD_SIMD_FLOAT fix3 = simd_zero();
        MD_SIMD_FLOAT fiy3 = simd_zero();
        MD_SIMD_FLOAT fiz3 = simd_zero();

        /* Part 1: neighbors that need an exclusion mask on top of the cutoff. */
        for(int k = 0; k < numneighs_masked; k++) {
            int cj = neighs[k];
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
            MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
            MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
            MD_SIMD_FLOAT zj_tmp = simd_load(&cj_x[CL_Z_OFFSET]);
            MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
            MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
            MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
            MD_SIMD_FLOAT delx1 = xi1_tmp - xj_tmp;
            MD_SIMD_FLOAT dely1 = yi1_tmp - yj_tmp;
            MD_SIMD_FLOAT delz1 = zi1_tmp - zj_tmp;
            MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
            MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
            MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
            MD_SIMD_FLOAT delx3 = xi3_tmp - xj_tmp;
            MD_SIMD_FLOAT dely3 = yi3_tmp - yj_tmp;
            MD_SIMD_FLOAT delz3 = zi3_tmp - zj_tmp;

            /*
             * Select the exclusion masks from the half-list table. The table
             * row depends on whether cj is the j-cluster that overlaps with ci
             * (presumably to drop self and double-counted pairs - confirm
             * against the code that fills masks_4xn_hn).
             */
            #if CLUSTER_M == CLUSTER_N
            unsigned int cond0 = (unsigned int)(cj == ci_cj0);
            MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 4 + 0]);
            MD_SIMD_MASK excl_mask1 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 4 + 1]);
            MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 4 + 2]);
            MD_SIMD_MASK excl_mask3 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 4 + 3]);
            #else
            #if CLUSTER_M < CLUSTER_N
            unsigned int cond0 = (unsigned int)((cj << 1) + 0 == ci);
            unsigned int cond1 = (unsigned int)((cj << 1) + 1 == ci);
            #else
            unsigned int cond0 = (unsigned int)(cj == ci_cj0);
            unsigned int cond1 = (unsigned int)(cj == ci_cj1);
            #endif
            MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 0]);
            MD_SIMD_MASK excl_mask1 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 1]);
            MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 2]);
            MD_SIMD_MASK excl_mask3 = simd_mask_from_u32(atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 3]);
            #endif

            MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
            MD_SIMD_FLOAT rsq1 = simd_fma(delx1, delx1, simd_fma(dely1, dely1, delz1 * delz1));
            MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));
            MD_SIMD_FLOAT rsq3 = simd_fma(delx3, delx3, simd_fma(dely3, dely3, delz3 * delz3));

            MD_SIMD_MASK cutoff_mask0 = simd_mask_and(excl_mask0, simd_mask_cond_lt(rsq0, cutforcesq_vec));
            MD_SIMD_MASK cutoff_mask1 = simd_mask_and(excl_mask1, simd_mask_cond_lt(rsq1, cutforcesq_vec));
            MD_SIMD_MASK cutoff_mask2 = simd_mask_and(excl_mask2, simd_mask_cond_lt(rsq2, cutforcesq_vec));
            MD_SIMD_MASK cutoff_mask3 = simd_mask_and(excl_mask3, simd_mask_cond_lt(rsq3, cutforcesq_vec));

            MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
            MD_SIMD_FLOAT sr2_1 = simd_reciprocal(rsq1);
            MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
            MD_SIMD_FLOAT sr2_3 = simd_reciprocal(rsq3);

            MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
            MD_SIMD_FLOAT sr6_1 = sr2_1 * sr2_1 * sr2_1 * sigma6_vec;
            MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
            MD_SIMD_FLOAT sr6_3 = sr2_3 * sr2_3 * sr2_3 * sigma6_vec;

            MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
            MD_SIMD_FLOAT force1 = c48_vec * sr6_1 * (sr6_1 - c05_vec) * sr2_1 * eps_vec;
            MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;
            MD_SIMD_FLOAT force3 = c48_vec * sr6_3 * (sr6_3 - c05_vec) * sr2_3 * eps_vec;

            /* Contributions of lanes that fail the mask are filtered out here. */
            MD_SIMD_FLOAT tx0 = select_by_mask(delx0 * force0, cutoff_mask0);
            MD_SIMD_FLOAT ty0 = select_by_mask(dely0 * force0, cutoff_mask0);
            MD_SIMD_FLOAT tz0 = select_by_mask(delz0 * force0, cutoff_mask0);
            MD_SIMD_FLOAT tx1 = select_by_mask(delx1 * force1, cutoff_mask1);
            MD_SIMD_FLOAT ty1 = select_by_mask(dely1 * force1, cutoff_mask1);
            MD_SIMD_FLOAT tz1 = select_by_mask(delz1 * force1, cutoff_mask1);
            MD_SIMD_FLOAT tx2 = select_by_mask(delx2 * force2, cutoff_mask2);
            MD_SIMD_FLOAT ty2 = select_by_mask(dely2 * force2, cutoff_mask2);
            MD_SIMD_FLOAT tz2 = select_by_mask(delz2 * force2, cutoff_mask2);
            MD_SIMD_FLOAT tx3 = select_by_mask(delx3 * force3, cutoff_mask3);
            MD_SIMD_FLOAT ty3 = select_by_mask(dely3 * force3, cutoff_mask3);
            MD_SIMD_FLOAT tz3 = select_by_mask(delz3 * force3, cutoff_mask3);

            fix0 = simd_add(fix0, tx0);
            fiy0 = simd_add(fiy0, ty0);
            fiz0 = simd_add(fiz0, tz0);
            fix1 = simd_add(fix1, tx1);
            fiy1 = simd_add(fiy1, ty1);
            fiz1 = simd_add(fiz1, tz1);
            fix2 = simd_add(fix2, tx2);
            fiy2 = simd_add(fiy2, ty2);
            fiz2 = simd_add(fiz2, tz2);
            fix3 = simd_add(fix3, tx3);
            fiy3 = simd_add(fiy3, ty3);
            fiz3 = simd_add(fiz3, tz3);

            /*
             * Apply the opposite force to the j-cluster.
             * NOTE(review): this is an unsynchronised load/modify/store on
             * cj_f inside a work-shared loop; confirm that concurrent
             * iterations cannot target the same j-cluster (or the same memory
             * as another thread's ci_f).
             */
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
            }
            #else
            simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
            simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
            simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
            #endif
        }

        /* Part 2: remaining neighbors, only the cutoff test is applied. */
        for(int k = numneighs_masked; k < numneighs; k++) {
            int cj = neighs[k];
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
            MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
            MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
            MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
            MD_SIMD_FLOAT zj_tmp = simd_load(&cj_x[CL_Z_OFFSET]);
            MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
            MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
            MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
            MD_SIMD_FLOAT delx1 = xi1_tmp - xj_tmp;
            MD_SIMD_FLOAT dely1 = yi1_tmp - yj_tmp;
            MD_SIMD_FLOAT delz1 = zi1_tmp - zj_tmp;
            MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
            MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
            MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
            MD_SIMD_FLOAT delx3 = xi3_tmp - xj_tmp;
            MD_SIMD_FLOAT dely3 = yi3_tmp - yj_tmp;
            MD_SIMD_FLOAT delz3 = zi3_tmp - zj_tmp;

            MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
            MD_SIMD_FLOAT rsq1 = simd_fma(delx1, delx1, simd_fma(dely1, dely1, delz1 * delz1));
            MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));
            MD_SIMD_FLOAT rsq3 = simd_fma(delx3, delx3, simd_fma(dely3, dely3, delz3 * delz3));

            MD_SIMD_MASK cutoff_mask0 = simd_mask_cond_lt(rsq0, cutforcesq_vec);
            MD_SIMD_MASK cutoff_mask1 = simd_mask_cond_lt(rsq1, cutforcesq_vec);
            MD_SIMD_MASK cutoff_mask2 = simd_mask_cond_lt(rsq2, cutforcesq_vec);
            MD_SIMD_MASK cutoff_mask3 = simd_mask_cond_lt(rsq3, cutforcesq_vec);

            MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
            MD_SIMD_FLOAT sr2_1 = simd_reciprocal(rsq1);
            MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
            MD_SIMD_FLOAT sr2_3 = simd_reciprocal(rsq3);

            MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
            MD_SIMD_FLOAT sr6_1 = sr2_1 * sr2_1 * sr2_1 * sigma6_vec;
            MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
            MD_SIMD_FLOAT sr6_3 = sr2_3 * sr2_3 * sr2_3 * sigma6_vec;

            MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
            MD_SIMD_FLOAT force1 = c48_vec * sr6_1 * (sr6_1 - c05_vec) * sr2_1 * eps_vec;
            MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;
            MD_SIMD_FLOAT force3 = c48_vec * sr6_3 * (sr6_3 - c05_vec) * sr2_3 * eps_vec;

            MD_SIMD_FLOAT tx0 = select_by_mask(delx0 * force0, cutoff_mask0);
            MD_SIMD_FLOAT ty0 = select_by_mask(dely0 * force0, cutoff_mask0);
            MD_SIMD_FLOAT tz0 = select_by_mask(delz0 * force0, cutoff_mask0);
            MD_SIMD_FLOAT tx1 = select_by_mask(delx1 * force1, cutoff_mask1);
            MD_SIMD_FLOAT ty1 = select_by_mask(dely1 * force1, cutoff_mask1);
            MD_SIMD_FLOAT tz1 = select_by_mask(delz1 * force1, cutoff_mask1);
            MD_SIMD_FLOAT tx2 = select_by_mask(delx2 * force2, cutoff_mask2);
            MD_SIMD_FLOAT ty2 = select_by_mask(dely2 * force2, cutoff_mask2);
            MD_SIMD_FLOAT tz2 = select_by_mask(delz2 * force2, cutoff_mask2);
            MD_SIMD_FLOAT tx3 = select_by_mask(delx3 * force3, cutoff_mask3);
            MD_SIMD_FLOAT ty3 = select_by_mask(dely3 * force3, cutoff_mask3);
            MD_SIMD_FLOAT tz3 = select_by_mask(delz3 * force3, cutoff_mask3);

            fix0 = simd_add(fix0, tx0);
            fiy0 = simd_add(fiy0, ty0);
            fiz0 = simd_add(fiz0, tz0);
            fix1 = simd_add(fix1, tx1);
            fiy1 = simd_add(fiy1, ty1);
            fiz1 = simd_add(fiz1, tz1);
            fix2 = simd_add(fix2, tx2);
            fiy2 = simd_add(fiy2, ty2);
            fiz2 = simd_add(fiz2, tz2);
            fix3 = simd_add(fix3, tx3);
            fiy3 = simd_add(fiy3, ty3);
            fiz3 = simd_add(fiz3, tz3);

            /* Same j-cluster update as in the masked loop (see NOTE(review) there). */
            #ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
            if(cj < CJ1_FROM_CI(atom->Nlocal)) {
                simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
                simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
                simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
            }
            #else
            simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
            simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
            simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
            #endif
        }

        /* Fold the four per-atom accumulators into the i-cluster force array. */
        simd_incr_reduced_sum(&ci_f[CL_X_OFFSET], fix0, fix1, fix2, fix3);
        simd_incr_reduced_sum(&ci_f[CL_Y_OFFSET], fiy0, fiy1, fiy2, fiy3);
        simd_incr_reduced_sum(&ci_f[CL_Z_OFFSET], fiz0, fiz1, fiz2, fiz3);

        addStat(stats->calculated_forces, 1);
        addStat(stats->num_neighs, numneighs);
        addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
    }

    LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    DEBUG_MESSAGE("computeForceLJ_4xn end\n");
    return E-S;
}
|
|
|
|
|
|
|
|
/*
 * Lennard-Jones force kernel, 4xN cluster layout, full neighbor lists.
 *
 * For every local i-cluster the four i-atoms are broadcast into SIMD
 * registers and paired with each neighboring j-cluster. The pair force is
 *     force = 48 * sr6 * (sr6 - 0.5) * sr2 * epsilon
 * with sr2 = 1 / rsq and sr6 = sr2^3 * sigma6. Only the i-cluster forces are
 * written; j-cluster forces are not touched by this kernel.
 *
 * The neighbor list of a cluster is processed in two parts:
 *   - entries [0, numneigh_masked): an exclusion mask taken from
 *     atom->masks_4xn_fn is AND-ed with the cutoff mask;
 *   - entries [numneigh_masked, numneigh): only the cutoff mask is applied.
 *
 * param    : supplies cutforce, sigma6 and epsilon.
 * atom     : cluster coordinates (cl_x), forces (cl_f) and mask tables.
 * neighbor : neighbor lists and per-cluster neighbor counts.
 * stats    : counters updated through addStat().
 *
 * Returns the difference of two getTimeStamp() readings taken immediately
 * before and after the parallel force loop.
 */
double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    DEBUG_MESSAGE("computeForceLJ_4xn begin\n");
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6 = param->sigma6;
    MD_FLOAT epsilon = param->epsilon;
    MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
    MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
    MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
    MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
    MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);

    /* Reset the force entries of all atoms in the local i-clusters. */
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
            ci_f[CL_X_OFFSET + cii] = 0.0;
            ci_f[CL_Y_OFFSET + cii] = 0.0;
            ci_f[CL_Z_OFFSET + cii] = 0.0;
        }
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
    LIKWID_MARKER_START("force");

    #pragma omp for schedule(runtime)
    for(int ci = 0; ci < atom->Nclusters_local; ci++) {
        int ci_cj0 = CJ0_FROM_CI(ci);
        #if CLUSTER_M > CLUSTER_N
        int ci_cj1 = CJ1_FROM_CI(ci);
        #endif
        int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
        MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
        MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
        /*
         * Declared inside the loop body on purpose: a pointer declared outside
         * the parallel region is shared between threads, so concurrent
         * iterations would overwrite each other's neighbor list pointer.
         */
        int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
        int numneighs = neighbor->numneigh[ci];
        int numneighs_masked = neighbor->numneigh_masked[ci];

        /* One SIMD register per i-atom and coordinate (4 i-atoms per cluster). */
        MD_SIMD_FLOAT xi0_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 0]);
        MD_SIMD_FLOAT xi1_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 1]);
        MD_SIMD_FLOAT xi2_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 2]);
        MD_SIMD_FLOAT xi3_tmp = simd_broadcast(ci_x[CL_X_OFFSET + 3]);
        MD_SIMD_FLOAT yi0_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 0]);
        MD_SIMD_FLOAT yi1_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 1]);
        MD_SIMD_FLOAT yi2_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 2]);
        MD_SIMD_FLOAT yi3_tmp = simd_broadcast(ci_x[CL_Y_OFFSET + 3]);
        MD_SIMD_FLOAT zi0_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 0]);
        MD_SIMD_FLOAT zi1_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 1]);
        MD_SIMD_FLOAT zi2_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 2]);
        MD_SIMD_FLOAT zi3_tmp = simd_broadcast(ci_x[CL_Z_OFFSET + 3]);
        MD_SIMD_FLOAT fix0 = simd_zero();
        MD_SIMD_FLOAT fiy0 = simd_zero();
        MD_SIMD_FLOAT fiz0 = simd_zero();
        MD_SIMD_FLOAT fix1 = simd_zero();
        MD_SIMD_FLOAT fiy1 = simd_zero();
        MD_SIMD_FLOAT fiz1 = simd_zero();
        MD_SIMD_FLOAT fix2 = simd_zero();
        MD_SIMD_FLOAT fiy2 = simd_zero();
        MD_SIMD_FLOAT fiz2 = simd_zero();
        MD_SIMD_FLOAT fix3 = simd_zero();
        MD_SIMD_FLOAT fiy3 = simd_zero();
        MD_SIMD_FLOAT fiz3 = simd_zero();

        /* Part 1: neighbors that need an exclusion mask on top of the cutoff. */
        for(int k = 0; k < numneighs_masked; k++) {
            int cj = neighs[k];
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
            MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
            MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
            MD_SIMD_FLOAT zj_tmp = simd_load(&cj_x[CL_Z_OFFSET]);
            MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
            MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
            MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
            MD_SIMD_FLOAT delx1 = xi1_tmp - xj_tmp;
            MD_SIMD_FLOAT dely1 = yi1_tmp - yj_tmp;
            MD_SIMD_FLOAT delz1 = zi1_tmp - zj_tmp;
            MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
            MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
            MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
            MD_SIMD_FLOAT delx3 = xi3_tmp - xj_tmp;
            MD_SIMD_FLOAT dely3 = yi3_tmp - yj_tmp;
            MD_SIMD_FLOAT delz3 = zi3_tmp - zj_tmp;

            /*
             * Select the exclusion masks from the full-list table. The table
             * row depends on whether cj is the j-cluster that overlaps with ci
             * (presumably to drop self-interactions - confirm against the
             * code that fills masks_4xn_fn).
             */
            #if CLUSTER_M == CLUSTER_N
            unsigned int cond0 = (unsigned int)(cj == ci_cj0);
            MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 4 + 0]);
            MD_SIMD_MASK excl_mask1 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 4 + 1]);
            MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 4 + 2]);
            MD_SIMD_MASK excl_mask3 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 4 + 3]);
            #else
            #if CLUSTER_M < CLUSTER_N
            unsigned int cond0 = (unsigned int)((cj << 1) + 0 == ci);
            unsigned int cond1 = (unsigned int)((cj << 1) + 1 == ci);
            #else
            unsigned int cond0 = (unsigned int)(cj == ci_cj0);
            unsigned int cond1 = (unsigned int)(cj == ci_cj1);
            #endif
            MD_SIMD_MASK excl_mask0 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 0]);
            MD_SIMD_MASK excl_mask1 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 1]);
            MD_SIMD_MASK excl_mask2 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 2]);
            MD_SIMD_MASK excl_mask3 = simd_mask_from_u32(atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 3]);
            #endif

            MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
            MD_SIMD_FLOAT rsq1 = simd_fma(delx1, delx1, simd_fma(dely1, dely1, delz1 * delz1));
            MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));
            MD_SIMD_FLOAT rsq3 = simd_fma(delx3, delx3, simd_fma(dely3, dely3, delz3 * delz3));

            MD_SIMD_MASK cutoff_mask0 = simd_mask_and(excl_mask0, simd_mask_cond_lt(rsq0, cutforcesq_vec));
            MD_SIMD_MASK cutoff_mask1 = simd_mask_and(excl_mask1, simd_mask_cond_lt(rsq1, cutforcesq_vec));
            MD_SIMD_MASK cutoff_mask2 = simd_mask_and(excl_mask2, simd_mask_cond_lt(rsq2, cutforcesq_vec));
            MD_SIMD_MASK cutoff_mask3 = simd_mask_and(excl_mask3, simd_mask_cond_lt(rsq3, cutforcesq_vec));

            MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
            MD_SIMD_FLOAT sr2_1 = simd_reciprocal(rsq1);
            MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
            MD_SIMD_FLOAT sr2_3 = simd_reciprocal(rsq3);

            MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
            MD_SIMD_FLOAT sr6_1 = sr2_1 * sr2_1 * sr2_1 * sigma6_vec;
            MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
            MD_SIMD_FLOAT sr6_3 = sr2_3 * sr2_3 * sr2_3 * sigma6_vec;

            MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
            MD_SIMD_FLOAT force1 = c48_vec * sr6_1 * (sr6_1 - c05_vec) * sr2_1 * eps_vec;
            MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;
            MD_SIMD_FLOAT force3 = c48_vec * sr6_3 * (sr6_3 - c05_vec) * sr2_3 * eps_vec;

            /* Accumulate under the combined exclusion/cutoff mask. */
            fix0 = simd_masked_add(fix0, delx0 * force0, cutoff_mask0);
            fiy0 = simd_masked_add(fiy0, dely0 * force0, cutoff_mask0);
            fiz0 = simd_masked_add(fiz0, delz0 * force0, cutoff_mask0);
            fix1 = simd_masked_add(fix1, delx1 * force1, cutoff_mask1);
            fiy1 = simd_masked_add(fiy1, dely1 * force1, cutoff_mask1);
            fiz1 = simd_masked_add(fiz1, delz1 * force1, cutoff_mask1);
            fix2 = simd_masked_add(fix2, delx2 * force2, cutoff_mask2);
            fiy2 = simd_masked_add(fiy2, dely2 * force2, cutoff_mask2);
            fiz2 = simd_masked_add(fiz2, delz2 * force2, cutoff_mask2);
            fix3 = simd_masked_add(fix3, delx3 * force3, cutoff_mask3);
            fiy3 = simd_masked_add(fiy3, dely3 * force3, cutoff_mask3);
            fiz3 = simd_masked_add(fiz3, delz3 * force3, cutoff_mask3);
        }

        /* Part 2: remaining neighbors, only the cutoff test is applied. */
        for(int k = numneighs_masked; k < numneighs; k++) {
            int cj = neighs[k];
            int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
            MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
            MD_SIMD_FLOAT xj_tmp = simd_load(&cj_x[CL_X_OFFSET]);
            MD_SIMD_FLOAT yj_tmp = simd_load(&cj_x[CL_Y_OFFSET]);
            MD_SIMD_FLOAT zj_tmp = simd_load(&cj_x[CL_Z_OFFSET]);
            MD_SIMD_FLOAT delx0 = xi0_tmp - xj_tmp;
            MD_SIMD_FLOAT dely0 = yi0_tmp - yj_tmp;
            MD_SIMD_FLOAT delz0 = zi0_tmp - zj_tmp;
            MD_SIMD_FLOAT delx1 = xi1_tmp - xj_tmp;
            MD_SIMD_FLOAT dely1 = yi1_tmp - yj_tmp;
            MD_SIMD_FLOAT delz1 = zi1_tmp - zj_tmp;
            MD_SIMD_FLOAT delx2 = xi2_tmp - xj_tmp;
            MD_SIMD_FLOAT dely2 = yi2_tmp - yj_tmp;
            MD_SIMD_FLOAT delz2 = zi2_tmp - zj_tmp;
            MD_SIMD_FLOAT delx3 = xi3_tmp - xj_tmp;
            MD_SIMD_FLOAT dely3 = yi3_tmp - yj_tmp;
            MD_SIMD_FLOAT delz3 = zi3_tmp - zj_tmp;

            MD_SIMD_FLOAT rsq0 = simd_fma(delx0, delx0, simd_fma(dely0, dely0, delz0 * delz0));
            MD_SIMD_FLOAT rsq1 = simd_fma(delx1, delx1, simd_fma(dely1, dely1, delz1 * delz1));
            MD_SIMD_FLOAT rsq2 = simd_fma(delx2, delx2, simd_fma(dely2, dely2, delz2 * delz2));
            MD_SIMD_FLOAT rsq3 = simd_fma(delx3, delx3, simd_fma(dely3, dely3, delz3 * delz3));

            MD_SIMD_MASK cutoff_mask0 = simd_mask_cond_lt(rsq0, cutforcesq_vec);
            MD_SIMD_MASK cutoff_mask1 = simd_mask_cond_lt(rsq1, cutforcesq_vec);
            MD_SIMD_MASK cutoff_mask2 = simd_mask_cond_lt(rsq2, cutforcesq_vec);
            MD_SIMD_MASK cutoff_mask3 = simd_mask_cond_lt(rsq3, cutforcesq_vec);

            MD_SIMD_FLOAT sr2_0 = simd_reciprocal(rsq0);
            MD_SIMD_FLOAT sr2_1 = simd_reciprocal(rsq1);
            MD_SIMD_FLOAT sr2_2 = simd_reciprocal(rsq2);
            MD_SIMD_FLOAT sr2_3 = simd_reciprocal(rsq3);

            MD_SIMD_FLOAT sr6_0 = sr2_0 * sr2_0 * sr2_0 * sigma6_vec;
            MD_SIMD_FLOAT sr6_1 = sr2_1 * sr2_1 * sr2_1 * sigma6_vec;
            MD_SIMD_FLOAT sr6_2 = sr2_2 * sr2_2 * sr2_2 * sigma6_vec;
            MD_SIMD_FLOAT sr6_3 = sr2_3 * sr2_3 * sr2_3 * sigma6_vec;

            MD_SIMD_FLOAT force0 = c48_vec * sr6_0 * (sr6_0 - c05_vec) * sr2_0 * eps_vec;
            MD_SIMD_FLOAT force1 = c48_vec * sr6_1 * (sr6_1 - c05_vec) * sr2_1 * eps_vec;
            MD_SIMD_FLOAT force2 = c48_vec * sr6_2 * (sr6_2 - c05_vec) * sr2_2 * eps_vec;
            MD_SIMD_FLOAT force3 = c48_vec * sr6_3 * (sr6_3 - c05_vec) * sr2_3 * eps_vec;

            fix0 = simd_masked_add(fix0, delx0 * force0, cutoff_mask0);
            fiy0 = simd_masked_add(fiy0, dely0 * force0, cutoff_mask0);
            fiz0 = simd_masked_add(fiz0, delz0 * force0, cutoff_mask0);
            fix1 = simd_masked_add(fix1, delx1 * force1, cutoff_mask1);
            fiy1 = simd_masked_add(fiy1, dely1 * force1, cutoff_mask1);
            fiz1 = simd_masked_add(fiz1, delz1 * force1, cutoff_mask1);
            fix2 = simd_masked_add(fix2, delx2 * force2, cutoff_mask2);
            fiy2 = simd_masked_add(fiy2, dely2 * force2, cutoff_mask2);
            fiz2 = simd_masked_add(fiz2, delz2 * force2, cutoff_mask2);
            fix3 = simd_masked_add(fix3, delx3 * force3, cutoff_mask3);
            fiy3 = simd_masked_add(fiy3, dely3 * force3, cutoff_mask3);
            fiz3 = simd_masked_add(fiz3, delz3 * force3, cutoff_mask3);
        }

        /* Fold the four per-atom accumulators into the i-cluster force array. */
        simd_incr_reduced_sum(&ci_f[CL_X_OFFSET], fix0, fix1, fix2, fix3);
        simd_incr_reduced_sum(&ci_f[CL_Y_OFFSET], fiy0, fiy1, fiy2, fiy3);
        simd_incr_reduced_sum(&ci_f[CL_Z_OFFSET], fiz0, fiz1, fiz2, fiz3);

        addStat(stats->calculated_forces, 1);
        addStat(stats->num_neighs, numneighs);
        /*
         * NOTE(review): the raw neighbor count is recorded here, whereas
         * computeForceLJ_4xn_half scales it by CLUSTER_M / CLUSTER_N - confirm
         * which definition of force_iters is intended.
         */
        addStat(stats->force_iters, (long long int)((double)numneighs));
    }

    LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    DEBUG_MESSAGE("computeForceLJ_4xn end\n");
    return E-S;
}
|
2022-03-22 23:47:05 +01:00
|
|
|
|
|
|
|
/*
 * Entry point for the 4xN Lennard-Jones kernel.
 *
 * Picks the half-neighbor-list variant when neighbor->half_neigh is non-zero
 * and the full-neighbor-list variant otherwise, forwarding all arguments
 * unchanged. The selected kernel's return value is passed straight through.
 */
double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    return neighbor->half_neigh
        ? computeForceLJ_4xn_half(param, atom, neighbor, stats)
        : computeForceLJ_4xn_full(param, atom, neighbor, stats);
}
|