Adjust ISA options and improve output

Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
Rafael Ravedutti 2022-08-16 18:36:47 +02:00
parent 0caeea0494
commit 911ba63336
9 changed files with 70 additions and 53 deletions

View File

@ -64,8 +64,12 @@ ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH) DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif endif
ifeq ($(strip $(NO_AVX2)),true) ifeq ($(strip $(MASK_REGISTERS)),true)
DEFINES += -DNO_AVX2 DEFINES += -DMASK_REGISTERS
endif
ifeq ($(strip $(SIMD_KERNEL_AVAILABLE)),true)
DEFINES += -DSIMD_KERNEL_AVAILABLE
endif endif
ifeq ($(strip $(AVX512)),true) ifeq ($(strip $(AVX512)),true)

View File

@ -2,6 +2,9 @@
TAG ?= ICC TAG ?= ICC
# Instruction set (SSE/AVX/AVX2/AVX512) # Instruction set (SSE/AVX/AVX2/AVX512)
ISA ?= AVX512 ISA ?= AVX512
# Use mask registers (AVX512 must be available in the target CPU)
# This is always true when ISA is set to AVX512
MASK_REGISTERS ?= true
# Optimization scheme (lammps/gromacs/clusters_per_bin) # Optimization scheme (lammps/gromacs/clusters_per_bin)
OPT_SCHEME ?= lammps OPT_SCHEME ?= lammps
# Enable likwid (true or false) # Enable likwid (true or false)

View File

@ -28,10 +28,10 @@
#define MD_SIMD_FLOAT __m256d #define MD_SIMD_FLOAT __m256d
#define MD_SIMD_INT __m128i #define MD_SIMD_INT __m128i
#ifdef NO_AVX2 #ifdef MASK_REGISTERS
# define MD_SIMD_MASK __m256d
#else
# define MD_SIMD_MASK __mmask8 # define MD_SIMD_MASK __mmask8
#else
# define MD_SIMD_MASK __m256d
#endif #endif
static inline MD_SIMD_FLOAT simd_broadcast(MD_FLOAT scalar) { return _mm256_set1_pd(scalar); } static inline MD_SIMD_FLOAT simd_broadcast(MD_FLOAT scalar) { return _mm256_set1_pd(scalar); }

View File

@ -2,12 +2,13 @@ ifeq ($(strip $(ISA)), SSE)
_VECTOR_WIDTH=2 _VECTOR_WIDTH=2
else ifeq ($(strip $(ISA)), AVX) else ifeq ($(strip $(ISA)), AVX)
# Vector width is 4 but AVX2 instruction set is not supported # Vector width is 4 but AVX2 instruction set is not supported
NO_AVX2=true
_VECTOR_WIDTH=4 _VECTOR_WIDTH=4
else ifeq ($(strip $(ISA)), AVX2) else ifeq ($(strip $(ISA)), AVX2)
#SIMD_KERNEL_AVAILABLE=true
_VECTOR_WIDTH=4 _VECTOR_WIDTH=4
else ifeq ($(strip $(ISA)), AVX512) else ifeq ($(strip $(ISA)), AVX512)
AVX512=true AVX512=true
SIMD_KERNEL_AVAILABLE=true
_VECTOR_WIDTH=8 _VECTOR_WIDTH=8
endif endif

View File

@ -21,16 +21,14 @@
* ======================================================================================= * =======================================================================================
*/ */
#include <math.h> #include <math.h>
//---
#include <atom.h>
#include <likwid-marker.h> #include <likwid-marker.h>
#include <timing.h>
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h>
#include <stats.h> #include <stats.h>
#include <timing.h>
// TODO: Joint common files for gromacs and lammps variants
#include "../gromacs/includes/simd.h"
double computeForceDemFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { double computeForceDemFullNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal; int Nlocal = atom->Nlocal;

View File

@ -20,16 +20,20 @@
* with MD-Bench. If not, see <https://www.gnu.org/licenses/>. * with MD-Bench. If not, see <https://www.gnu.org/licenses/>.
* ======================================================================================= * =======================================================================================
*/ */
#include <stdio.h>
#include <stdlib.h>
//---
#include <atom.h>
#include <likwid-marker.h> #include <likwid-marker.h>
#include <timing.h>
#include <neighbor.h> #include <neighbor.h>
#include <parameter.h> #include <parameter.h>
#include <atom.h>
#include <stats.h> #include <stats.h>
#include <timing.h>
// TODO: Joint common files for gromacs and lammps variants // TODO: Joint common files for gromacs and lammps variants
#ifdef SIMD_KERNEL_AVAILABLE
#include "../gromacs/includes/simd.h" #include "../gromacs/includes/simd.h"
#endif
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) { double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal; int Nlocal = atom->Nlocal;
@ -194,11 +198,6 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
MD_FLOAT cutforcesq = param->cutforce * param->cutforce; MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6; MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon; MD_FLOAT epsilon = param->epsilon;
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0; atom_fx(i) = 0.0;
@ -209,10 +208,16 @@ double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neig
double S = getTimeStamp(); double S = getTimeStamp();
LIKWID_MARKER_START("force"); LIKWID_MARKER_START("force");
#ifdef NO_AVX2 #ifndef SIMD_KERNEL_AVAILABLE
fprintf(stderr, "Error: SIMD kernel implemented for AVX2 and AVX512 only!"); fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
exit(-1); exit(-1);
#else #else
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel for #pragma omp parallel for
for(int i = 0; i < Nlocal; i++) { for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs]; neighs = &neighbor->neighbors[i * neighbor->maxneighs];

View File

@ -26,12 +26,15 @@
#ifndef MIN #ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y)) # define MIN(x,y) ((x)<(y)?(x):(y))
#endif #endif
#ifndef MAX #ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y)) # define MAX(x,y) ((x)>(y)?(x):(y))
#endif #endif
#ifndef ABS #ifndef ABS
# define ABS(a) ((a) >= 0 ? (a) : -(a)) # define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif #endif
#ifdef DEBUG #ifdef DEBUG
# define DEBUG_MESSAGE printf # define DEBUG_MESSAGE printf
#else #else
@ -46,6 +49,12 @@
#define FF_EAM 1 #define FF_EAM 1
#define FF_DEM 2 #define FF_DEM 2
#if PRECISION == 1
# define PRECISION_STRING "single"
#else
# define PRECISION_STRING "double"
#endif
extern double myrandom(int*); extern double myrandom(int*);
extern void random_reset(int *seed, int ibase, double *coord); extern void random_reset(int *seed, int ibase, double *coord);
extern int str2ff(const char *string); extern int str2ff(const char *string);

View File

@ -226,6 +226,7 @@ int main(int argc, char** argv) {
param.cutneigh = param.cutforce + param.skin; param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats); setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param); printParameter(&param);
printf(HLINE);
printf("step\ttemp\t\tpressure\n"); printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom); computeThermo(0, &param, &atom);
@ -272,14 +273,6 @@ int main(int argc, char** argv) {
timer[TOTAL] = getTimeStamp() - timer[TOTAL]; timer[TOTAL] = getTimeStamp() - timer[TOTAL];
computeThermo(-1, &param, &atom); computeThermo(-1, &param, &atom);
printf(HLINE);
printf("Force field: %s\n", ff2str(param.force_field));
printf("Data layout for positions: %s\n", POS_DATA_LAYOUT);
#if PRECISION == 1
printf("Using single precision floating point.\n");
#else
printf("Using double precision floating point.\n");
#endif
printf(HLINE); printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes); printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n", printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",

View File

@ -24,6 +24,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
//--- //---
#include <atom.h>
#include <parameter.h> #include <parameter.h>
#include <util.h> #include <util.h>
@ -154,6 +155,9 @@ void printParameter(Parameter *param) {
} }
printf("\tForce field: %s\n", ff2str(param->force_field)); printf("\tForce field: %s\n", ff2str(param->force_field));
printf("\tKernel: %s\n", KERNEL_NAME);
printf("\tData layout: %s\n", POS_DATA_LAYOUT);
printf("\tFloating-point precision: %s\n", PRECISION_STRING);
printf("\tUnit cells (nx, ny, nz): %d, %d, %d\n", param->nx, param->ny, param->nz); printf("\tUnit cells (nx, ny, nz): %d, %d, %d\n", param->nx, param->ny, param->nz);
printf("\tDomain box sizes (x, y, z): %e, %e, %e\n", param->xprd, param->yprd, param->zprd); printf("\tDomain box sizes (x, y, z): %e, %e, %e\n", param->xprd, param->yprd, param->zprd);
printf("\tPeriodic (x, y, z): %d, %d, %d\n", param->pbc_x, param->pbc_y, param->pbc_z); printf("\tPeriodic (x, y, z): %d, %d, %d\n", param->pbc_x, param->pbc_y, param->pbc_z);
@ -175,5 +179,5 @@ void printParameter(Parameter *param) {
printf("\tCutoff radius: %e\n", param->cutforce); printf("\tCutoff radius: %e\n", param->cutforce);
printf("\tSkin: %e\n", param->skin); printf("\tSkin: %e\n", param->skin);
printf("\tHalf neighbor lists: %d\n", param->half_neigh); printf("\tHalf neighbor lists: %d\n", param->half_neigh);
printf("\tProcessor frequency (GHz): %.4f\n\n", param->proc_freq); printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
} }