Merge branch 'master' of github.com:RRZE-HPC/MD-Bench

This commit is contained in:
Rafael Ravedutti 2022-04-04 21:52:47 +02:00
commit 4d11c5a3c2
5 changed files with 37 additions and 12 deletions

View File

@ -71,6 +71,10 @@ ifeq ($(strip $(AVX512)),true)
DEFINES += -DAVX512 DEFINES += -DAVX512
endif endif
# Forward the ENABLE_OMP_SIMD build switch to the compiler as a preprocessor
# define. $(strip ...) tolerates stray whitespace around the value, so only an
# exact "true" enables it. The C sources test the macro with
# "#ifdef ENABLE_OMP_SIMD" to decide whether the "#pragma omp simd" line is
# compiled in.
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
DEFINES += -DENABLE_OMP_SIMD
endif
VPATH = $(SRC_DIR) $(ASM_DIR) VPATH = $(SRC_DIR) $(ASM_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s)) OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))

View File

@ -1,19 +1,21 @@
# Compiler tag (GCC/CLANG/ICC) # Compiler tag (GCC/CLANG/ICC/ONEAPI)
TAG ?= ICC TAG ?= ICC
# Instruction set (SSE/AVX/AVX2/AVX512) # Instruction set (SSE/AVX/AVX2/AVX512)
ISA ?= AVX512 ISA ?= AVX512
# Optimization scheme (lammps/gromacs/clusters_per_bin) # Optimization scheme (lammps/gromacs/clusters_per_bin)
OPT_SCHEME ?= gromacs OPT_SCHEME ?= lammps
# Enable likwid (true or false) # Enable likwid (true or false)
ENABLE_LIKWID ?= true ENABLE_LIKWID ?= true
# SP or DP # SP or DP
DATA_TYPE ?= SP DATA_TYPE ?= DP
# AOS or SOA # AOS or SOA
DATA_LAYOUT ?= AOS DATA_LAYOUT ?= AOS
# Assembly syntax to generate (ATT/INTEL) # Assembly syntax to generate (ATT/INTEL)
ASM_SYNTAX ?= ATT ASM_SYNTAX ?= ATT
# Debug # Debug
DEBUG ?= false DEBUG ?= false
# Use omp simd pragma for lammps halfneigh
ENABLE_OMP_SIMD ?= true
# Explicitly store and load atom types (true or false) # Explicitly store and load atom types (true or false)
EXPLICIT_TYPES ?= false EXPLICIT_TYPES ?= false
@ -22,7 +24,7 @@ MEM_TRACER ?= false
# Trace indexes and distances for gather-md (true or false) # Trace indexes and distances for gather-md (true or false)
INDEX_TRACER ?= false INDEX_TRACER ?= false
# Compute statistics # Compute statistics
COMPUTE_STATS ?= true COMPUTE_STATS ?= false
# Configurations for gromacs optimization scheme # Configurations for gromacs optimization scheme
# Use reference version # Use reference version

View File

@ -5,13 +5,13 @@ OPENMP = #-qopenmp
PROFILE = #-profile-functions -g -pg PROFILE = #-profile-functions -g -pg
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
#OPTS = -Ofast -xCORE-AVX2 $(PROFILE) #OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
#OPTS = -fast -xAVX $(PROFILE) #OPTS = -Ofast -xAVX $(PROFILE)
#OPTS = -fast -xSSE4.2 $(PROFILE) #OPTS = -Ofast -xSSE4.2 $(PROFILE)
#OPTS = -fast -no-vec $(PROFILE) #OPTS = -Ofast -no-vec $(PROFILE)
#OPTS = -fast -xHost $(PROFILE) #OPTS = -Ofast -xHost $(PROFILE)
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS) CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
ASFLAGS = #-masm=intel ASFLAGS = #-masm=intel
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP) LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE #-DLIKWID_PERFMON DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE
INCLUDES = #$(LIKWID_INC) INCLUDES =
LIBS = -lm #$(LIKWID_LIB) -llikwid LIBS = -lm

17
include_ONEAPI.mk Normal file
View File

@ -0,0 +1,17 @@
# Toolchain settings for the ONEAPI compiler tag.
# Presumably pulled in by the top-level Makefile when TAG=ONEAPI - the include
# logic is not visible here, TODO confirm.
# All variables use recursive (=) assignment, so $(...) references are
# expanded when the variable is used, not when it is defined.

# C compiler driver; the link step goes through the same driver.
CC = icx
LINKER = $(CC)
# Enables only the OpenMP SIMD directives (e.g. "#pragma omp simd").
# NOTE(review): per Intel's documentation this does not enable OpenMP
# threading or link the OpenMP runtime - confirm that is the intent.
OPENMP = -qopenmp-simd
# Profiling flags. Everything after '#' is a make comment, so PROFILE is
# empty by default; remove the '#' to turn on -g -pg.
PROFILE = #-g -pg
# Optimization / target instruction set. Exactly one OPTS line is active;
# the commented-out lines are alternatives for other targets.
#OPTS = -Ofast -no-vec
#OPTS = -Ofast -xSSE4.2
#OPTS = -Ofast -xAVX
#OPTS = -Ofast -xCORE-AVX2
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high
#OPTS = -Ofast -xHost
# Compile flags: profiling, optimization and OpenMP SIMD options combined.
CFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
# Assembly output flags.
# NOTE(review): this forces Intel syntax while the build configuration
# defaults ASM_SYNTAX to ATT - confirm the two are meant to differ.
ASFLAGS = -masm=intel
# Link flags. $(OPENMP) is not passed at link time here.
# NOTE(review): the ICC variant of this file does add $(OPENMP) to LFLAGS;
# presumably unnecessary for -qopenmp-simd - verify.
LFLAGS = $(PROFILE) $(OPTS)
# Preprocessor defines handed to every compile.
# NOTE(review): the effect of NOCHUNK is not visible from this file - see the
# sources that test it.
DEFINES = -D_GNU_SOURCE -DNOCHUNK
# No extra include directories are needed by default.
INCLUDES =
# Libraries to link: only the math library.
LIBS = -lm

View File

@ -137,7 +137,9 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
#endif #endif
// Pragma required to vectorize the inner loop // Pragma required to vectorize the inner loop
#pragma simd reduction(+: fix,fiy,fiz) #ifdef ENABLE_OMP_SIMD
#pragma omp simd reduction(+: fix,fiy,fiz)
#endif
for(int k = 0; k < numneighs; k++) { for(int k = 0; k < numneighs; k++) {
int j = neighs[k]; int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j); MD_FLOAT delx = xtmp - atom_x(j);