Summary of this patch (free text ahead of the first "diff --git" header):

  Makefile            Append -DENABLE_OMP_SIMD to DEFINES when the
                      ENABLE_OMP_SIMD variable is "true" (after strip).
  config.mk           Mention ONEAPI in the compiler-tag comment, add the
                      ENABLE_OMP_SIMD knob (default true), and change the
                      defaults OPT_SCHEME gromacs->lammps, DATA_TYPE SP->DP,
                      COMPUTE_STATS true->false.
  include_ICC.mk      Use -Ofast instead of -fast in the commented-out OPTS
                      alternatives; drop the commented-out LIKWID remnants
                      from DEFINES, INCLUDES and LIBS.
  include_ONEAPI.mk   New toolchain file: CC = icx, OPENMP = -qopenmp-simd.
  lammps/force_lj.c   Replace "#pragma simd" with "#pragma omp simd", guarded
                      by #ifdef ENABLE_OMP_SIMD, in computeForceLJHalfNeigh.

NOTE(review): this patch was recovered from a copy in which all line breaks
and runs of whitespace had been collapsed. Line structure was rebuilt from the
hunk headers (old/new line counts match); the leading indentation inside hunk
lines and any column alignment around "=" are assumptions. Verify against the
working tree (e.g. "git apply --check") before applying.

diff --git a/Makefile b/Makefile
index 9d3cc90..7ecb39b 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,10 @@ ifeq ($(strip $(AVX512)),true)
     DEFINES += -DAVX512
 endif
 
+ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
+    DEFINES += -DENABLE_OMP_SIMD
+endif
+
 VPATH = $(SRC_DIR) $(ASM_DIR)
 ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
 OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
diff --git a/config.mk b/config.mk
index 9afe7a0..a1f3689 100644
--- a/config.mk
+++ b/config.mk
@@ -1,19 +1,21 @@
-# Compiler tag (GCC/CLANG/ICC)
+# Compiler tag (GCC/CLANG/ICC/ONEAPI)
 TAG ?= ICC
 # Instruction set (SSE/AVX/AVX2/AVX512)
 ISA ?= AVX512
 # Optimization scheme (lammps/gromacs/clusters_per_bin)
-OPT_SCHEME ?= gromacs
+OPT_SCHEME ?= lammps
 # Enable likwid (true or false)
 ENABLE_LIKWID ?= true
 # SP or DP
-DATA_TYPE ?= SP
+DATA_TYPE ?= DP
 # AOS or SOA
 DATA_LAYOUT ?= AOS
 # Assembly syntax to generate (ATT/INTEL)
 ASM_SYNTAX ?= ATT
 # Debug
 DEBUG ?= false
+# Use omp simd pragma for lammps halfneigh
+ENABLE_OMP_SIMD ?= true
 
 # Explicitly store and load atom types (true or false)
 EXPLICIT_TYPES ?= false
@@ -22,7 +24,7 @@ MEM_TRACER ?= false
 # Trace indexes and distances for gather-md (true or false)
 INDEX_TRACER ?= false
 # Compute statistics
-COMPUTE_STATS ?= true
+COMPUTE_STATS ?= false
 
 # Configurations for gromacs optimization scheme
 # Use reference version
diff --git a/include_ICC.mk b/include_ICC.mk
index 2f49bfb..1c63010 100644
--- a/include_ICC.mk
+++ b/include_ICC.mk
@@ -5,13 +5,13 @@ OPENMP = #-qopenmp
 PROFILE = #-profile-functions -g -pg
 OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE)
 #OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
-#OPTS = -fast -xAVX $(PROFILE)
-#OPTS = -fast -xSSE4.2 $(PROFILE)
-#OPTS = -fast -no-vec $(PROFILE)
-#OPTS = -fast -xHost $(PROFILE)
+#OPTS = -Ofast -xAVX $(PROFILE)
+#OPTS = -Ofast -xSSE4.2 $(PROFILE)
+#OPTS = -Ofast -no-vec $(PROFILE)
+#OPTS = -Ofast -xHost $(PROFILE)
 CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
 ASFLAGS = #-masm=intel
 LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
-DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE #-DLIKWID_PERFMON
-INCLUDES = #$(LIKWID_INC)
-LIBS = -lm #$(LIKWID_LIB) -llikwid
+DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE
+INCLUDES =
+LIBS = -lm
diff --git a/include_ONEAPI.mk b/include_ONEAPI.mk
new file mode 100644
index 0000000..9e48212
--- /dev/null
+++ b/include_ONEAPI.mk
@@ -0,0 +1,17 @@
+CC = icx
+LINKER = $(CC)
+
+OPENMP = -qopenmp-simd
+PROFILE = #-g -pg
+#OPTS = -Ofast -no-vec
+#OPTS = -Ofast -xSSE4.2
+#OPTS = -Ofast -xAVX
+#OPTS = -Ofast -xCORE-AVX2
+OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high
+#OPTS = -Ofast -xHost
+CFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
+ASFLAGS = -masm=intel
+LFLAGS = $(PROFILE) $(OPTS)
+DEFINES = -D_GNU_SOURCE -DNOCHUNK
+INCLUDES =
+LIBS = -lm
diff --git a/lammps/force_lj.c b/lammps/force_lj.c
index ec1297b..b111c4e 100644
--- a/lammps/force_lj.c
+++ b/lammps/force_lj.c
@@ -137,7 +137,9 @@ double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor,
         #endif
 
         // Pragma required to vectorize the inner loop
-        #pragma simd reduction(+: fix,fiy,fiz)
+#ifdef ENABLE_OMP_SIMD
+        #pragma omp simd reduction(+: fix,fiy,fiz)
+#endif
         for(int k = 0; k < numneighs; k++) {
             int j = neighs[k];
             MD_FLOAT delx = xtmp - atom_x(j);