Refactor code

This commit is contained in:
Jan Eitzinger 2024-05-15 14:20:40 +02:00
parent 9712d7e2c8
commit 8d0a8b5f9c
122 changed files with 418 additions and 4527 deletions

122
Makefile
View File

@ -1,109 +1,30 @@
#CONFIGURE BUILD SYSTEM #CONFIGURE BUILD SYSTEM
IDENTIFIER = $(OPT_SCHEME)-$(TAG)-$(ISA)-$(DATA_TYPE) TAG = $(OPT_TAG)-$(TOOLCHAIN)-$(DATA_TYPE)
TARGET = MDBench-$(IDENTIFIER) TARGET = MDBench-$(TAG)
BUILD_DIR = ./build-$(IDENTIFIER) BUILD_DIR = ./build/build-$(TAG)
SRC_DIR = ./$(OPT_SCHEME) SRC_ROOT = ./src
ASM_DIR = ./asm SRC_DIR = $(SRC_ROOT)/$(OPT_SCHEME)
COMMON_DIR = ./common COMMON_DIR = $(SRC_ROOT)/common
CUDA_DIR = ./$(SRC_DIR)/cuda CUDA_DIR = $(SRC_DIR)/cuda
MAKE_DIR = ./ MAKE_DIR = ./make
Q ?= @ Q ?= @
#DO NOT EDIT BELOW #DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk include config.mk
include $(MAKE_DIR)/include_$(TAG).mk include $(MAKE_DIR)/include_$(TOOLCHAIN).mk
include $(MAKE_DIR)/include_LIKWID.mk include $(MAKE_DIR)/include_LIKWID.mk
ifneq ($(strip $(ISA)),NONE)
include $(MAKE_DIR)/include_ISA.mk include $(MAKE_DIR)/include_ISA.mk
endif
include $(MAKE_DIR)/include_GROMACS.mk include $(MAKE_DIR)/include_GROMACS.mk
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes INCLUDES += -I./$(SRC_DIR) -I./$(COMMON_DIR)
ifeq ($(strip $(DATA_LAYOUT)),AOS) VPATH = $(SRC_DIR) $(COMMON_DIR) $(CUDA_DIR)
DEFINES += -DAOS
endif
ifeq ($(strip $(DATA_TYPE)),SP)
DEFINES += -DPRECISION=1
else
DEFINES += -DPRECISION=2
endif
ifneq ($(ASM_SYNTAX), ATT)
ASFLAGS += -masm=intel
endif
ifeq ($(strip $(SORT_ATOMS)),true)
DEFINES += -DSORT_ATOMS
endif
ifeq ($(strip $(EXPLICIT_TYPES)),true)
DEFINES += -DEXPLICIT_TYPES
endif
ifeq ($(strip $(MEM_TRACER)),true)
DEFINES += -DMEM_TRACER
endif
ifeq ($(strip $(INDEX_TRACER)),true)
DEFINES += -DINDEX_TRACER
endif
ifeq ($(strip $(COMPUTE_STATS)),true)
DEFINES += -DCOMPUTE_STATS
endif
ifeq ($(strip $(XTC_OUTPUT)),true)
DEFINES += -DXTC_OUTPUT
endif
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
DEFINES += -DUSE_REFERENCE_VERSION
endif
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
endif
ifeq ($(strip $(DEBUG)),true)
DEFINES += -DDEBUG
endif
ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif
ifeq ($(strip $(__SIMD_KERNEL__)),true)
DEFINES += -D__SIMD_KERNEL__
endif
ifeq ($(strip $(__SSE__)),true)
DEFINES += -D__ISA_SSE__
endif
ifeq ($(strip $(__ISA_AVX__)),true)
DEFINES += -D__ISA_AVX__
endif
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
DEFINES += -D__ISA_AVX_FMA__
endif
ifeq ($(strip $(__ISA_AVX2__)),true)
DEFINES += -D__ISA_AVX2__
endif
ifeq ($(strip $(__ISA_AVX512__)),true)
DEFINES += -D__ISA_AVX512__
endif
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
DEFINES += -DENABLE_OMP_SIMD
endif
VPATH = $(SRC_DIR) $(ASM_DIR) $(CUDA_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c)) ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s)) OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))) OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s)) OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%-common.o,$(wildcard $(COMMON_DIR)/*.c)) OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(COMMON_DIR)/*.c))
ifeq ($(strip $(TAG)),NVCC) ifeq ($(strip $(TAG)),NVCC)
OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu)) OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu))
endif endif
@ -129,11 +50,6 @@ $(BUILD_DIR)/%.o: %.c
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ $(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d $(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%-common.o: $(COMMON_DIR)/%.c
$(info ===> COMPILE $@)
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%-cuda.o: %.cu $(BUILD_DIR)/%-cuda.o: %.cu
$(info ===> COMPILE $@) $(info ===> COMPILE $@)
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ $(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
@ -152,18 +68,16 @@ $(BUILD_DIR)/%.o: %.s
clean: clean:
$(info ===> CLEAN) $(info ===> CLEAN)
@rm -rf $(BUILD_DIR) @rm -rf $(BUILD_DIR)
@rm -rf $(TARGET)*
@rm -f tags
cleanall: cleanall:
$(info ===> CLEAN) $(info ===> CLEAN)
@rm -rf build-* @rm -rf build
@rm -rf MDBench-* @rm -rf MDBench-*
@rm -f tags @rm -f tags
distclean: clean distclean: clean
$(info ===> DIST CLEAN) $(info ===> DIST CLEAN)
@rm -f $(TARGET)* @rm -f $(TARGET)
@rm -f tags @rm -f tags
info: info:
@ -177,6 +91,6 @@ tags:
$(Q)ctags -R $(Q)ctags -R
$(BUILD_DIR): $(BUILD_DIR):
@mkdir $(BUILD_DIR) @mkdir -p $(BUILD_DIR)
-include $(OBJ:.o=.d) -include $(OBJ:.o=.d)

View File

@ -1,7 +1,8 @@
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC) # Compiler tool chain (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
TAG ?= CLANG TOOLCHAIN ?= CLANG
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512) # Instruction set for instrinsic kernels (NONE/SSE/AVX/AVX_FMA/AVX2/AVX512)
ISA ?= SSE ISA ?= ARM
SIMD ?= NONE
# Optimization scheme (verletlist/clusterpair/clusters_per_bin) # Optimization scheme (verletlist/clusterpair/clusters_per_bin)
OPT_SCHEME ?= verletlist OPT_SCHEME ?= verletlist
# Enable likwid (true or false) # Enable likwid (true or false)
@ -47,3 +48,93 @@ USE_CUDA_HOST_MEMORY ?= false
#Feature options #Feature options
OPTIONS = -DALIGNMENT=64 OPTIONS = -DALIGNMENT=64
#OPTIONS += More options #OPTIONS += More options
#DO NOT EDIT BELOW
ifeq ($(strip $(DATA_LAYOUT)),AOS)
DEFINES += -DAOS
endif
ifeq ($(strip $(DATA_TYPE)),SP)
DEFINES += -DPRECISION=1
else
DEFINES += -DPRECISION=2
endif
ifneq ($(ASM_SYNTAX), ATT)
ASFLAGS += -masm=intel
endif
ifeq ($(strip $(SORT_ATOMS)),true)
DEFINES += -DSORT_ATOMS
endif
ifeq ($(strip $(EXPLICIT_TYPES)),true)
DEFINES += -DEXPLICIT_TYPES
endif
ifeq ($(strip $(MEM_TRACER)),true)
DEFINES += -DMEM_TRACER
endif
ifeq ($(strip $(INDEX_TRACER)),true)
DEFINES += -DINDEX_TRACER
endif
ifeq ($(strip $(COMPUTE_STATS)),true)
DEFINES += -DCOMPUTE_STATS
endif
ifeq ($(strip $(XTC_OUTPUT)),true)
DEFINES += -DXTC_OUTPUT
endif
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
DEFINES += -DUSE_REFERENCE_VERSION
endif
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
endif
ifeq ($(strip $(DEBUG)),true)
DEFINES += -DDEBUG
endif
ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif
ifeq ($(strip $(__SIMD_KERNEL__)),true)
DEFINES += -D__SIMD_KERNEL__
endif
ifeq ($(strip $(__SSE__)),true)
DEFINES += -D__ISA_SSE__
endif
ifeq ($(strip $(__ISA_AVX__)),true)
DEFINES += -D__ISA_AVX__
endif
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
DEFINES += -D__ISA_AVX_FMA__
endif
ifeq ($(strip $(__ISA_AVX2__)),true)
DEFINES += -D__ISA_AVX2__
endif
ifeq ($(strip $(__ISA_AVX512__)),true)
DEFINES += -D__ISA_AVX512__
endif
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
DEFINES += -DENABLE_OMP_SIMD
endif
ifeq ($(strip $(OPT_SCHEME)),verletlist)
OPT_TAG = VL
endif
ifneq ($(strip $(SIMD)),NONE)
TOOLCHAIN = $(TOOLCHAIN)-$(ISA)-$(SIMD)
endif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 273 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

View File

@ -1,523 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="297mm"
height="210mm"
viewBox="0 0 297 210"
version="1.1"
id="svg5"
inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
sodipodi:docname="gather_bench.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview7"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:document-units="mm"
showgrid="false"
inkscape:zoom="0.73508842"
inkscape:cx="551.63432"
inkscape:cy="348.25743"
inkscape:window-width="1920"
inkscape:window-height="1011"
inkscape:window-x="0"
inkscape:window-y="165"
inkscape:window-maximized="1"
inkscape:current-layer="layer1" />
<defs
id="defs2">
<rect
x="144.01516"
y="304.36604"
width="248.99777"
height="100.91557"
id="rect79475" />
<rect
x="309.01869"
y="43.698615"
width="552.19421"
height="71.390348"
id="rect65238" />
<rect
x="762.55856"
y="341.3838"
width="277.62756"
height="105.0235"
id="rect47632" />
<linearGradient
inkscape:collect="always"
id="linearGradient40704">
<stop
style="stop-color:#ccffaa;stop-opacity:1;"
offset="0"
id="stop40700" />
<stop
style="stop-color:#ccffaa;stop-opacity:0;"
offset="1"
id="stop40702" />
</linearGradient>
<marker
style="overflow:visible;"
id="Arrow2Mend"
refX="0.0"
refY="0.0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(0.6) rotate(180) translate(0,0)"
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
style="stroke:context-stroke;fill-rule:evenodd;fill:context-stroke;stroke-width:0.62500000;stroke-linejoin:round;"
id="path39486" />
</marker>
<marker
style="overflow:visible;"
id="Arrow1Mend"
refX="0.0"
refY="0.0"
orient="auto"
inkscape:stockid="Arrow1Mend"
inkscape:isstock="true">
<path
transform="scale(0.4) rotate(180) translate(10,0)"
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
id="path39468" />
</marker>
<marker
style="overflow:visible;"
id="Arrow1Lend"
refX="0.0"
refY="0.0"
orient="auto"
inkscape:stockid="Arrow1Lend"
inkscape:isstock="true">
<path
transform="scale(0.8) rotate(180) translate(12.5,0)"
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
id="path39462" />
</marker>
<rect
x="707.09731"
y="616.36746"
width="407.71288"
height="417.08306"
id="rect24254" />
<rect
x="47.404365"
y="100.3268"
width="398.49855"
height="110.16514"
id="rect5050" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5-6" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5-6-1" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0-6" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0-6-2" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0-6-2-8" />
<marker
style="overflow:visible"
id="Arrow2Mend-2"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
id="path39486-3" />
</marker>
<marker
style="overflow:visible"
id="Arrow2Mend-2-5"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
id="path39486-3-9" />
</marker>
<marker
style="overflow:visible"
id="Arrow2Mend-2-5-2"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
id="path39486-3-9-8" />
</marker>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient40704"
id="linearGradient40706"
x1="324.58157"
y1="127.35331"
x2="363.61096"
y2="98.957848"
gradientUnits="userSpaceOnUse" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5-6-1-7" />
<rect
x="309.01868"
y="43.698616"
width="552.19421"
height="71.39035"
id="rect65238-1" />
</defs>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<rect
style="fill:#d5d5ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
id="rect55834"
width="250.31726"
height="74.676537"
x="25.257824"
y="97.277718" />
<rect
style="fill:#d5f6ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
id="rect55832"
width="250.35208"
height="64.461151"
x="25.256891"
y="32.817505" />
<rect
style="fill:#ccffaa;stroke:#091600;stroke-width:1.31891"
id="rect6462"
width="82.385742"
height="20.525751"
x="28.355024"
y="48.740646" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,17.244577,26.206534)"
id="text5048"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82948"><tspan
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
id="tspan82946">gather-bench</tspan></tspan></text>
<rect
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9"
width="18.764017"
height="20.965076"
x="39.518955"
y="140.726" />
<text
xml:space="preserve"
transform="matrix(0.33667319,0,0,0.33667319,25.589293,109.42998)"
id="text5048-3"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82950">L1</tspan></text>
<rect
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9-0"
width="21.653919"
height="24.193966"
x="97.687294"
y="138.51564" />
<text
xml:space="preserve"
transform="matrix(0.3885252,0,0,0.3885252,81.212654,102.39964)"
id="text5048-3-6"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82952">L2</tspan></text>
<rect
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9-0-6"
width="27.217058"
height="30.409672"
x="149.19933"
y="134.83977" />
<text
xml:space="preserve"
transform="matrix(0.48834178,0,0,0.48834178,128.49215,89.445174)"
id="text5048-3-6-1"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82954">L3</tspan></text>
<rect
style="fill:#eeaaff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9-0-6-7"
width="61.032539"
height="29.96501"
x="204.01265"
y="135.61238" />
<text
xml:space="preserve"
transform="matrix(0.48834178,0,0,0.48834178,182.37007,89.995434)"
id="text5048-3-6-1-9"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2-8);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82956">DRAM</tspan></text>
<rect
style="fill:#ffccaa;stroke:#091600;stroke-width:1.10636"
id="rect6462-6"
width="74.980759"
height="15.869514"
x="126.09525"
y="38.773243" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,115.65481,14.295323)"
id="text5048-7"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82958">Single gather</tspan></text>
<rect
style="fill:#ffccaa;stroke:#091600;stroke-width:1.03971"
id="rect6462-6-3"
width="66.071701"
height="15.904838"
x="126.90776"
y="63.642746" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,116.63325,39.114393)"
id="text5048-7-5"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82960">MD gathers</tspan></text>
<rect
style="fill:#afe9dd;stroke:#091600;stroke-width:1.02848"
id="rect6462-6-3-2"
width="64.479698"
height="15.947394"
x="206.65364"
y="52.98967" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,196.01512,28.482594)"
id="text5048-7-5-9"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82962">Contiguous</tspan></text>
<rect
style="fill:#afe9dd;stroke:#091600;stroke-width:0.987323"
id="rect6462-6-3-2-2"
width="59.269382"
height="15.988551"
x="208.16559"
y="76.856781" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,197.58604,52.220445)"
id="text5048-7-5-9-7"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82964">&quot;Random&quot;</tspan></text>
<text
xml:space="preserve"
transform="scale(0.26458333)"
id="text24252"
style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect24254)" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 193.10512,71.273276 206.30683,61.033513"
id="path39049"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 193.08841,71.196939 207.86207,84.43804"
id="path39053"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.39816;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 58.548229,151.24436 38.298093,0.25023"
id="path39219"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24847;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 119.19252,150.09399 29.28333,0.26095"
id="path39219-2"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 177.02022,150.44367 26.36623,0.26095"
id="path39219-2-0"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
d="m 48.145458,92.71788 -0.644819,47.57709"
id="path39377"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2)"
d="M 48.121208,92.873762 106.60807,137.41946"
id="path39377-7"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5)"
d="M 48.073928,92.825143 158.88023,133.04546"
id="path39377-7-2"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5-2)"
d="M 48.051946,92.813593 233.0959,134.16596"
id="path39377-7-2-9"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<rect
style="fill:#e9afaf;stroke:#091600;stroke-width:1.34518"
id="rect6462-6-3-2-2-3"
width="65.880661"
height="26.700579"
x="38.104012"
y="80.530182" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.365612,69.678744 h 2e-6"
id="path39808"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 111.64767,59.183009 6.84466,0.03069"
id="path41004"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 119.03378,47.056357 -0.58704,25.198541"
id="path41006"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.02423;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 118.07503,72.254897 7.94998,-0.05784"
id="path41008"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.882836;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 118.26666,47.054814 7.69322,0.173925"
id="path41112"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
d="M 68.213642,69.068864 67.910274,80.302728"
id="path55728"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,-1.3782637,4.0412367)"
id="text65236"
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="309.01953"
y="90.886691"
id="tspan82968"><tspan
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
id="tspan82966">Application Level</tspan></tspan></text>
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,2.7015103,160.71919)"
id="text65236-2"
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="309.01953"
y="90.886691"
id="tspan82972"><tspan
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
id="tspan82970">Hardware Level</tspan></tspan></text>
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,2.3490396,0.57331532)"
id="text79473"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect79475);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="144.01562"
y="339.75586"
id="tspan82974">vgather </tspan><tspan
x="144.01562"
y="389.75586"
id="tspan82976">instructions</tspan></text>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 128 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

View File

@ -1,17 +1,18 @@
CC = clang CC = /opt/homebrew/Cellar/llvm/18.1.5/bin/clang
LINKER = $(CC) LINKER = $(CC)
ANSI_CFLAGS = -ansi ANSI_CFLAGS = -ansi
ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -std=c99
ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -pedantic
ANSI_CFLAGS += -Wextra # ANSI_CFLAGS += -Wextra
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) -Xpreprocessor -fopenmp #-g
#CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g #CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
#CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g #CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
#CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g #CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g
ASFLAGS = -masm=intel ASFLAGS = #-masm=intel
LFLAGS = LFLAGS =
DEFINES = -D_GNU_SOURCE DEFINES = -D_GNU_SOURCE
INCLUDES = # MacOSX with Apple Silicon and homebrew
LIBS = -lm #-lomp INCLUDES = -I/opt/homebrew/Cellar/libomp/18.1.5/include/
LIBS = -lm -L/opt/homebrew/Cellar/libomp/18.1.5/lib/ -lomp

View File

@ -4,24 +4,18 @@
* Use of this source code is governed by a LGPL-3.0 * Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file. * license that can be found in the LICENSE file.
*/ */
#include <stdlib.h>
#include <time.h> #include <time.h>
double getTimeStamp() double getTimeStamp(void)
{ {
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
} }
double getTimeResolution() double getTimeResolution(void)
{ {
struct timespec ts; struct timespec ts;
clock_getres(CLOCK_MONOTONIC, &ts); clock_getres(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
} }
double getTimeStamp_()
{
return getTimeStamp();
}

View File

@ -9,6 +9,5 @@
extern double getTimeStamp(void); extern double getTimeStamp(void);
extern double getTimeResolution(void); extern double getTimeResolution(void);
extern double getTimeStamp_(void);
#endif #endif

View File

@ -5,7 +5,6 @@
* license that can be found in the LICENSE file. * license that can be found in the LICENSE file.
*/ */
#include <errno.h> #include <errno.h>
#include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -19,7 +18,8 @@
#define IR 2836 #define IR 2836
#define MASK 123459876 #define MASK 123459876
double myrandom(int* seed) { double myrandom(int* seed)
{
int k = (*seed) / IQ; int k = (*seed) / IQ;
double ans; double ans;
@ -29,7 +29,8 @@ double myrandom(int* seed) {
return ans; return ans;
} }
void random_reset(int *seed, int ibase, double *coord) { void random_reset(int* seed, int ibase, double* coord)
{
int i; int i;
char* str = (char*)&ibase; char* str = (char*)&ibase;
int n = sizeof(int); int n = sizeof(int);
@ -61,30 +62,41 @@ void random_reset(int *seed, int ibase, double *coord) {
// warm up the RNG // warm up the RNG
for (i = 0; i < 5; i++) myrandom(seed); for (i = 0; i < 5; i++)
myrandom(seed);
// save = 0; // save = 0;
} }
int str2ff(const char *string) { int str2ff(const char* string)
{
if (strncmp(string, "lj", 2) == 0) return FF_LJ; if (strncmp(string, "lj", 2) == 0) return FF_LJ;
if (strncmp(string, "eam", 3) == 0) return FF_EAM; if (strncmp(string, "eam", 3) == 0) return FF_EAM;
if (strncmp(string, "dem", 3) == 0) return FF_DEM; if (strncmp(string, "dem", 3) == 0) return FF_DEM;
return -1; return -1;
} }
const char* ff2str(int ff) { const char* ff2str(int ff)
if(ff == FF_LJ) { return "lj"; } {
if(ff == FF_EAM) { return "eam"; } if (ff == FF_LJ) {
if(ff == FF_DEM) { return "dem"; } return "lj";
}
if (ff == FF_EAM) {
return "eam";
}
if (ff == FF_DEM) {
return "dem";
}
return "invalid"; return "invalid";
} }
int get_cuda_num_threads() { int get_cuda_num_threads(void)
{
const char* num_threads_env = getenv("NUM_THREADS"); const char* num_threads_env = getenv("NUM_THREADS");
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env); return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
} }
void readline(char *line, FILE *fp) { void readline(char* line, FILE* fp)
{
if (fgets(line, MAXLINE, fp) == NULL) { if (fgets(line, MAXLINE, fp) == NULL) {
if (errno != 0) { if (errno != 0) {
perror("readline()"); perror("readline()");
@ -93,13 +105,16 @@ void readline(char *line, FILE *fp) {
} }
} }
void debug_printf(const char *format, ...) { void debug_printf(const char* format, ...)
{
#ifdef DEBUG #ifdef DEBUG
va_list arg; va_list arg;
int ret; int ret;
va_start(arg, format); va_start(arg, format);
if((vfprintf(stdout, format, arg)) < 0) { perror("debug_printf()"); } if ((vfprintf(stdout, format, arg)) < 0) {
perror("debug_printf()");
}
va_end(arg); va_end(arg);
#endif #endif
} }

102
src/verletlist/atom.h Normal file
View File

@ -0,0 +1,102 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
#ifdef CUDA_TARGET
#define KERNEL_NAME "CUDA"
#define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
#define initialIntegrate initialIntegrate_cuda
#define finalIntegrate finalIntegrate_cuda
#define buildNeighbor buildNeighbor_cuda
#define updatePbc updatePbc_cuda
#define updateAtomsPbc updateAtomsPbc_cuda
#else
#ifdef USE_SIMD_KERNEL
#define KERNEL_NAME "SIMD"
#define computeForceLJFullNeigh computeForceLJFullNeigh_simd
#else
#define KERNEL_NAME "PLAIN"
#endif
#define initialIntegrate initialIntegrate_cpu
#define finalIntegrate finalIntegrate_cpu
#define buildNeighbor buildNeighbor_cpu
#define updatePbc updatePbc_cpu
#define updateAtomsPbc updateAtomsPbc_cpu
#endif
typedef struct {
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int* border_map;
int* type;
MD_FLOAT* epsilon;
MD_FLOAT* sigma6;
MD_FLOAT* cutforcesq;
MD_FLOAT* cutneighsq;
} DeviceAtom;
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int* border_map;
int* type;
int ntypes;
MD_FLOAT* epsilon;
MD_FLOAT* sigma6;
MD_FLOAT* cutforcesq;
MD_FLOAT* cutneighsq;
// DEM
MD_FLOAT* radius;
MD_FLOAT* av;
MD_FLOAT* r;
// Device data
DeviceAtom d_atom;
} Atom;
extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern int readAtom_in(Atom*, Parameter*);
extern void writeAtom(Atom*, Parameter*);
extern void growAtom(Atom*);
#ifdef AOS
#define POS_DATA_LAYOUT "AoS"
#define atom_x(i) atom->x[(i) * 3 + 0]
#define atom_y(i) atom->x[(i) * 3 + 1]
#define atom_z(i) atom->x[(i) * 3 + 2]
#define atom_vx(i) atom->vx[(i) * 3 + 0]
#define atom_vy(i) atom->vx[(i) * 3 + 1]
#define atom_vz(i) atom->vx[(i) * 3 + 2]
#define atom_fx(i) atom->fx[(i) * 3 + 0]
#define atom_fy(i) atom->fx[(i) * 3 + 1]
#define atom_fz(i) atom->fx[(i) * 3 + 2]
#else
#define POS_DATA_LAYOUT "SoA"
#define atom_x(i) atom->x[i]
#define atom_y(i) atom->y[i]
#define atom_z(i) atom->z[i]
#define atom_vx(i) atom->vx[i]
#define atom_vy(i) atom->vy[i]
#define atom_vz(i) atom->vz[i]
#define atom_fx(i) atom->fx[i]
#define atom_fy(i) atom->fy[i]
#define atom_fz(i) atom->fz[i]
#endif
#endif

View File

@ -0,0 +1,112 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
//---
#include <atom.h>
#include <likwid-marker.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
#ifdef __SIMD_KERNEL__
#include <simd.h>
#endif
double computeForceLJFullNeigh_simd(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for (int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#ifndef __SIMD_KERNEL__
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
exit(-1);
#else
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
for (int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
MD_SIMD_FLOAT fix = simd_zero();
MD_SIMD_FLOAT fiy = simd_zero();
MD_SIMD_FLOAT fiz = simd_zero();
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
// If the last iteration of this loop is separated from the rest, this
// mask can be set only there
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
numneighs_vec);
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
#ifdef AOS
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
MD_SIMD_FLOAT delx = xtmp -
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp -
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp -
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
#else
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
#endif
MD_SIMD_FLOAT rsq = simd_fma(delx,
delx,
simd_fma(dely, dely, simd_mul(delz, delz)));
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
simd_mask_cond_lt(rsq, cutforcesq_vec));
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
MD_SIMD_FLOAT force = simd_mul(c48_vec,
simd_mul(sr6,
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
}
atom_fx(i) += simd_h_reduce_sum(fix);
atom_fy(i) += simd_h_reduce_sum(fiy);
atom_fz(i) += simd_h_reduce_sum(fiz);
}
LIKWID_MARKER_STOP("force");
}
#endif
double E = getTimeStamp();
return E - S;
}

View File

@ -4,9 +4,6 @@
* Use of this source code is governed by a LGPL-3.0 * Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file. * license that can be found in the LICENSE file.
*/ */
#include <stdio.h>
#include <stdlib.h>
//---
#include <atom.h> #include <atom.h>
#include <likwid-marker.h> #include <likwid-marker.h>
#include <neighbor.h> #include <neighbor.h>
@ -14,10 +11,6 @@
#include <stats.h> #include <stats.h>
#include <timing.h> #include <timing.h>
#ifdef __SIMD_KERNEL__
#include <simd.h>
#endif
double computeForceLJFullNeigh( double computeForceLJFullNeigh(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats) Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{ {
@ -203,96 +196,3 @@ double computeForceLJHalfNeigh(
double timeStop = getTimeStamp(); double timeStop = getTimeStamp();
return timeStop - timeStart; return timeStop - timeStart;
} }
double computeForceLJFullNeigh_simd(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for (int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#ifndef __SIMD_KERNEL__
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
exit(-1);
#else
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
for (int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
MD_SIMD_FLOAT fix = simd_zero();
MD_SIMD_FLOAT fiy = simd_zero();
MD_SIMD_FLOAT fiz = simd_zero();
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
// If the last iteration of this loop is separated from the rest, this
// mask can be set only there
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
numneighs_vec);
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
#ifdef AOS
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
MD_SIMD_FLOAT delx = xtmp -
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp -
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp -
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
#else
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
#endif
MD_SIMD_FLOAT rsq = simd_fma(delx,
delx,
simd_fma(dely, dely, simd_mul(delz, delz)));
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
simd_mask_cond_lt(rsq, cutforcesq_vec));
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
MD_SIMD_FLOAT force = simd_mul(c48_vec,
simd_mul(sr6,
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
}
atom_fx(i) += simd_h_reduce_sum(fix);
atom_fy(i) += simd_h_reduce_sum(fiy);
atom_fz(i) += simd_h_reduce_sum(fiz);
}
LIKWID_MARKER_STOP("force");
}
#endif
double E = getTimeStamp();
return E - S;
}

View File

@ -10,8 +10,8 @@
#include <string.h> #include <string.h>
#include <unistd.h> #include <unistd.h>
// #include <omp.h>
#include <likwid-marker.h> #include <likwid-marker.h>
#include <omp.h>
#include <allocate.h> #include <allocate.h>
#include <atom.h> #include <atom.h>
@ -30,8 +30,8 @@
#define HLINE "------------------------------------------------------------------\n" #define HLINE "------------------------------------------------------------------\n"
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*); extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*); extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*); extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
@ -325,35 +325,38 @@ int main(int argc, char** argv)
timer[TOTAL] - timer[FORCE] - timer[NEIGH]); timer[TOTAL] - timer[FORCE] - timer[NEIGH]);
printf(HLINE); printf(HLINE);
// int nthreads = 0; int nthreads = 0;
// int chunkSize = 0; int chunkSize = 0;
// omp_sched_t schedKind; omp_sched_t schedKind;
// char schedType[10]; char schedType[10];
// #pragma omp parallel #pragma omp parallel
// #pragma omp master #pragma omp master
// { {
// omp_get_schedule(&schedKind, &chunkSize); omp_get_schedule(&schedKind, &chunkSize);
//
// switch (schedKind) { switch (schedKind) {
// case omp_sched_static: case omp_sched_static:
// strcpy(schedType, "static"); strcpy(schedType, "static");
// break; break;
// case omp_sched_dynamic: case omp_sched_dynamic:
// strcpy(schedType, "dynamic"); strcpy(schedType, "dynamic");
// break; break;
// case omp_sched_guided: case omp_sched_guided:
// strcpy(schedType, "guided"); strcpy(schedType, "guided");
// break; break;
// case omp_sched_auto: case omp_sched_auto:
// strcpy(schedType, "auto"); strcpy(schedType, "auto");
// break; break;
// } case omp_sched_monotonic:
// strcpy(schedType, "auto");
// nthreads = omp_get_max_threads(); break;
// } }
//
// printf("Num threads: %d\n", nthreads); nthreads = omp_get_max_threads();
// printf("Schedule: (%s,%d)\n", schedType, chunkSize); }
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n", printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]); 1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]);

View File

@ -1,37 +0,0 @@
# Utility tools for MD-Bench
**mdBench.c:** Single file version for MD-Bench, used mostly for teaching purposes.
**run_stub.sh:** Bash script to run the MD-Bench stubbed force calculation for different configurations and evaluate the performance.
The configuration parameters are:
- **-a <numbers>:** specify the number of atoms per unit cell (the number of neighbors per atom is this value minus 1), the default is 8.
- **-n <numbers>:** timesteps to run the simulation, the default is 200.
- **-nx <numbers>:** number of unit cells in the x dimension, the default is 4.
- **-ny <numbers>:** number of unit cells in the y dimension, the default is 4.
- **-nz <numbers>:** number of unit cells in the z dimension, the default is 2.
Notice that these parameters can also be specified as lists, which executes the stubbed force calculation several times varying the specific parameter to each element of the list, and hence all combinations of parameters will be executed. For example, the following command:
```bash
bash run_stub.sh -a "8 16" -nx "4 8" -ny 8 -nz 4
```
Will execute the stubbed force calculation for the following 4 configurations:
```bash
1> 8 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
2> 16 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
3> 8 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
4> 16 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
```
The following parameters are also available:
- **-f <frequency>:** CPU frequency in GHz (assure your CPU frequency is fixed by disabling Turbo mode), more performance metrics such as cycles per iteration are displayed if this option is defined.
- **-o <file>:** output file (.txt) for the results, the default is *run_results.txt*.
- **-r <runs>:** number of runs for each configuration (only the values for the best run are displayed), the default is 3.
**plot_run_stub_data.py:** Python script to plot the data generated by the *run_stub.sh* script. Just provide the name of the .txt file as a parameter and this script generates a corresponding PDF with the same file name.
**plot_gather_data.py:** Python script to plot the data generated by the gather benchmark. Just provide the name of the .txt file containing the gather output as a parameter and this script generates a corresponding PDF with the same file name. Multiple outputs with different strides can be included in the text file by concatenating the outputs. The script handles output from both standard simple array case and MD variant.
**cache.py:** Python script to run the cache simulator with the data obtained from the memory tracer. Just run it with the tracer output file name as a parameter. The cache specifications can be directly adapted in the script to match those of the target processor of interest.

View File

@ -1,33 +0,0 @@
import sys
from cachesim import CacheSimulator, Cache, MainMemory
filename = sys.argv[1]
mem = MainMemory()
#l3 = Cache("L3", 20480, 16, 64, "LRU") # 20MB: 20480 sets, 16-ways with cacheline size of 64 bytes
#l2 = Cache("L2", 256, 4, 64, "LRU", store_to=l3, load_from=l3) # 256KB
#l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2) # 32KB
# Cascade Lake
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
mem.load_to(l2)
mem.store_from(l3)
cs = CacheSimulator(l1, mem)
with open(filename, 'r') as fp:
for line in fp.readlines():
op, addr = line.split(": ")
op = op[0]
addr = int(addr, 16)
if op == 'W':
cs.store(addr, length=8)
elif op == 'R':
cs.load(addr, length=8)
else:
sys.exit("Invalid operation: {}".format(op))
cs.force_write_back()
cs.print_stats()

View File

@ -1,39 +0,0 @@
import sys
from cachesim import CacheSimulator, Cache, MainMemory
def get_set_id(cache, addr):
return (addr >> cache.cl_bits) % cache.sets
filename = sys.argv[1]
N = sys.argv[2]
mem = MainMemory()
# Cascade Lake
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
mem.load_to(l2)
mem.store_from(l3)
cs = CacheSimulator(l1, mem)
sets_hist = {
'l1': {s: 0 for s in range(l1.sets)},
'l2': {s: 0 for s in range(l2.sets)},
'l3': {s: 0 for s in range(l3.sets)}
}
with open(filename, 'r') as fp:
for line in fp.readlines():
op, addr = line.split(": ")
op = op[0]
addr = int(addr, 16)
sets_hist['l1'][get_set_id(l1, addr)] += 1
sets_hist['l2'][get_set_id(l2, addr)] += 1
sets_hist['l3'][get_set_id(l3, addr)] += 1
for cache_level, data in sets_hist.items():
if cache_level != 'l3':
print(cache_level, ": ")
for set_id in data:
if data[set_id] > 0:
print(set_id, " -> ", data[set_id])

View File

@ -1,116 +0,0 @@
#!/bin/bash
[[ -z "$1" ]] && echo "Use: $0 <binary> [-c <core>] [-f <freq>] [-n <nruns>] [-l <log>] [-s]" && exit
[[ ! -f "$1" ]] && echo "Binary file not found, make sure to use 'make'" && exit
[[ ! -f "$1-stub" ]] && echo "Binary file for stubbed case not found, make sure to use 'make VARIANT=stub'" && exit
MDBENCH_BIN=$1
BIN_INFO="${MDBENCH_BIN#*-}" # $OPT_SCHEME-$TAG-$ISA-$PREC
OPT_SCHEME="${BIN_INFO%%-*}"
PREC="${BIN_INFO##*-}"
BIN_INFO="${BIN_INFO#*-}" # $TAG-$ISA-$PREC
BIN_INFO="${BIN_INFO%-*}" # $TAG-$ISA
TAG="${BIN_INFO%%-*}"
ISA="${BIN_INFO##*-}"
CORE="${CORE:-0}"
FREQ="${FREQ:-2.4}"
NRUNS="${NRUNS:-3}"
LOG="${LOG:-latencies_and_cfds.$(hostname).log}"
STUB_ONLY="${STUB_ONLY:-false}"
SKIP_SET_FREQ="${SKIP_SET_FREQ:-false}"
OPTIND=2
while getopts "c:f:n:l:s" flag; do
case "${flag}" in
c) CORE=${OPTARG};;
f) FREQ=${OPTARG};;
n) NRUNS=${OPTARG};;
l) LOG=${OPTARG};;
s) STUB_ONLY=true;;
esac
done
# Other useful variables
MDBENCH_BIN=./MDBench-$OPT_SCHEME-$TAG-$ISA-$PREC
FIXED_PARAMS="--freq $FREQ"
CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3)
if [ "$CPU_VENDOR" == "GenuineIntel" ]; then
ALL_PREFETCHERS="HW_PREFETCHER,CL_PREFETCHER,DCU_PREFETCHER,IP_PREFETCHER"
DEFAULT_PREFETCHERS=("ALL HW_PREFETCHER CL_PREFETCHER DCU_PREFETCHER IP_PREFETCHER NONE")
else
ALL_PREFETCHERS=""
DEFAULT_PREFETCHERS=("IGNORE")
fi
if [ -z ${PREFETCHERS+x} ]; then
PREFETCHERS=${DEFAULT_PREFETCHERS}
fi
if [ "$OPT_SCHEME" == "gromacs" ]; then
STUB1_NAME=stub-33
STUB1_PARAMS="-na 4 -nn 33"
STUB2_NAME=stub-128
STUB2_PARAMS="-na 4 -nn 128"
else
STUB1_NAME=stub-76
STUB1_PARAMS="-nn 76"
STUB2_NAME=stub-1024
STUB2_PARAMS="-nn 1024"
fi
function run_benchmark() {
BEST=10000000
for i in $(seq $NRUNS); do
RES=$(likwid-pin -c $CORE "$* $FIXED_PARAMS" 2>&1 | grep "Cycles/SIMD iteration" | cut -d ' ' -f3)
if (( $(echo "$BEST > $RES" | bc -l ) )); then
BEST=$RES
fi
done
}
echo "Tag: $TAG" | tee -a $LOG
echo "Optimization scheme: $OPT_SCHEME" | tee -a $LOG
echo "Instruction set: $ISA" | tee -a $LOG
echo "Precision: $PREC" | tee -a $LOG
echo "Binary: $MDBENCH_BIN(-stub)" | tee -a $LOG
echo "Frequency: $FREQ" | tee -a $LOG
echo "Number of runs: $NRUNS" | tee -a $LOG
echo "Run only stubbed cases: $STUB_ONLY" | tee -a $LOG
if [ "$SKIP_SET_FREQ" == "false" ]; then
echo "Fixing frequencies..."
likwid-setFrequencies -f $FREQ -t 0
fi
for p in $PREFETCHERS; do
if [ "$p" != "IGNORE" ]; then
if [ "$p" == "ALL" ]; then
likwid-features -c $CORE -e $ALL_PREFETCHERS
elif [ "$p" == "NONE" ]; then
likwid-features -c $CORE -d $ALL_PREFETCHERS
else
likwid-features -c $CORE -d $ALL_PREFETCHERS
likwid-features -c $CORE -e $p
fi
echo "Prefetcher settings: $p"
likwid-features -c $CORE -l
fi
MSG="$p: "
if [ "$STUB_ONLY" == "false" ]; then
run_benchmark $MDBENCH_BIN
MSG+="standard=$BEST, "
run_benchmark $MDBENCH_BIN -i data/copper_melting/input_lj_cu_one_atomtype_20x20x20.dmp
MSG+="melt=$BEST, "
run_benchmark $MDBENCH_BIN -p data/argon_1000/mdbench_params.conf -i data/argon_1000/tprout.gro
MSG+="argon=$BEST, "
fi
run_benchmark $MDBENCH_BIN-stub $STUB1_PARAMS
MSG+="$STUB1_NAME=$BEST, "
run_benchmark $MDBENCH_BIN-stub $STUB2_PARAMS
MSG+="$STUB2_NAME=$BEST"
echo $MSG | tee -a $LOG
done

View File

@ -1,52 +0,0 @@
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) RRZE-HPC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,126 +0,0 @@
#CONFIGURE BUILD SYSTEM
TARGET = gather-bench-$(TAG)
BUILD_DIR = ./$(TAG)
SRC_DIR = ./src
MAKE_DIR = ./
ISA_DIR = ./src/$(ISA)
Q ?= @
#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
include $(MAKE_DIR)/include_LIKWID.mk
INCLUDES += -I./src/includes
VPATH = $(SRC_DIR) ${ISA_DIR}
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
ASM += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.f90))
OBJ = $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(SRC_DIR)/%.cc, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cc))
OBJ += $(patsubst $(SRC_DIR)/%.cpp, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
OBJ += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
OBJ += $(patsubst $(SRC_DIR)/%.F90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
OBJ += $(patsubst $(SRC_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.s))
OBJ += $(patsubst $(ISA_DIR)/%.S, $(BUILD_DIR)/%.o,$(wildcard $(ISA_DIR)/*.S))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) -DISA_$(ISA)
ifneq ($(VARIANT),)
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
endif
ifeq ($(strip $(DATA_LAYOUT)),AOS)
CPPFLAGS += -DAOS
endif
ifeq ($(strip $(TEST)),true)
CPPFLAGS += -DTEST
endif
ifeq ($(strip $(PADDING)),true)
CPPFLAGS += -DPADDING
endif
ifeq ($(strip $(MEASURE_GATHER_CYCLES)),true)
CPPFLAGS += -DMEASURE_GATHER_CYCLES
endif
ifeq ($(strip $(ONLY_FIRST_DIMENSION)),true)
CPPFLAGS += -DONLY_FIRST_DIMENSION
endif
ifeq ($(strip $(MEM_TRACER)),true)
CPPFLAGS += -DMEM_TRACER
endif
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c
@echo "===> LINKING $(TARGET)"
$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET) $(SRC_DIR)/main.c $(OBJ) $(LIBS)
${TARGET}-%: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main-%.c
@echo "===> LINKING $(TARGET)-$* "
$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)
asm: $(BUILD_DIR) $(ASM)
$(BUILD_DIR)/%.o: %.c
@echo "===> COMPILE $@"
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.s: %.c
@echo "===> GENERATE ASM $@"
$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
$(BUILD_DIR)/%.s: %.f90
@echo "===> COMPILE $@"
$(Q)$(FC) -S $(FCFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.cc
@echo "===> COMPILE $@"
$(Q)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
$(Q)$(CXX) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.o: %.cpp
@echo "===> COMPILE $@"
$(Q)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
$(Q)$(CXX) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.o: %.f90
@echo "===> COMPILE $@"
$(Q)$(FC) -c $(FCFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.F90
@echo "===> COMPILE $@"
$(Q)$(FC) -c $(CPPFLAGS) $(FCFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.s
@echo "===> ASSEMBLE $@"
$(Q)$(AS) $(ASFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.S
@echo "===> ASSEMBLE $@"
$(Q)$(CC) -c $(CPPFLAGS) $< -o $@
tags:
@echo "===> GENERATE TAGS"
$(Q)ctags -R
$(BUILD_DIR):
@mkdir $(BUILD_DIR)
ifeq ($(findstring $(MAKECMDGOALS),clean),)
-include $(OBJ:.o=.d)
endif
.PHONY: clean distclean
clean:
@echo "===> CLEAN"
@rm -rf $(BUILD_DIR)
@rm -f tags
distclean: clean
@echo "===> DIST CLEAN"
@rm -f $(TARGET)
@rm -f tags

View File

@ -1,2 +0,0 @@
# gather-bench
A X86 gather instruction performance benchmark

View File

@ -1,22 +0,0 @@
# Supported: GCC, CLANG, ICC
TAG ?= ICC
# Supported: avx2, avx512
ISA ?= avx512
# Use likwid?
ENABLE_LIKWID ?= false
# SP or DP
DATA_TYPE ?= DP
# AOS or SOA
DATA_LAYOUT ?= AOS
# Padding byte for AoS
PADDING ?= false
# Measure cycles for each gather separately
MEASURE_GATHER_CYCLES ?= false
# Gather data only for first dimension (one gather per iteration)
ONLY_FIRST_DIMENSION ?= false
# Trace memory addresses for cache simulator
MEM_TRACER ?= false
# Test correctness of gather kernels
TEST ?= false

View File

@ -1,9 +0,0 @@
CC = clang
LINKER = $(CC)
OPENMP =# -fopenmp
CFLAGS = -Ofast -std=c11 -march=core-avx2 -mavx -mfma $(OPENMP)
LFLAGS = $(OPENMP) -march=core-avx2 -mavx -mfma
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -1,11 +0,0 @@
CC = gcc
AS = as
LINKER = $(CC)
OPENMP = -fopenmp
CFLAGS = -Ofast -std=c11 -mavx2 -mfma $(OPENMP)
ASFLAGS =
LFLAGS = $(OPENMP) -mavx2 -mfma
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -1,9 +0,0 @@
CC = icc
LINKER = $(CC)
OPENMP = -qopenmp
CFLAGS = -Ofast -xhost -std=c11 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS =

View File

@ -1,10 +0,0 @@
LIKWID_INC ?= -I/usr/local/include
LIKWID_DEFINES ?= -DLIKWID_PERFMON
LIKWID_LIB ?= -L/usr/local/lib
ifeq ($(strip $(ENABLE_LIKWID)),true)
INCLUDES += ${LIKWID_INC}
DEFINES += ${LIKWID_DEFINES}
LIBS += -llikwid
LFLAGS += ${LIKWID_LIB}
endif

View File

@ -1,57 +0,0 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
void* allocate (int alignment, size_t bytesize)
{
int errorCode;
void* ptr;
errorCode = posix_memalign(&ptr, alignment, bytesize);
if (errorCode) {
if (errorCode == EINVAL) {
fprintf(stderr,
"Error: Alignment parameter is not a power of two\n");
exit(EXIT_FAILURE);
}
if (errorCode == ENOMEM) {
fprintf(stderr,
"Error: Insufficient memory to fulfill the request\n");
exit(EXIT_FAILURE);
}
}
if (ptr == NULL) {
fprintf(stderr, "Error: posix_memalign failed!\n");
exit(EXIT_FAILURE);
}
return ptr;
}

Some files were not shown because too many files have changed in this diff Show More