Refactor code
This commit is contained in:
parent
9712d7e2c8
commit
8d0a8b5f9c
122
Makefile
122
Makefile
@ -1,109 +1,30 @@
|
|||||||
#CONFIGURE BUILD SYSTEM
|
#CONFIGURE BUILD SYSTEM
|
||||||
IDENTIFIER = $(OPT_SCHEME)-$(TAG)-$(ISA)-$(DATA_TYPE)
|
TAG = $(OPT_TAG)-$(TOOLCHAIN)-$(DATA_TYPE)
|
||||||
TARGET = MDBench-$(IDENTIFIER)
|
TARGET = MDBench-$(TAG)
|
||||||
BUILD_DIR = ./build-$(IDENTIFIER)
|
BUILD_DIR = ./build/build-$(TAG)
|
||||||
SRC_DIR = ./$(OPT_SCHEME)
|
SRC_ROOT = ./src
|
||||||
ASM_DIR = ./asm
|
SRC_DIR = $(SRC_ROOT)/$(OPT_SCHEME)
|
||||||
COMMON_DIR = ./common
|
COMMON_DIR = $(SRC_ROOT)/common
|
||||||
CUDA_DIR = ./$(SRC_DIR)/cuda
|
CUDA_DIR = $(SRC_DIR)/cuda
|
||||||
MAKE_DIR = ./
|
MAKE_DIR = ./make
|
||||||
Q ?= @
|
Q ?= @
|
||||||
|
|
||||||
#DO NOT EDIT BELOW
|
#DO NOT EDIT BELOW
|
||||||
include $(MAKE_DIR)/config.mk
|
include config.mk
|
||||||
include $(MAKE_DIR)/include_$(TAG).mk
|
include $(MAKE_DIR)/include_$(TOOLCHAIN).mk
|
||||||
include $(MAKE_DIR)/include_LIKWID.mk
|
include $(MAKE_DIR)/include_LIKWID.mk
|
||||||
|
ifneq ($(strip $(ISA)),NONE)
|
||||||
include $(MAKE_DIR)/include_ISA.mk
|
include $(MAKE_DIR)/include_ISA.mk
|
||||||
|
endif
|
||||||
include $(MAKE_DIR)/include_GROMACS.mk
|
include $(MAKE_DIR)/include_GROMACS.mk
|
||||||
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
|
INCLUDES += -I./$(SRC_DIR) -I./$(COMMON_DIR)
|
||||||
|
|
||||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
VPATH = $(SRC_DIR) $(COMMON_DIR) $(CUDA_DIR)
|
||||||
DEFINES += -DAOS
|
|
||||||
endif
|
|
||||||
ifeq ($(strip $(DATA_TYPE)),SP)
|
|
||||||
DEFINES += -DPRECISION=1
|
|
||||||
else
|
|
||||||
DEFINES += -DPRECISION=2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(ASM_SYNTAX), ATT)
|
|
||||||
ASFLAGS += -masm=intel
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(SORT_ATOMS)),true)
|
|
||||||
DEFINES += -DSORT_ATOMS
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
|
||||||
DEFINES += -DEXPLICIT_TYPES
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(MEM_TRACER)),true)
|
|
||||||
DEFINES += -DMEM_TRACER
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(INDEX_TRACER)),true)
|
|
||||||
DEFINES += -DINDEX_TRACER
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(COMPUTE_STATS)),true)
|
|
||||||
DEFINES += -DCOMPUTE_STATS
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(XTC_OUTPUT)),true)
|
|
||||||
DEFINES += -DXTC_OUTPUT
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
|
||||||
DEFINES += -DUSE_REFERENCE_VERSION
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
|
|
||||||
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(DEBUG)),true)
|
|
||||||
DEFINES += -DDEBUG
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(VECTOR_WIDTH),)
|
|
||||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(__SIMD_KERNEL__)),true)
|
|
||||||
DEFINES += -D__SIMD_KERNEL__
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(__SSE__)),true)
|
|
||||||
DEFINES += -D__ISA_SSE__
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(__ISA_AVX__)),true)
|
|
||||||
DEFINES += -D__ISA_AVX__
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
|
||||||
DEFINES += -D__ISA_AVX_FMA__
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(__ISA_AVX2__)),true)
|
|
||||||
DEFINES += -D__ISA_AVX2__
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(__ISA_AVX512__)),true)
|
|
||||||
DEFINES += -D__ISA_AVX512__
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
|
|
||||||
DEFINES += -DENABLE_OMP_SIMD
|
|
||||||
endif
|
|
||||||
|
|
||||||
VPATH = $(SRC_DIR) $(ASM_DIR) $(CUDA_DIR)
|
|
||||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||||
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||||
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
||||||
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
|
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
|
||||||
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%-common.o,$(wildcard $(COMMON_DIR)/*.c))
|
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(COMMON_DIR)/*.c))
|
||||||
ifeq ($(strip $(TAG)),NVCC)
|
# CUDA objects are only added for the NVCC toolchain. The compiler selection lives in TOOLCHAIN; TAG is the composite build identifier and never equals NVCC.
ifeq ($(strip $(TOOLCHAIN)),NVCC)
|
||||||
OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu))
|
OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu))
|
||||||
endif
|
endif
|
||||||
@ -129,11 +50,6 @@ $(BUILD_DIR)/%.o: %.c
|
|||||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||||
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
||||||
|
|
||||||
$(BUILD_DIR)/%-common.o: $(COMMON_DIR)/%.c
|
|
||||||
$(info ===> COMPILE $@)
|
|
||||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
|
||||||
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
|
||||||
|
|
||||||
$(BUILD_DIR)/%-cuda.o: %.cu
|
$(BUILD_DIR)/%-cuda.o: %.cu
|
||||||
$(info ===> COMPILE $@)
|
$(info ===> COMPILE $@)
|
||||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||||
@ -152,18 +68,16 @@ $(BUILD_DIR)/%.o: %.s
|
|||||||
clean:
|
clean:
|
||||||
$(info ===> CLEAN)
|
$(info ===> CLEAN)
|
||||||
@rm -rf $(BUILD_DIR)
|
@rm -rf $(BUILD_DIR)
|
||||||
@rm -rf $(TARGET)*
|
|
||||||
@rm -f tags
|
|
||||||
|
|
||||||
cleanall:
|
cleanall:
|
||||||
$(info ===> CLEAN)
|
$(info ===> CLEAN)
|
||||||
@rm -rf build-*
|
@rm -rf build
|
||||||
@rm -rf MDBench-*
|
@rm -rf MDBench-*
|
||||||
@rm -f tags
|
@rm -f tags
|
||||||
|
|
||||||
distclean: clean
|
distclean: clean
|
||||||
$(info ===> DIST CLEAN)
|
$(info ===> DIST CLEAN)
|
||||||
@rm -f $(TARGET)*
|
@rm -f $(TARGET)
|
||||||
@rm -f tags
|
@rm -f tags
|
||||||
|
|
||||||
info:
|
info:
|
||||||
@ -177,6 +91,6 @@ tags:
|
|||||||
$(Q)ctags -R
|
$(Q)ctags -R
|
||||||
|
|
||||||
$(BUILD_DIR):
|
$(BUILD_DIR):
|
||||||
@mkdir $(BUILD_DIR)
|
@mkdir -p $(BUILD_DIR)
|
||||||
|
|
||||||
-include $(OBJ:.o=.d)
|
-include $(OBJ:.o=.d)
|
||||||
|
99
config.mk
99
config.mk
@ -1,7 +1,8 @@
|
|||||||
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
# Compiler tool chain (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
||||||
TAG ?= CLANG
|
TOOLCHAIN ?= CLANG
|
||||||
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
|
# Instruction set for intrinsic kernels (NONE/SSE/AVX/AVX_FMA/AVX2/AVX512/ARM)
|
||||||
ISA ?= SSE
|
ISA ?= ARM
|
||||||
|
SIMD ?= NONE
|
||||||
# Optimization scheme (verletlist/clusterpair/clusters_per_bin)
|
# Optimization scheme (verletlist/clusterpair/clusters_per_bin)
|
||||||
OPT_SCHEME ?= verletlist
|
OPT_SCHEME ?= verletlist
|
||||||
# Enable likwid (true or false)
|
# Enable likwid (true or false)
|
||||||
@ -47,3 +48,93 @@ USE_CUDA_HOST_MEMORY ?= false
|
|||||||
#Feature options
|
#Feature options
|
||||||
OPTIONS = -DALIGNMENT=64
|
OPTIONS = -DALIGNMENT=64
|
||||||
#OPTIONS += More options
|
#OPTIONS += More options
|
||||||
|
|
||||||
|
#DO NOT EDIT BELOW
|
||||||
|
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||||
|
DEFINES += -DAOS
|
||||||
|
endif
|
||||||
|
ifeq ($(strip $(DATA_TYPE)),SP)
|
||||||
|
DEFINES += -DPRECISION=1
|
||||||
|
else
|
||||||
|
DEFINES += -DPRECISION=2
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(ASM_SYNTAX), ATT)
|
||||||
|
ASFLAGS += -masm=intel
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(SORT_ATOMS)),true)
|
||||||
|
DEFINES += -DSORT_ATOMS
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||||
|
DEFINES += -DEXPLICIT_TYPES
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(MEM_TRACER)),true)
|
||||||
|
DEFINES += -DMEM_TRACER
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(INDEX_TRACER)),true)
|
||||||
|
DEFINES += -DINDEX_TRACER
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(COMPUTE_STATS)),true)
|
||||||
|
DEFINES += -DCOMPUTE_STATS
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||||
|
DEFINES += -DXTC_OUTPUT
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
||||||
|
DEFINES += -DUSE_REFERENCE_VERSION
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
|
||||||
|
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(DEBUG)),true)
|
||||||
|
DEFINES += -DDEBUG
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(VECTOR_WIDTH),)
|
||||||
|
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__SIMD_KERNEL__)),true)
|
||||||
|
DEFINES += -D__SIMD_KERNEL__
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__SSE__)),true)
|
||||||
|
DEFINES += -D__ISA_SSE__
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__ISA_AVX__)),true)
|
||||||
|
DEFINES += -D__ISA_AVX__
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
||||||
|
DEFINES += -D__ISA_AVX_FMA__
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__ISA_AVX2__)),true)
|
||||||
|
DEFINES += -D__ISA_AVX2__
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(__ISA_AVX512__)),true)
|
||||||
|
DEFINES += -D__ISA_AVX512__
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
|
||||||
|
DEFINES += -DENABLE_OMP_SIMD
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(strip $(OPT_SCHEME)),verletlist)
|
||||||
|
OPT_TAG = VL
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(strip $(SIMD)),NONE)
|
||||||
|
# Put the ISA/SIMD suffix on the build tag rather than on TOOLCHAIN. TOOLCHAIN is recursively expanded, so defining it in terms of itself aborts make, and its plain value is still needed to select include_$(TOOLCHAIN).mk. NOTE(review): assumes this file is included after the Makefile's own TAG definition - confirm.
TAG = $(OPT_TAG)-$(TOOLCHAIN)-$(ISA)-$(SIMD)-$(DATA_TYPE)
|
||||||
|
endif
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 273 KiB |
Binary file not shown.
Before Width: | Height: | Size: 98 KiB |
@ -1,523 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
|
||||||
|
|
||||||
<svg
|
|
||||||
width="297mm"
|
|
||||||
height="210mm"
|
|
||||||
viewBox="0 0 297 210"
|
|
||||||
version="1.1"
|
|
||||||
id="svg5"
|
|
||||||
inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
|
|
||||||
sodipodi:docname="gather_bench.svg"
|
|
||||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
|
||||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
xmlns="http://www.w3.org/2000/svg"
|
|
||||||
xmlns:svg="http://www.w3.org/2000/svg">
|
|
||||||
<sodipodi:namedview
|
|
||||||
id="namedview7"
|
|
||||||
pagecolor="#ffffff"
|
|
||||||
bordercolor="#666666"
|
|
||||||
borderopacity="1.0"
|
|
||||||
inkscape:pageshadow="2"
|
|
||||||
inkscape:pageopacity="0.0"
|
|
||||||
inkscape:pagecheckerboard="0"
|
|
||||||
inkscape:document-units="mm"
|
|
||||||
showgrid="false"
|
|
||||||
inkscape:zoom="0.73508842"
|
|
||||||
inkscape:cx="551.63432"
|
|
||||||
inkscape:cy="348.25743"
|
|
||||||
inkscape:window-width="1920"
|
|
||||||
inkscape:window-height="1011"
|
|
||||||
inkscape:window-x="0"
|
|
||||||
inkscape:window-y="165"
|
|
||||||
inkscape:window-maximized="1"
|
|
||||||
inkscape:current-layer="layer1" />
|
|
||||||
<defs
|
|
||||||
id="defs2">
|
|
||||||
<rect
|
|
||||||
x="144.01516"
|
|
||||||
y="304.36604"
|
|
||||||
width="248.99777"
|
|
||||||
height="100.91557"
|
|
||||||
id="rect79475" />
|
|
||||||
<rect
|
|
||||||
x="309.01869"
|
|
||||||
y="43.698615"
|
|
||||||
width="552.19421"
|
|
||||||
height="71.390348"
|
|
||||||
id="rect65238" />
|
|
||||||
<rect
|
|
||||||
x="762.55856"
|
|
||||||
y="341.3838"
|
|
||||||
width="277.62756"
|
|
||||||
height="105.0235"
|
|
||||||
id="rect47632" />
|
|
||||||
<linearGradient
|
|
||||||
inkscape:collect="always"
|
|
||||||
id="linearGradient40704">
|
|
||||||
<stop
|
|
||||||
style="stop-color:#ccffaa;stop-opacity:1;"
|
|
||||||
offset="0"
|
|
||||||
id="stop40700" />
|
|
||||||
<stop
|
|
||||||
style="stop-color:#ccffaa;stop-opacity:0;"
|
|
||||||
offset="1"
|
|
||||||
id="stop40702" />
|
|
||||||
</linearGradient>
|
|
||||||
<marker
|
|
||||||
style="overflow:visible;"
|
|
||||||
id="Arrow2Mend"
|
|
||||||
refX="0.0"
|
|
||||||
refY="0.0"
|
|
||||||
orient="auto"
|
|
||||||
inkscape:stockid="Arrow2Mend"
|
|
||||||
inkscape:isstock="true">
|
|
||||||
<path
|
|
||||||
transform="scale(0.6) rotate(180) translate(0,0)"
|
|
||||||
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
|
|
||||||
style="stroke:context-stroke;fill-rule:evenodd;fill:context-stroke;stroke-width:0.62500000;stroke-linejoin:round;"
|
|
||||||
id="path39486" />
|
|
||||||
</marker>
|
|
||||||
<marker
|
|
||||||
style="overflow:visible;"
|
|
||||||
id="Arrow1Mend"
|
|
||||||
refX="0.0"
|
|
||||||
refY="0.0"
|
|
||||||
orient="auto"
|
|
||||||
inkscape:stockid="Arrow1Mend"
|
|
||||||
inkscape:isstock="true">
|
|
||||||
<path
|
|
||||||
transform="scale(0.4) rotate(180) translate(10,0)"
|
|
||||||
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
|
|
||||||
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
|
|
||||||
id="path39468" />
|
|
||||||
</marker>
|
|
||||||
<marker
|
|
||||||
style="overflow:visible;"
|
|
||||||
id="Arrow1Lend"
|
|
||||||
refX="0.0"
|
|
||||||
refY="0.0"
|
|
||||||
orient="auto"
|
|
||||||
inkscape:stockid="Arrow1Lend"
|
|
||||||
inkscape:isstock="true">
|
|
||||||
<path
|
|
||||||
transform="scale(0.8) rotate(180) translate(12.5,0)"
|
|
||||||
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
|
|
||||||
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
|
|
||||||
id="path39462" />
|
|
||||||
</marker>
|
|
||||||
<rect
|
|
||||||
x="707.09731"
|
|
||||||
y="616.36746"
|
|
||||||
width="407.71288"
|
|
||||||
height="417.08306"
|
|
||||||
id="rect24254" />
|
|
||||||
<rect
|
|
||||||
x="47.404365"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49855"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-3" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-3-5" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-3-5-6" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-3-5-6-1" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-0" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-0-6" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-0-6-2" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-0-6-2-8" />
|
|
||||||
<marker
|
|
||||||
style="overflow:visible"
|
|
||||||
id="Arrow2Mend-2"
|
|
||||||
refX="0"
|
|
||||||
refY="0"
|
|
||||||
orient="auto"
|
|
||||||
inkscape:stockid="Arrow2Mend"
|
|
||||||
inkscape:isstock="true">
|
|
||||||
<path
|
|
||||||
transform="scale(-0.6)"
|
|
||||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
|
||||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
|
||||||
id="path39486-3" />
|
|
||||||
</marker>
|
|
||||||
<marker
|
|
||||||
style="overflow:visible"
|
|
||||||
id="Arrow2Mend-2-5"
|
|
||||||
refX="0"
|
|
||||||
refY="0"
|
|
||||||
orient="auto"
|
|
||||||
inkscape:stockid="Arrow2Mend"
|
|
||||||
inkscape:isstock="true">
|
|
||||||
<path
|
|
||||||
transform="scale(-0.6)"
|
|
||||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
|
||||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
|
||||||
id="path39486-3-9" />
|
|
||||||
</marker>
|
|
||||||
<marker
|
|
||||||
style="overflow:visible"
|
|
||||||
id="Arrow2Mend-2-5-2"
|
|
||||||
refX="0"
|
|
||||||
refY="0"
|
|
||||||
orient="auto"
|
|
||||||
inkscape:stockid="Arrow2Mend"
|
|
||||||
inkscape:isstock="true">
|
|
||||||
<path
|
|
||||||
transform="scale(-0.6)"
|
|
||||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
|
||||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
|
||||||
id="path39486-3-9-8" />
|
|
||||||
</marker>
|
|
||||||
<linearGradient
|
|
||||||
inkscape:collect="always"
|
|
||||||
xlink:href="#linearGradient40704"
|
|
||||||
id="linearGradient40706"
|
|
||||||
x1="324.58157"
|
|
||||||
y1="127.35331"
|
|
||||||
x2="363.61096"
|
|
||||||
y2="98.957848"
|
|
||||||
gradientUnits="userSpaceOnUse" />
|
|
||||||
<rect
|
|
||||||
x="47.404366"
|
|
||||||
y="100.3268"
|
|
||||||
width="398.49854"
|
|
||||||
height="110.16514"
|
|
||||||
id="rect5050-3-5-6-1-7" />
|
|
||||||
<rect
|
|
||||||
x="309.01868"
|
|
||||||
y="43.698616"
|
|
||||||
width="552.19421"
|
|
||||||
height="71.39035"
|
|
||||||
id="rect65238-1" />
|
|
||||||
</defs>
|
|
||||||
<g
|
|
||||||
inkscape:label="Layer 1"
|
|
||||||
inkscape:groupmode="layer"
|
|
||||||
id="layer1">
|
|
||||||
<rect
|
|
||||||
style="fill:#d5d5ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
|
|
||||||
id="rect55834"
|
|
||||||
width="250.31726"
|
|
||||||
height="74.676537"
|
|
||||||
x="25.257824"
|
|
||||||
y="97.277718" />
|
|
||||||
<rect
|
|
||||||
style="fill:#d5f6ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
|
|
||||||
id="rect55832"
|
|
||||||
width="250.35208"
|
|
||||||
height="64.461151"
|
|
||||||
x="25.256891"
|
|
||||||
y="32.817505" />
|
|
||||||
<rect
|
|
||||||
style="fill:#ccffaa;stroke:#091600;stroke-width:1.31891"
|
|
||||||
id="rect6462"
|
|
||||||
width="82.385742"
|
|
||||||
height="20.525751"
|
|
||||||
x="28.355024"
|
|
||||||
y="48.740646" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,17.244577,26.206534)"
|
|
||||||
id="text5048"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82948"><tspan
|
|
||||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
|
||||||
id="tspan82946">gather-bench</tspan></tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
|
||||||
id="rect6462-9"
|
|
||||||
width="18.764017"
|
|
||||||
height="20.965076"
|
|
||||||
x="39.518955"
|
|
||||||
y="140.726" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.33667319,0,0,0.33667319,25.589293,109.42998)"
|
|
||||||
id="text5048-3"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82950">L1</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
|
||||||
id="rect6462-9-0"
|
|
||||||
width="21.653919"
|
|
||||||
height="24.193966"
|
|
||||||
x="97.687294"
|
|
||||||
y="138.51564" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.3885252,0,0,0.3885252,81.212654,102.39964)"
|
|
||||||
id="text5048-3-6"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82952">L2</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
|
||||||
id="rect6462-9-0-6"
|
|
||||||
width="27.217058"
|
|
||||||
height="30.409672"
|
|
||||||
x="149.19933"
|
|
||||||
y="134.83977" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.48834178,0,0,0.48834178,128.49215,89.445174)"
|
|
||||||
id="text5048-3-6-1"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82954">L3</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#eeaaff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
|
||||||
id="rect6462-9-0-6-7"
|
|
||||||
width="61.032539"
|
|
||||||
height="29.96501"
|
|
||||||
x="204.01265"
|
|
||||||
y="135.61238" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.48834178,0,0,0.48834178,182.37007,89.995434)"
|
|
||||||
id="text5048-3-6-1-9"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2-8);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82956">DRAM</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#ffccaa;stroke:#091600;stroke-width:1.10636"
|
|
||||||
id="rect6462-6"
|
|
||||||
width="74.980759"
|
|
||||||
height="15.869514"
|
|
||||||
x="126.09525"
|
|
||||||
y="38.773243" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,115.65481,14.295323)"
|
|
||||||
id="text5048-7"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82958">Single gather</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#ffccaa;stroke:#091600;stroke-width:1.03971"
|
|
||||||
id="rect6462-6-3"
|
|
||||||
width="66.071701"
|
|
||||||
height="15.904838"
|
|
||||||
x="126.90776"
|
|
||||||
y="63.642746" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,116.63325,39.114393)"
|
|
||||||
id="text5048-7-5"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82960">MD gathers</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#afe9dd;stroke:#091600;stroke-width:1.02848"
|
|
||||||
id="rect6462-6-3-2"
|
|
||||||
width="64.479698"
|
|
||||||
height="15.947394"
|
|
||||||
x="206.65364"
|
|
||||||
y="52.98967" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,196.01512,28.482594)"
|
|
||||||
id="text5048-7-5-9"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82962">Contiguous</tspan></text>
|
|
||||||
<rect
|
|
||||||
style="fill:#afe9dd;stroke:#091600;stroke-width:0.987323"
|
|
||||||
id="rect6462-6-3-2-2"
|
|
||||||
width="59.269382"
|
|
||||||
height="15.988551"
|
|
||||||
x="208.16559"
|
|
||||||
y="76.856781" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,197.58604,52.220445)"
|
|
||||||
id="text5048-7-5-9-7"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="47.404297"
|
|
||||||
y="135.7168"
|
|
||||||
id="tspan82964">"Random"</tspan></text>
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="scale(0.26458333)"
|
|
||||||
id="text24252"
|
|
||||||
style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect24254)" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="M 193.10512,71.273276 206.30683,61.033513"
|
|
||||||
id="path39049"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="M 193.08841,71.196939 207.86207,84.43804"
|
|
||||||
id="path39053"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.39816;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 58.548229,151.24436 38.298093,0.25023"
|
|
||||||
id="path39219"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24847;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 119.19252,150.09399 29.28333,0.26095"
|
|
||||||
id="path39219-2"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 177.02022,150.44367 26.36623,0.26095"
|
|
||||||
id="path39219-2-0"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
|
|
||||||
d="m 48.145458,92.71788 -0.644819,47.57709"
|
|
||||||
id="path39377"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2)"
|
|
||||||
d="M 48.121208,92.873762 106.60807,137.41946"
|
|
||||||
id="path39377-7"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5)"
|
|
||||||
d="M 48.073928,92.825143 158.88023,133.04546"
|
|
||||||
id="path39377-7-2"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5-2)"
|
|
||||||
d="M 48.051946,92.813593 233.0959,134.16596"
|
|
||||||
id="path39377-7-2-9"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<rect
|
|
||||||
style="fill:#e9afaf;stroke:#091600;stroke-width:1.34518"
|
|
||||||
id="rect6462-6-3-2-2-3"
|
|
||||||
width="65.880661"
|
|
||||||
height="26.700579"
|
|
||||||
x="38.104012"
|
|
||||||
y="80.530182" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
|
||||||
d="m 77.365612,69.678744 h 2e-6"
|
|
||||||
id="path39808"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 111.64767,59.183009 6.84466,0.03069"
|
|
||||||
id="path41004"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 119.03378,47.056357 -0.58704,25.198541"
|
|
||||||
id="path41006"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.02423;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 118.07503,72.254897 7.94998,-0.05784"
|
|
||||||
id="path41008"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.882836;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 118.26666,47.054814 7.69322,0.173925"
|
|
||||||
id="path41112"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<path
|
|
||||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
|
||||||
d="M 68.213642,69.068864 67.910274,80.302728"
|
|
||||||
id="path55728"
|
|
||||||
inkscape:connector-type="polyline"
|
|
||||||
inkscape:connector-curvature="0" />
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,-1.3782637,4.0412367)"
|
|
||||||
id="text65236"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="309.01953"
|
|
||||||
y="90.886691"
|
|
||||||
id="tspan82968"><tspan
|
|
||||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
|
||||||
id="tspan82966">Application Level</tspan></tspan></text>
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,2.7015103,160.71919)"
|
|
||||||
id="text65236-2"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="309.01953"
|
|
||||||
y="90.886691"
|
|
||||||
id="tspan82972"><tspan
|
|
||||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
|
||||||
id="tspan82970">Hardware Level</tspan></tspan></text>
|
|
||||||
<text
|
|
||||||
xml:space="preserve"
|
|
||||||
transform="matrix(0.26458333,0,0,0.26458333,2.3490396,0.57331532)"
|
|
||||||
id="text79473"
|
|
||||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect79475);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
|
||||||
x="144.01562"
|
|
||||||
y="339.75586"
|
|
||||||
id="tspan82974">vgather </tspan><tspan
|
|
||||||
x="144.01562"
|
|
||||||
y="389.75586"
|
|
||||||
id="tspan82976">instructions</tspan></text>
|
|
||||||
</g>
|
|
||||||
</svg>
|
|
Before Width: | Height: | Size: 21 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 128 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 52 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 62 KiB |
@ -1,17 +1,18 @@
|
|||||||
CC = clang
|
CC = /opt/homebrew/Cellar/llvm/18.1.5/bin/clang
|
||||||
LINKER = $(CC)
|
LINKER = $(CC)
|
||||||
|
|
||||||
ANSI_CFLAGS = -ansi
|
ANSI_CFLAGS = -ansi
|
||||||
ANSI_CFLAGS += -std=c99
|
ANSI_CFLAGS += -std=c99
|
||||||
ANSI_CFLAGS += -pedantic
|
ANSI_CFLAGS += -pedantic
|
||||||
ANSI_CFLAGS += -Wextra
|
# ANSI_CFLAGS += -Wextra
|
||||||
|
|
||||||
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) -Xpreprocessor -fopenmp #-g
|
||||||
#CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
#CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||||
#CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
#CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||||
#CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g
|
#CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g
|
||||||
ASFLAGS = -masm=intel
|
ASFLAGS = #-masm=intel
|
||||||
LFLAGS =
|
LFLAGS =
|
||||||
DEFINES = -D_GNU_SOURCE
|
DEFINES = -D_GNU_SOURCE
|
||||||
INCLUDES =
|
# MacOSX with Apple Silicon and homebrew
|
||||||
LIBS = -lm #-lomp
|
INCLUDES = -I/opt/homebrew/Cellar/libomp/18.1.5/include/
|
||||||
|
LIBS = -lm -L/opt/homebrew/Cellar/libomp/18.1.5/lib/ -lomp
|
@ -4,24 +4,18 @@
|
|||||||
* Use of this source code is governed by a LGPL-3.0
|
* Use of this source code is governed by a LGPL-3.0
|
||||||
* license that can be found in the LICENSE file.
|
* license that can be found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
#include <stdlib.h>
|
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
double getTimeStamp()
|
double getTimeStamp(void)
|
||||||
{
|
{
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||||
}
|
}
|
||||||
|
|
||||||
double getTimeResolution()
|
double getTimeResolution(void)
|
||||||
{
|
{
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
clock_getres(CLOCK_MONOTONIC, &ts);
|
clock_getres(CLOCK_MONOTONIC, &ts);
|
||||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||||
}
|
}
|
||||||
|
|
||||||
double getTimeStamp_()
|
|
||||||
{
|
|
||||||
return getTimeStamp();
|
|
||||||
}
|
|
@ -9,6 +9,5 @@
|
|||||||
|
|
||||||
extern double getTimeStamp(void);
|
extern double getTimeStamp(void);
|
||||||
extern double getTimeResolution(void);
|
extern double getTimeResolution(void);
|
||||||
extern double getTimeStamp_(void);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -5,34 +5,35 @@
|
|||||||
* license that can be found in the LICENSE file.
|
* license that can be found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <stdarg.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
|
|
||||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
|
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
|
||||||
#define IA 16807
|
#define IA 16807
|
||||||
#define IM 2147483647
|
#define IM 2147483647
|
||||||
#define AM (1.0/IM)
|
#define AM (1.0 / IM)
|
||||||
#define IQ 127773
|
#define IQ 127773
|
||||||
#define IR 2836
|
#define IR 2836
|
||||||
#define MASK 123459876
|
#define MASK 123459876
|
||||||
|
|
||||||
double myrandom(int* seed) {
|
double myrandom(int* seed)
|
||||||
int k= (*seed) / IQ;
|
{
|
||||||
|
int k = (*seed) / IQ;
|
||||||
double ans;
|
double ans;
|
||||||
|
|
||||||
*seed = IA * (*seed - k * IQ) - IR * k;
|
*seed = IA * (*seed - k * IQ) - IR * k;
|
||||||
if(*seed < 0) *seed += IM;
|
if (*seed < 0) *seed += IM;
|
||||||
ans = AM * (*seed);
|
ans = AM * (*seed);
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
void random_reset(int *seed, int ibase, double *coord) {
|
void random_reset(int* seed, int ibase, double* coord)
|
||||||
|
{
|
||||||
int i;
|
int i;
|
||||||
char *str = (char *) &ibase;
|
char* str = (char*)&ibase;
|
||||||
int n = sizeof(int);
|
int n = sizeof(int);
|
||||||
unsigned int hash = 0;
|
unsigned int hash = 0;
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
@ -41,8 +42,8 @@ void random_reset(int *seed, int ibase, double *coord) {
|
|||||||
hash ^= (hash >> 6);
|
hash ^= (hash >> 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
str = (char *) coord;
|
str = (char*)coord;
|
||||||
n = 3 * sizeof(double);
|
n = 3 * sizeof(double);
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
hash += str[i];
|
hash += str[i];
|
||||||
hash += (hash << 10);
|
hash += (hash << 10);
|
||||||
@ -61,45 +62,59 @@ void random_reset(int *seed, int ibase, double *coord) {
|
|||||||
|
|
||||||
// warm up the RNG
|
// warm up the RNG
|
||||||
|
|
||||||
for (i = 0; i < 5; i++) myrandom(seed);
|
for (i = 0; i < 5; i++)
|
||||||
//save = 0;
|
myrandom(seed);
|
||||||
|
// save = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int str2ff(const char *string) {
|
int str2ff(const char* string)
|
||||||
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
|
{
|
||||||
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
|
if (strncmp(string, "lj", 2) == 0) return FF_LJ;
|
||||||
if(strncmp(string, "dem", 3) == 0) return FF_DEM;
|
if (strncmp(string, "eam", 3) == 0) return FF_EAM;
|
||||||
|
if (strncmp(string, "dem", 3) == 0) return FF_DEM;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* ff2str(int ff) {
|
const char* ff2str(int ff)
|
||||||
if(ff == FF_LJ) { return "lj"; }
|
{
|
||||||
if(ff == FF_EAM) { return "eam"; }
|
if (ff == FF_LJ) {
|
||||||
if(ff == FF_DEM) { return "dem"; }
|
return "lj";
|
||||||
|
}
|
||||||
|
if (ff == FF_EAM) {
|
||||||
|
return "eam";
|
||||||
|
}
|
||||||
|
if (ff == FF_DEM) {
|
||||||
|
return "dem";
|
||||||
|
}
|
||||||
return "invalid";
|
return "invalid";
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_cuda_num_threads() {
|
int get_cuda_num_threads(void)
|
||||||
const char *num_threads_env = getenv("NUM_THREADS");
|
{
|
||||||
|
const char* num_threads_env = getenv("NUM_THREADS");
|
||||||
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
|
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
|
||||||
}
|
}
|
||||||
|
|
||||||
void readline(char *line, FILE *fp) {
|
void readline(char* line, FILE* fp)
|
||||||
if(fgets(line, MAXLINE, fp) == NULL) {
|
{
|
||||||
if(errno != 0) {
|
if (fgets(line, MAXLINE, fp) == NULL) {
|
||||||
|
if (errno != 0) {
|
||||||
perror("readline()");
|
perror("readline()");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void debug_printf(const char *format, ...) {
|
void debug_printf(const char* format, ...)
|
||||||
#ifdef DEBUG
|
{
|
||||||
|
#ifdef DEBUG
|
||||||
va_list arg;
|
va_list arg;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
va_start(arg, format);
|
va_start(arg, format);
|
||||||
if((vfprintf(stdout, format, arg)) < 0) { perror("debug_printf()"); }
|
if ((vfprintf(stdout, format, arg)) < 0) {
|
||||||
|
perror("debug_printf()");
|
||||||
|
}
|
||||||
va_end(arg);
|
va_end(arg);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
102
src/verletlist/atom.h
Normal file
102
src/verletlist/atom.h
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
* All rights reserved. This file is part of MD-Bench.
|
||||||
|
* Use of this source code is governed by a LGPL-3.0
|
||||||
|
* license that can be found in the LICENSE file.
|
||||||
|
*/
|
||||||
|
#include <parameter.h>
|
||||||
|
|
||||||
|
#ifndef __ATOM_H_
|
||||||
|
#define __ATOM_H_
|
||||||
|
|
||||||
|
#ifdef CUDA_TARGET
|
||||||
|
#define KERNEL_NAME "CUDA"
|
||||||
|
#define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
|
||||||
|
#define initialIntegrate initialIntegrate_cuda
|
||||||
|
#define finalIntegrate finalIntegrate_cuda
|
||||||
|
#define buildNeighbor buildNeighbor_cuda
|
||||||
|
#define updatePbc updatePbc_cuda
|
||||||
|
#define updateAtomsPbc updateAtomsPbc_cuda
|
||||||
|
#else
|
||||||
|
#ifdef USE_SIMD_KERNEL
|
||||||
|
#define KERNEL_NAME "SIMD"
|
||||||
|
#define computeForceLJFullNeigh computeForceLJFullNeigh_simd
|
||||||
|
#else
|
||||||
|
#define KERNEL_NAME "PLAIN"
|
||||||
|
#endif
|
||||||
|
#define initialIntegrate initialIntegrate_cpu
|
||||||
|
#define finalIntegrate finalIntegrate_cpu
|
||||||
|
#define buildNeighbor buildNeighbor_cpu
|
||||||
|
#define updatePbc updatePbc_cpu
|
||||||
|
#define updateAtomsPbc updateAtomsPbc_cpu
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
MD_FLOAT *x, *y, *z;
|
||||||
|
MD_FLOAT *vx, *vy, *vz;
|
||||||
|
MD_FLOAT *fx, *fy, *fz;
|
||||||
|
int* border_map;
|
||||||
|
int* type;
|
||||||
|
MD_FLOAT* epsilon;
|
||||||
|
MD_FLOAT* sigma6;
|
||||||
|
MD_FLOAT* cutforcesq;
|
||||||
|
MD_FLOAT* cutneighsq;
|
||||||
|
} DeviceAtom;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int Natoms, Nlocal, Nghost, Nmax;
|
||||||
|
MD_FLOAT *x, *y, *z;
|
||||||
|
MD_FLOAT *vx, *vy, *vz;
|
||||||
|
MD_FLOAT *fx, *fy, *fz;
|
||||||
|
int* border_map;
|
||||||
|
int* type;
|
||||||
|
int ntypes;
|
||||||
|
MD_FLOAT* epsilon;
|
||||||
|
MD_FLOAT* sigma6;
|
||||||
|
MD_FLOAT* cutforcesq;
|
||||||
|
MD_FLOAT* cutneighsq;
|
||||||
|
|
||||||
|
// DEM
|
||||||
|
MD_FLOAT* radius;
|
||||||
|
MD_FLOAT* av;
|
||||||
|
MD_FLOAT* r;
|
||||||
|
|
||||||
|
// Device data
|
||||||
|
DeviceAtom d_atom;
|
||||||
|
} Atom;
|
||||||
|
|
||||||
|
extern void initAtom(Atom*);
|
||||||
|
extern void createAtom(Atom*, Parameter*);
|
||||||
|
extern int readAtom(Atom*, Parameter*);
|
||||||
|
extern int readAtom_pdb(Atom*, Parameter*);
|
||||||
|
extern int readAtom_gro(Atom*, Parameter*);
|
||||||
|
extern int readAtom_dmp(Atom*, Parameter*);
|
||||||
|
extern int readAtom_in(Atom*, Parameter*);
|
||||||
|
extern void writeAtom(Atom*, Parameter*);
|
||||||
|
extern void growAtom(Atom*);
|
||||||
|
|
||||||
|
#ifdef AOS
|
||||||
|
#define POS_DATA_LAYOUT "AoS"
|
||||||
|
#define atom_x(i) atom->x[(i) * 3 + 0]
|
||||||
|
#define atom_y(i) atom->x[(i) * 3 + 1]
|
||||||
|
#define atom_z(i) atom->x[(i) * 3 + 2]
|
||||||
|
#define atom_vx(i) atom->vx[(i) * 3 + 0]
|
||||||
|
#define atom_vy(i) atom->vx[(i) * 3 + 1]
|
||||||
|
#define atom_vz(i) atom->vx[(i) * 3 + 2]
|
||||||
|
#define atom_fx(i) atom->fx[(i) * 3 + 0]
|
||||||
|
#define atom_fy(i) atom->fx[(i) * 3 + 1]
|
||||||
|
#define atom_fz(i) atom->fx[(i) * 3 + 2]
|
||||||
|
#else
|
||||||
|
#define POS_DATA_LAYOUT "SoA"
|
||||||
|
#define atom_x(i) atom->x[i]
|
||||||
|
#define atom_y(i) atom->y[i]
|
||||||
|
#define atom_z(i) atom->z[i]
|
||||||
|
#define atom_vx(i) atom->vx[i]
|
||||||
|
#define atom_vy(i) atom->vy[i]
|
||||||
|
#define atom_vz(i) atom->vz[i]
|
||||||
|
#define atom_fx(i) atom->fx[i]
|
||||||
|
#define atom_fy(i) atom->fy[i]
|
||||||
|
#define atom_fz(i) atom->fz[i]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
112
src/verletlist/force_lj-x86.c
Normal file
112
src/verletlist/force_lj-x86.c
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
* All rights reserved. This file is part of MD-Bench.
|
||||||
|
* Use of this source code is governed by a LGPL-3.0
|
||||||
|
* license that can be found in the LICENSE file.
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
//---
|
||||||
|
#include <atom.h>
|
||||||
|
#include <likwid-marker.h>
|
||||||
|
#include <neighbor.h>
|
||||||
|
#include <parameter.h>
|
||||||
|
#include <stats.h>
|
||||||
|
#include <timing.h>
|
||||||
|
|
||||||
|
#ifdef __SIMD_KERNEL__
|
||||||
|
#include <simd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double computeForceLJFullNeigh_simd(
|
||||||
|
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||||
|
{
|
||||||
|
int Nlocal = atom->Nlocal;
|
||||||
|
int* neighs;
|
||||||
|
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||||
|
MD_FLOAT sigma6 = param->sigma6;
|
||||||
|
MD_FLOAT epsilon = param->epsilon;
|
||||||
|
|
||||||
|
for (int i = 0; i < Nlocal; i++) {
|
||||||
|
atom_fx(i) = 0.0;
|
||||||
|
atom_fy(i) = 0.0;
|
||||||
|
atom_fz(i) = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double S = getTimeStamp();
|
||||||
|
|
||||||
|
#ifndef __SIMD_KERNEL__
|
||||||
|
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
|
||||||
|
exit(-1);
|
||||||
|
#else
|
||||||
|
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
|
||||||
|
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
|
||||||
|
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
|
||||||
|
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||||
|
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
LIKWID_MARKER_START("force");
|
||||||
|
|
||||||
|
#pragma omp for schedule(runtime)
|
||||||
|
for (int i = 0; i < Nlocal; i++) {
|
||||||
|
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||||
|
int numneighs = neighbor->numneigh[i];
|
||||||
|
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
|
||||||
|
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
|
||||||
|
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
|
||||||
|
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
|
||||||
|
MD_SIMD_FLOAT fix = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiy = simd_zero();
|
||||||
|
MD_SIMD_FLOAT fiz = simd_zero();
|
||||||
|
|
||||||
|
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
|
||||||
|
// If the last iteration of this loop is separated from the rest, this
|
||||||
|
// mask can be set only there
|
||||||
|
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
|
||||||
|
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
|
||||||
|
numneighs_vec);
|
||||||
|
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
|
||||||
|
#ifdef AOS
|
||||||
|
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
|
||||||
|
MD_SIMD_FLOAT delx = xtmp -
|
||||||
|
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
|
||||||
|
MD_SIMD_FLOAT dely = ytmp -
|
||||||
|
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
|
||||||
|
MD_SIMD_FLOAT delz = ztmp -
|
||||||
|
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
|
||||||
|
#else
|
||||||
|
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
|
||||||
|
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
|
||||||
|
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
|
||||||
|
#endif
|
||||||
|
MD_SIMD_FLOAT rsq = simd_fma(delx,
|
||||||
|
delx,
|
||||||
|
simd_fma(dely, dely, simd_mul(delz, delz)));
|
||||||
|
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
|
||||||
|
simd_mask_cond_lt(rsq, cutforcesq_vec));
|
||||||
|
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
|
||||||
|
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
|
||||||
|
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
|
||||||
|
MD_SIMD_FLOAT force = simd_mul(c48_vec,
|
||||||
|
simd_mul(sr6,
|
||||||
|
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
|
||||||
|
|
||||||
|
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
|
||||||
|
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
|
||||||
|
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
atom_fx(i) += simd_h_reduce_sum(fix);
|
||||||
|
atom_fy(i) += simd_h_reduce_sum(fiy);
|
||||||
|
atom_fz(i) += simd_h_reduce_sum(fiz);
|
||||||
|
}
|
||||||
|
|
||||||
|
LIKWID_MARKER_STOP("force");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double E = getTimeStamp();
|
||||||
|
return E - S;
|
||||||
|
}
|
@ -4,9 +4,6 @@
|
|||||||
* Use of this source code is governed by a LGPL-3.0
|
* Use of this source code is governed by a LGPL-3.0
|
||||||
* license that can be found in the LICENSE file.
|
* license that can be found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
//---
|
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
#include <likwid-marker.h>
|
#include <likwid-marker.h>
|
||||||
#include <neighbor.h>
|
#include <neighbor.h>
|
||||||
@ -14,10 +11,6 @@
|
|||||||
#include <stats.h>
|
#include <stats.h>
|
||||||
#include <timing.h>
|
#include <timing.h>
|
||||||
|
|
||||||
#ifdef __SIMD_KERNEL__
|
|
||||||
#include <simd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
double computeForceLJFullNeigh(
|
double computeForceLJFullNeigh(
|
||||||
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||||
{
|
{
|
||||||
@ -203,96 +196,3 @@ double computeForceLJHalfNeigh(
|
|||||||
double timeStop = getTimeStamp();
|
double timeStop = getTimeStamp();
|
||||||
return timeStop - timeStart;
|
return timeStop - timeStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
double computeForceLJFullNeigh_simd(
|
|
||||||
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
|
||||||
{
|
|
||||||
int Nlocal = atom->Nlocal;
|
|
||||||
int* neighs;
|
|
||||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
|
||||||
MD_FLOAT sigma6 = param->sigma6;
|
|
||||||
MD_FLOAT epsilon = param->epsilon;
|
|
||||||
|
|
||||||
for (int i = 0; i < Nlocal; i++) {
|
|
||||||
atom_fx(i) = 0.0;
|
|
||||||
atom_fy(i) = 0.0;
|
|
||||||
atom_fz(i) = 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
double S = getTimeStamp();
|
|
||||||
|
|
||||||
#ifndef __SIMD_KERNEL__
|
|
||||||
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
|
|
||||||
exit(-1);
|
|
||||||
#else
|
|
||||||
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
|
|
||||||
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
|
|
||||||
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
|
|
||||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
|
||||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
|
||||||
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
LIKWID_MARKER_START("force");
|
|
||||||
|
|
||||||
#pragma omp for schedule(runtime)
|
|
||||||
for (int i = 0; i < Nlocal; i++) {
|
|
||||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
|
||||||
int numneighs = neighbor->numneigh[i];
|
|
||||||
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
|
|
||||||
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
|
|
||||||
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
|
|
||||||
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
|
|
||||||
MD_SIMD_FLOAT fix = simd_zero();
|
|
||||||
MD_SIMD_FLOAT fiy = simd_zero();
|
|
||||||
MD_SIMD_FLOAT fiz = simd_zero();
|
|
||||||
|
|
||||||
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
|
|
||||||
// If the last iteration of this loop is separated from the rest, this
|
|
||||||
// mask can be set only there
|
|
||||||
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
|
|
||||||
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
|
|
||||||
numneighs_vec);
|
|
||||||
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
|
|
||||||
#ifdef AOS
|
|
||||||
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
|
|
||||||
MD_SIMD_FLOAT delx = xtmp -
|
|
||||||
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
|
|
||||||
MD_SIMD_FLOAT dely = ytmp -
|
|
||||||
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
|
|
||||||
MD_SIMD_FLOAT delz = ztmp -
|
|
||||||
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
|
|
||||||
#else
|
|
||||||
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
|
|
||||||
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
|
|
||||||
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
|
|
||||||
#endif
|
|
||||||
MD_SIMD_FLOAT rsq = simd_fma(delx,
|
|
||||||
delx,
|
|
||||||
simd_fma(dely, dely, simd_mul(delz, delz)));
|
|
||||||
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
|
|
||||||
simd_mask_cond_lt(rsq, cutforcesq_vec));
|
|
||||||
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
|
|
||||||
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
|
|
||||||
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
|
|
||||||
MD_SIMD_FLOAT force = simd_mul(c48_vec,
|
|
||||||
simd_mul(sr6,
|
|
||||||
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
|
|
||||||
|
|
||||||
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
|
|
||||||
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
|
|
||||||
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
atom_fx(i) += simd_h_reduce_sum(fix);
|
|
||||||
atom_fy(i) += simd_h_reduce_sum(fiy);
|
|
||||||
atom_fz(i) += simd_h_reduce_sum(fiz);
|
|
||||||
}
|
|
||||||
|
|
||||||
LIKWID_MARKER_STOP("force");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
double E = getTimeStamp();
|
|
||||||
return E - S;
|
|
||||||
}
|
|
@ -10,8 +10,8 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
// #include <omp.h>
|
|
||||||
#include <likwid-marker.h>
|
#include <likwid-marker.h>
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
#include <allocate.h>
|
#include <allocate.h>
|
||||||
#include <atom.h>
|
#include <atom.h>
|
||||||
@ -30,8 +30,8 @@
|
|||||||
|
|
||||||
#define HLINE "------------------------------------------------------------------\n"
|
#define HLINE "------------------------------------------------------------------\n"
|
||||||
|
|
||||||
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
|
|
||||||
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||||
|
extern double computeForceLJFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||||
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
||||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||||
|
|
||||||
@ -325,35 +325,38 @@ int main(int argc, char** argv)
|
|||||||
timer[TOTAL] - timer[FORCE] - timer[NEIGH]);
|
timer[TOTAL] - timer[FORCE] - timer[NEIGH]);
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
|
|
||||||
// int nthreads = 0;
|
int nthreads = 0;
|
||||||
// int chunkSize = 0;
|
int chunkSize = 0;
|
||||||
// omp_sched_t schedKind;
|
omp_sched_t schedKind;
|
||||||
// char schedType[10];
|
char schedType[10];
|
||||||
// #pragma omp parallel
|
#pragma omp parallel
|
||||||
// #pragma omp master
|
#pragma omp master
|
||||||
// {
|
{
|
||||||
// omp_get_schedule(&schedKind, &chunkSize);
|
omp_get_schedule(&schedKind, &chunkSize);
|
||||||
//
|
|
||||||
// switch (schedKind) {
|
switch (schedKind) {
|
||||||
// case omp_sched_static:
|
case omp_sched_static:
|
||||||
// strcpy(schedType, "static");
|
strcpy(schedType, "static");
|
||||||
// break;
|
break;
|
||||||
// case omp_sched_dynamic:
|
case omp_sched_dynamic:
|
||||||
// strcpy(schedType, "dynamic");
|
strcpy(schedType, "dynamic");
|
||||||
// break;
|
break;
|
||||||
// case omp_sched_guided:
|
case omp_sched_guided:
|
||||||
// strcpy(schedType, "guided");
|
strcpy(schedType, "guided");
|
||||||
// break;
|
break;
|
||||||
// case omp_sched_auto:
|
case omp_sched_auto:
|
||||||
// strcpy(schedType, "auto");
|
strcpy(schedType, "auto");
|
||||||
// break;
|
break;
|
||||||
// }
|
case omp_sched_monotonic:
|
||||||
//
|
strcpy(schedType, "auto");
|
||||||
// nthreads = omp_get_max_threads();
|
break;
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// printf("Num threads: %d\n", nthreads);
|
nthreads = omp_get_max_threads();
|
||||||
// printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
}
|
||||||
|
|
||||||
|
printf("Num threads: %d\n", nthreads);
|
||||||
|
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||||
|
|
||||||
printf("Performance: %.2f million atom updates per second\n",
|
printf("Performance: %.2f million atom updates per second\n",
|
||||||
1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]);
|
1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]);
|
@ -1,37 +0,0 @@
|
|||||||
# Utility tools for MD-Bench
|
|
||||||
|
|
||||||
**mdBench.c:** Single file version for MD-Bench, used mostly for teaching purposes.
|
|
||||||
|
|
||||||
**run_stub.sh:** Bash script to run the MD-Bench stubbed force calculation for different configurations and evaluate the performance.
|
|
||||||
The configuration parameters are:
|
|
||||||
- **-a <numbers>:** specify the number of atoms per unit cell (the number of neighbors per atom is this value minus 1), the default is 8.
|
|
||||||
- **-n <numbers>:** timesteps to run the simulation, the default is 200.
|
|
||||||
- **-nx <numbers>:** number of unit cells in the x dimension, the default is 4.
|
|
||||||
- **-ny <numbers>:** number of unit cells in the y dimension, the default is 4.
|
|
||||||
- **-nz <numbers>:** number of unit cells in the z dimension, the default is 2.
|
|
||||||
|
|
||||||
Notice that these parameters can also be specified as lists, which executes the stubbed force calculation several times varying the specific parameter to each element of the list, and hence all combinations of parameters will be executed. For example, the following command:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bash run_stub.sh -a "8 16" -nx "4 8" -ny 8 -nz 4
|
|
||||||
```
|
|
||||||
|
|
||||||
Will execute the stubbed force calculation for the following 4 configurations:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
1> 8 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
|
|
||||||
2> 16 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
|
|
||||||
3> 8 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
|
|
||||||
4> 16 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
|
|
||||||
```
|
|
||||||
|
|
||||||
The following parameters are also available:
|
|
||||||
- **-f <frequency>:** CPU frequency in GHz (assure your CPU frequency is fixed by disabling Turbo mode), more performance metrics such as cycles per iteration are displayed if this option is defined.
|
|
||||||
- **-o <file>:** output file (.txt) for the results, the default is *run_results.txt*.
|
|
||||||
- **-r <runs>:** number of runs for each configuration (only the values for the best run are displayed), the default is 3.
|
|
||||||
|
|
||||||
**plot_run_stub_data.py:** Python script to plot the data generated by the *run_stub.sh* script. Just provide the name of the .txt file as a parameter and this script generates a corresponding PDF with the same file name.
|
|
||||||
|
|
||||||
**plot_gather_data.py:** Python script to plot the data generated by the gather benchmark. Just provide the name of the .txt file containing the gather output as a parameter and this script generates a corresponding PDF with the same file name. Multiple outputs with different strides can be included in the text file by concatenating the outputs. The script handles output from both standard simple array case and MD variant.
|
|
||||||
|
|
||||||
**cache.py:** Python script to run the cache simulator with the data obtained from the memory tracer. Just run it with the tracer output file name as a parameter. The cache specifications can be directly adapted in the script to match those of the target processor of interest.
|
|
@ -1,33 +0,0 @@
|
|||||||
import sys
|
|
||||||
from cachesim import CacheSimulator, Cache, MainMemory
|
|
||||||
|
|
||||||
filename = sys.argv[1]
|
|
||||||
mem = MainMemory()
|
|
||||||
|
|
||||||
#l3 = Cache("L3", 20480, 16, 64, "LRU") # 20MB: 20480 sets, 16-ways with cacheline size of 64 bytes
|
|
||||||
#l2 = Cache("L2", 256, 4, 64, "LRU", store_to=l3, load_from=l3) # 256KB
|
|
||||||
#l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2) # 32KB
|
|
||||||
|
|
||||||
# Cascade Lake
|
|
||||||
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
|
|
||||||
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
|
|
||||||
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
|
|
||||||
mem.load_to(l2)
|
|
||||||
mem.store_from(l3)
|
|
||||||
cs = CacheSimulator(l1, mem)
|
|
||||||
|
|
||||||
with open(filename, 'r') as fp:
|
|
||||||
for line in fp.readlines():
|
|
||||||
op, addr = line.split(": ")
|
|
||||||
op = op[0]
|
|
||||||
addr = int(addr, 16)
|
|
||||||
|
|
||||||
if op == 'W':
|
|
||||||
cs.store(addr, length=8)
|
|
||||||
elif op == 'R':
|
|
||||||
cs.load(addr, length=8)
|
|
||||||
else:
|
|
||||||
sys.exit("Invalid operation: {}".format(op))
|
|
||||||
|
|
||||||
cs.force_write_back()
|
|
||||||
cs.print_stats()
|
|
@ -1,39 +0,0 @@
|
|||||||
import sys
|
|
||||||
from cachesim import CacheSimulator, Cache, MainMemory
|
|
||||||
|
|
||||||
def get_set_id(cache, addr):
|
|
||||||
return (addr >> cache.cl_bits) % cache.sets
|
|
||||||
|
|
||||||
filename = sys.argv[1]
|
|
||||||
N = sys.argv[2]
|
|
||||||
mem = MainMemory()
|
|
||||||
|
|
||||||
# Cascade Lake
|
|
||||||
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
|
|
||||||
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
|
|
||||||
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
|
|
||||||
mem.load_to(l2)
|
|
||||||
mem.store_from(l3)
|
|
||||||
cs = CacheSimulator(l1, mem)
|
|
||||||
|
|
||||||
sets_hist = {
|
|
||||||
'l1': {s: 0 for s in range(l1.sets)},
|
|
||||||
'l2': {s: 0 for s in range(l2.sets)},
|
|
||||||
'l3': {s: 0 for s in range(l3.sets)}
|
|
||||||
}
|
|
||||||
|
|
||||||
with open(filename, 'r') as fp:
|
|
||||||
for line in fp.readlines():
|
|
||||||
op, addr = line.split(": ")
|
|
||||||
op = op[0]
|
|
||||||
addr = int(addr, 16)
|
|
||||||
sets_hist['l1'][get_set_id(l1, addr)] += 1
|
|
||||||
sets_hist['l2'][get_set_id(l2, addr)] += 1
|
|
||||||
sets_hist['l3'][get_set_id(l3, addr)] += 1
|
|
||||||
|
|
||||||
for cache_level, data in sets_hist.items():
|
|
||||||
if cache_level != 'l3':
|
|
||||||
print(cache_level, ": ")
|
|
||||||
for set_id in data:
|
|
||||||
if data[set_id] > 0:
|
|
||||||
print(set_id, " -> ", data[set_id])
|
|
@ -1,116 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
[[ -z "$1" ]] && echo "Use: $0 <binary> [-c <core>] [-f <freq>] [-n <nruns>] [-l <log>] [-s]" && exit
|
|
||||||
[[ ! -f "$1" ]] && echo "Binary file not found, make sure to use 'make'" && exit
|
|
||||||
[[ ! -f "$1-stub" ]] && echo "Binary file for stubbed case not found, make sure to use 'make VARIANT=stub'" && exit
|
|
||||||
|
|
||||||
MDBENCH_BIN=$1
|
|
||||||
BIN_INFO="${MDBENCH_BIN#*-}" # $OPT_SCHEME-$TAG-$ISA-$PREC
|
|
||||||
OPT_SCHEME="${BIN_INFO%%-*}"
|
|
||||||
PREC="${BIN_INFO##*-}"
|
|
||||||
BIN_INFO="${BIN_INFO#*-}" # $TAG-$ISA-$PREC
|
|
||||||
BIN_INFO="${BIN_INFO%-*}" # $TAG-$ISA
|
|
||||||
TAG="${BIN_INFO%%-*}"
|
|
||||||
ISA="${BIN_INFO##*-}"
|
|
||||||
CORE="${CORE:-0}"
|
|
||||||
FREQ="${FREQ:-2.4}"
|
|
||||||
NRUNS="${NRUNS:-3}"
|
|
||||||
LOG="${LOG:-latencies_and_cfds.$(hostname).log}"
|
|
||||||
STUB_ONLY="${STUB_ONLY:-false}"
|
|
||||||
SKIP_SET_FREQ="${SKIP_SET_FREQ:-false}"
|
|
||||||
|
|
||||||
OPTIND=2
|
|
||||||
while getopts "c:f:n:l:s" flag; do
|
|
||||||
case "${flag}" in
|
|
||||||
c) CORE=${OPTARG};;
|
|
||||||
f) FREQ=${OPTARG};;
|
|
||||||
n) NRUNS=${OPTARG};;
|
|
||||||
l) LOG=${OPTARG};;
|
|
||||||
s) STUB_ONLY=true;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# Other useful variables
|
|
||||||
MDBENCH_BIN=./MDBench-$OPT_SCHEME-$TAG-$ISA-$PREC
|
|
||||||
FIXED_PARAMS="--freq $FREQ"
|
|
||||||
CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3)
|
|
||||||
|
|
||||||
if [ "$CPU_VENDOR" == "GenuineIntel" ]; then
|
|
||||||
ALL_PREFETCHERS="HW_PREFETCHER,CL_PREFETCHER,DCU_PREFETCHER,IP_PREFETCHER"
|
|
||||||
DEFAULT_PREFETCHERS=("ALL HW_PREFETCHER CL_PREFETCHER DCU_PREFETCHER IP_PREFETCHER NONE")
|
|
||||||
else
|
|
||||||
ALL_PREFETCHERS=""
|
|
||||||
DEFAULT_PREFETCHERS=("IGNORE")
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -z ${PREFETCHERS+x} ]; then
|
|
||||||
PREFETCHERS=${DEFAULT_PREFETCHERS}
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$OPT_SCHEME" == "gromacs" ]; then
|
|
||||||
STUB1_NAME=stub-33
|
|
||||||
STUB1_PARAMS="-na 4 -nn 33"
|
|
||||||
STUB2_NAME=stub-128
|
|
||||||
STUB2_PARAMS="-na 4 -nn 128"
|
|
||||||
else
|
|
||||||
STUB1_NAME=stub-76
|
|
||||||
STUB1_PARAMS="-nn 76"
|
|
||||||
STUB2_NAME=stub-1024
|
|
||||||
STUB2_PARAMS="-nn 1024"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Run a benchmark command NRUNS times under likwid-pin on CORE and store the
# lowest reported "Cycles/SIMD iteration" value in the global BEST.
#   $@  benchmark binary followed by its arguments; FIXED_PARAMS is appended.
# A run that reports no cycle count is announced on stderr and skipped. If no
# run yields a value, BEST keeps its initial sentinel of 10000000.
function run_benchmark() {
    BEST=10000000
    for i in $(seq $NRUNS); do
        RES=$(likwid-pin -c $CORE "$* $FIXED_PARAMS" 2>&1 | grep "Cycles/SIMD iteration" | cut -d ' ' -f3)
        if [ -z "$RES" ]; then
            echo "Warning: no 'Cycles/SIMD iteration' value reported by: $*" >&2
            continue
        fi
        # Floating-point comparison done in awk, so bc does not have to be installed.
        if awk -v best="$BEST" -v res="$RES" 'BEGIN { exit !(best > res) }'; then
            BEST=$RES
        fi
    done
}
|
|
||||||
|
|
||||||
# Print the run configuration to the terminal and append it to the log file.
# LOG is quoted so that a user-supplied file name containing spaces still works.
{
    echo "Tag: $TAG"
    echo "Optimization scheme: $OPT_SCHEME"
    echo "Instruction set: $ISA"
    echo "Precision: $PREC"
    echo "Binary: $MDBENCH_BIN(-stub)"
    echo "Frequency: $FREQ"
    echo "Number of runs: $NRUNS"
    echo "Run only stubbed cases: $STUB_ONLY"
} | tee -a "$LOG"

# Pin the frequency via likwid-setFrequencies unless the user opted out.
if [ "$SKIP_SET_FREQ" == "false" ]; then
    echo "Fixing frequencies..."
    likwid-setFrequencies -f "$FREQ" -t 0
fi
|
|
||||||
|
|
||||||
# Run every benchmark case once per prefetcher configuration and log one
# summary line per configuration. PREFETCHERS is deliberately left unquoted so
# that it splits into its individual entries.
for p in $PREFETCHERS; do
    if [ "$p" != "IGNORE" ]; then
        case "$p" in
            ALL)
                likwid-features -c $CORE -e $ALL_PREFETCHERS
                ;;
            NONE)
                likwid-features -c $CORE -d $ALL_PREFETCHERS
                ;;
            *)
                # Single prefetcher: disable all of them, then enable only this one.
                likwid-features -c $CORE -d $ALL_PREFETCHERS
                likwid-features -c $CORE -e $p
                ;;
        esac

        echo "Prefetcher settings: $p"
        likwid-features -c $CORE -l
    fi

    # run_benchmark leaves its result in BEST; collect the results in one line.
    MSG="$p: "
    if [[ $STUB_ONLY == false ]]; then
        run_benchmark $MDBENCH_BIN
        MSG="${MSG}standard=$BEST, "
        run_benchmark $MDBENCH_BIN -i data/copper_melting/input_lj_cu_one_atomtype_20x20x20.dmp
        MSG="${MSG}melt=$BEST, "
        run_benchmark $MDBENCH_BIN -p data/argon_1000/mdbench_params.conf -i data/argon_1000/tprout.gro
        MSG="${MSG}argon=$BEST, "
    fi

    run_benchmark $MDBENCH_BIN-stub $STUB1_PARAMS
    MSG="${MSG}$STUB1_NAME=$BEST, "
    run_benchmark $MDBENCH_BIN-stub $STUB2_PARAMS
    MSG="${MSG}$STUB2_NAME=$BEST"
    echo $MSG | tee -a $LOG
done
|
|
52
util/gather-bench/.gitignore
vendored
52
util/gather-bench/.gitignore
vendored
@ -1,52 +0,0 @@
|
|||||||
# Prerequisites
|
|
||||||
*.d
|
|
||||||
|
|
||||||
# Object files
|
|
||||||
*.o
|
|
||||||
*.ko
|
|
||||||
*.obj
|
|
||||||
*.elf
|
|
||||||
|
|
||||||
# Linker output
|
|
||||||
*.ilk
|
|
||||||
*.map
|
|
||||||
*.exp
|
|
||||||
|
|
||||||
# Precompiled Headers
|
|
||||||
*.gch
|
|
||||||
*.pch
|
|
||||||
|
|
||||||
# Libraries
|
|
||||||
*.lib
|
|
||||||
*.a
|
|
||||||
*.la
|
|
||||||
*.lo
|
|
||||||
|
|
||||||
# Shared objects (inc. Windows DLLs)
|
|
||||||
*.dll
|
|
||||||
*.so
|
|
||||||
*.so.*
|
|
||||||
*.dylib
|
|
||||||
|
|
||||||
# Executables
|
|
||||||
*.exe
|
|
||||||
*.out
|
|
||||||
*.app
|
|
||||||
*.i*86
|
|
||||||
*.x86_64
|
|
||||||
*.hex
|
|
||||||
|
|
||||||
# Debug files
|
|
||||||
*.dSYM/
|
|
||||||
*.su
|
|
||||||
*.idb
|
|
||||||
*.pdb
|
|
||||||
|
|
||||||
# Kernel Module Compile Results
|
|
||||||
*.mod*
|
|
||||||
*.cmd
|
|
||||||
.tmp_versions/
|
|
||||||
modules.order
|
|
||||||
Module.symvers
|
|
||||||
Mkfile.old
|
|
||||||
dkms.conf
|
|
@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) RRZE-HPC
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
@ -1,126 +0,0 @@
|
|||||||
#CONFIGURE BUILD SYSTEM
# These use recursive '=' on purpose: TAG and ISA are only defined once the
# files included below have been read (or on the command line).
TARGET	   = gather-bench-$(TAG)
BUILD_DIR  = ./$(TAG)
SRC_DIR    = ./src
MAKE_DIR   = ./
ISA_DIR    = ./src/$(ISA)
Q         ?= @

#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
include $(MAKE_DIR)/include_LIKWID.mk
INCLUDES  += -I./src/includes

# Search path used by the pattern rules to locate the sources.
VPATH     = $(SRC_DIR) ${ISA_DIR}

# The file lists are expanded once (':=') so that the $(wildcard ...) calls are
# not repeated on every reference. Everything they refer to is defined by now.
ASM       := $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
ASM       += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.f90))
# main*.c is excluded here: the link rules compile the selected main file directly.
OBJ       := $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ       += $(patsubst $(SRC_DIR)/%.cc, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cc))
OBJ       += $(patsubst $(SRC_DIR)/%.cpp, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
OBJ       += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
OBJ       += $(patsubst $(SRC_DIR)/%.F90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
OBJ       += $(patsubst $(SRC_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.s))
OBJ       += $(patsubst $(ISA_DIR)/%.S, $(BUILD_DIR)/%.o,$(wildcard $(ISA_DIR)/*.S))
CPPFLAGS  := $(CPPFLAGS) $(DEFINES) $(INCLUDES) -DISA_$(ISA)
|
|
||||||
|
|
||||||
# When a VARIANT is requested, build the matching "<target>-<variant>" binary
# by default.
ifneq "$(VARIANT)" ""
    .DEFAULT_GOAL := ${TARGET}-$(VARIANT)
endif

# Each option below adds its preprocessor define when it is set to the
# expected value (surrounding whitespace is ignored).
ifeq "$(strip $(DATA_LAYOUT))" "AOS"
    CPPFLAGS += -DAOS
endif

ifeq "$(strip $(TEST))" "true"
    CPPFLAGS += -DTEST
endif

ifeq "$(strip $(PADDING))" "true"
    CPPFLAGS += -DPADDING
endif

ifeq "$(strip $(MEASURE_GATHER_CYCLES))" "true"
    CPPFLAGS += -DMEASURE_GATHER_CYCLES
endif

ifeq "$(strip $(ONLY_FIRST_DIMENSION))" "true"
    CPPFLAGS += -DONLY_FIRST_DIMENSION
endif

ifeq "$(strip $(MEM_TRACER))" "true"
    CPPFLAGS += -DMEM_TRACER
endif
|
|
||||||
|
|
||||||
# $(BUILD_DIR) is an order-only prerequisite (after '|') throughout: it has to
# exist before anything is written into it, but its timestamp, which changes
# whenever a file is created inside it, must not trigger rebuilds. Attaching it
# to every object/assembly rule also keeps parallel builds (-j) safe.

# Link the default binary from all objects plus main.c.
${TARGET}: $(OBJ) $(SRC_DIR)/main.c | $(BUILD_DIR)
	@echo "===> LINKING  $(TARGET)"
	$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET) $(SRC_DIR)/main.c $(OBJ) $(LIBS)

# Link a variant binary "<target>-<variant>" using main-<variant>.c.
${TARGET}-%: $(OBJ) $(SRC_DIR)/main-%.c | $(BUILD_DIR)
	@echo "===> LINKING  $(TARGET)-$* "
	$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)

# 'asm' and 'tags' are commands, not files to be kept up to date.
.PHONY: asm tags

asm: $(ASM)

# C: compile, then write the header dependencies to the matching .d file.
$(BUILD_DIR)/%.o:  %.c | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM  $< > $(BUILD_DIR)/$*.d

$(BUILD_DIR)/%.s:  %.c | $(BUILD_DIR)
	@echo "===> GENERATE ASM  $@"
	$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

$(BUILD_DIR)/%.s:  %.f90 | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(FC) -S  $(FCFLAGS) $< -o $@

# C++ (.cc / .cpp): compile, then write the header dependencies.
$(BUILD_DIR)/%.o:  %.cc | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(CXX) -c  $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	$(Q)$(CXX) $(CPPFLAGS) -MT $@ -MM  $< > $(BUILD_DIR)/$*.d

$(BUILD_DIR)/%.o:  %.cpp | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(CXX) -c  $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	$(Q)$(CXX) $(CPPFLAGS) -MT $@ -MM  $< > $(BUILD_DIR)/$*.d

# Fortran: .F90 sources additionally receive the preprocessor flags.
$(BUILD_DIR)/%.o:  %.f90 | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(FC) -c  $(FCFLAGS) $< -o $@

$(BUILD_DIR)/%.o:  %.F90 | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(FC) -c  $(CPPFLAGS)  $(FCFLAGS) $< -o $@

# Assembly: .s goes straight to the assembler, .S through the C compiler
# driver so that it is preprocessed first.
$(BUILD_DIR)/%.o:  %.s | $(BUILD_DIR)
	@echo "===> ASSEMBLE  $@"
	$(Q)$(AS) $(ASFLAGS) $< -o $@

$(BUILD_DIR)/%.o:  %.S | $(BUILD_DIR)
	@echo "===> ASSEMBLE  $@"
	$(Q)$(CC) -c $(CPPFLAGS) $< -o $@

tags:
	@echo "===> GENERATE TAGS"
	$(Q)ctags -R

# -p: do not fail if the directory already exists.
$(BUILD_DIR):
	@mkdir -p $@
|
|
||||||
|
|
||||||
# Read the generated header dependencies unless a cleaning goal was requested.
# The leading '-' silences the error for .d files that do not exist yet.
ifeq ($(filter clean distclean,$(MAKECMDGOALS)),)
-include $(OBJ:.o=.d)
endif

.PHONY: clean distclean

# Remove the build directory and the generated tags file.
clean:
	@echo "===> CLEAN"
	@rm -rf $(BUILD_DIR)
	@rm -f tags

# Additionally remove the linked binaries, including the "<target>-<variant>"
# ones produced when building with VARIANT=...
distclean: clean
	@echo "===> DIST CLEAN"
	@rm -f $(TARGET) $(wildcard $(TARGET)-*)
|
|
@ -1,2 +0,0 @@
|
|||||||
# gather-bench
|
|
||||||
An x86 gather-instruction performance benchmark
|
|
@ -1,22 +0,0 @@
|
|||||||
# Supported: GCC, CLANG, ICC
|
|
||||||
TAG ?= ICC
|
|
||||||
# Supported: avx2, avx512
|
|
||||||
ISA ?= avx512
|
|
||||||
# Use likwid?
|
|
||||||
ENABLE_LIKWID ?= false
|
|
||||||
|
|
||||||
# SP or DP
|
|
||||||
DATA_TYPE ?= DP
|
|
||||||
# AOS or SOA
|
|
||||||
DATA_LAYOUT ?= AOS
|
|
||||||
# Padding byte for AoS
|
|
||||||
PADDING ?= false
|
|
||||||
# Measure cycles for each gather separately
|
|
||||||
MEASURE_GATHER_CYCLES ?= false
|
|
||||||
# Gather data only for first dimension (one gather per iteration)
|
|
||||||
ONLY_FIRST_DIMENSION ?= false
|
|
||||||
|
|
||||||
# Trace memory addresses for cache simulator
|
|
||||||
MEM_TRACER ?= false
|
|
||||||
# Test correctness of gather kernels
|
|
||||||
TEST ?= false
|
|
@ -1,9 +0,0 @@
|
|||||||
CC = clang
|
|
||||||
LINKER = $(CC)
|
|
||||||
|
|
||||||
OPENMP =# -fopenmp
|
|
||||||
CFLAGS = -Ofast -std=c11 -march=core-avx2 -mavx -mfma $(OPENMP)
|
|
||||||
LFLAGS = $(OPENMP) -march=core-avx2 -mavx -mfma
|
|
||||||
DEFINES = -D_GNU_SOURCE
|
|
||||||
INCLUDES =
|
|
||||||
LIBS =
|
|
@ -1,11 +0,0 @@
|
|||||||
CC = gcc
|
|
||||||
AS = as
|
|
||||||
LINKER = $(CC)
|
|
||||||
|
|
||||||
OPENMP = -fopenmp
|
|
||||||
CFLAGS = -Ofast -std=c11 -mavx2 -mfma $(OPENMP)
|
|
||||||
ASFLAGS =
|
|
||||||
LFLAGS = $(OPENMP) -mavx2 -mfma
|
|
||||||
DEFINES = -D_GNU_SOURCE
|
|
||||||
INCLUDES =
|
|
||||||
LIBS =
|
|
@ -1,9 +0,0 @@
|
|||||||
CC = icc
|
|
||||||
LINKER = $(CC)
|
|
||||||
|
|
||||||
OPENMP = -qopenmp
|
|
||||||
CFLAGS = -Ofast -xhost -std=c11 $(OPENMP)
|
|
||||||
LFLAGS = $(OPENMP)
|
|
||||||
DEFINES = -D_GNU_SOURCE
|
|
||||||
INCLUDES =
|
|
||||||
LIBS =
|
|
@ -1,10 +0,0 @@
|
|||||||
LIKWID_INC ?= -I/usr/local/include
|
|
||||||
LIKWID_DEFINES ?= -DLIKWID_PERFMON
|
|
||||||
LIKWID_LIB ?= -L/usr/local/lib
|
|
||||||
|
|
||||||
ifeq ($(strip $(ENABLE_LIKWID)),true)
|
|
||||||
INCLUDES += ${LIKWID_INC}
|
|
||||||
DEFINES += ${LIKWID_DEFINES}
|
|
||||||
LIBS += -llikwid
|
|
||||||
LFLAGS += ${LIKWID_LIB}
|
|
||||||
endif
|
|
@ -1,57 +0,0 @@
|
|||||||
/*
|
|
||||||
* =======================================================================================
|
|
||||||
*
|
|
||||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
|
||||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
|
||||||
* in the Software without restriction, including without limitation the rights
|
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
|
||||||
* furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in all
|
|
||||||
* copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*
|
|
||||||
* =======================================================================================
|
|
||||||
*/
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <errno.h>
|
|
||||||
|
|
||||||
/*
 * Allocate bytesize bytes whose address is a multiple of alignment, using
 * posix_memalign().
 *
 * alignment: requested alignment in bytes; posix_memalign() requires a power
 *            of two that is also a multiple of sizeof(void*).
 * bytesize:  number of bytes to allocate.
 *
 * Returns the allocated pointer. This function never returns on failure: any
 * error reported by posix_memalign(), or a NULL result, prints a message to
 * stderr and terminates the program with EXIT_FAILURE.
 */
void* allocate (int alignment, size_t bytesize)
{
    int errorCode;
    /* Initialised so the NULL check below never reads an indeterminate value. */
    void* ptr = NULL;

    errorCode = posix_memalign(&ptr, alignment, bytesize);

    if (errorCode) {
        if (errorCode == EINVAL) {
            fprintf(stderr,
                    "Error: Alignment parameter is not a power of two "
                    "multiple of sizeof(void*)\n");
        } else if (errorCode == ENOMEM) {
            fprintf(stderr,
                    "Error: Insufficient memory to fulfill the request\n");
        } else {
            fprintf(stderr,
                    "Error: posix_memalign failed with error code %d\n",
                    errorCode);
        }
        /* Every non-zero return code is fatal, not only the two named ones. */
        exit(EXIT_FAILURE);
    }

    if (ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user