Compare commits

...

12 Commits

Author SHA1 Message Date
1dc6d12f52 Cleanup 2024-05-15 15:30:11 +02:00
e60a798f37 Change subtitle in help text 2024-05-15 14:55:09 +02:00
7a62c5c1ff Cleanup README 2024-05-15 14:37:40 +02:00
8d0a8b5f9c Refactor code 2024-05-15 14:20:40 +02:00
9712d7e2c8 Cleanup. Remove copyright year. Reformat. 2024-05-13 12:33:08 +02:00
rafaelravedutti
a6a269703d Merge pull request #7 from RRZE-HPC/mucosim23
Mucosim23
2024-01-17 15:14:08 +01:00
TejeshPala
7ee250161a omp_get_max_threads instead of omp_get_num_threads for gcc compiler adaption
Signed-off-by: TejeshPala <tejesh.pala@fau.de>
2024-01-13 15:09:03 +01:00
TejeshPala
c73efea786 include openmp in ICC
Signed-off-by: TejeshPala <tejesh.pala@fau.de>
2024-01-11 17:16:17 +01:00
TejeshPala
4cfa664533 schedule options for force kernels and to print in main fn
Signed-off-by: TejeshPala <tejesh.pala@fau.de>
2024-01-11 17:09:18 +01:00
Rafael Ravedutti
1837403326 Merge branch 'master' of github.com:RRZE-HPC/MD-Bench 2023-12-13 10:52:55 +01:00
TEJESH PALA
ce00aa0042 Merge pull request #6 from RRZE-HPC/mucosim23
omp print threads
2023-11-21 17:11:18 +01:00
TejeshPala
c4e5e87265 omp print threads 2023-11-21 15:31:27 +01:00
128 changed files with 1298 additions and 5068 deletions

176
.clang-format Normal file
View File

@ -0,0 +1,176 @@
---
Language: Cpp
# BasedOnStyle: WebKit
AccessModifierOffset: -4
AlignAfterOpenBracket: DontAlign
AlignArrayOfStructures: None
AlignConsecutiveAssignments: Consecutive
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignConsecutiveMacros: Consecutive
AlignEscapedNewlines: Right
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: OnlyFirstIf
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros:
- __capability
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: WebKit
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeComma
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 90
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
BasedOnStyle: ''
ConstructorInitializerAllOnOneLineOrOnePerLine: false
AllowAllConstructorInitializersOnNextLine: true
FixNamespaceComments: false
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IfMacros:
- KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
Priority: 3
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 1
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: false
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: Inner
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 200
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
ReferenceAlignment: Pointer
ReflowComments: true
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
Minimum: 1
Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
...

14
.clang-tidy Normal file
View File

@ -0,0 +1,14 @@
---
Checks: 'clang-diagnostic-*,clang-analyzer-*,clang-bugprone-*,readability-identifier-naming'
WarningsAsErrors: true
HeaderFilterRegex: '.*'
AnalyzeTemporaryDtors: false
CheckOptions:
- key: readability-identifier-naming.StructCase
value: 'CamelCase'
- key: readability-identifier-naming.FunctionCase
value: 'camelBack'
- key: readability-identifier-naming.VariableCase
value: 'camelBack'
- key: readability-identifier-naming.GlobalConstantCase
value: 'UPPER_CASE'

3
.clangd Normal file
View File

@ -0,0 +1,3 @@
CompileFlags:
Add: [-I/Users/jan/prg/MD-Bench/src/verletlist/, -I/Users/jan/prg/MD-Bench/src/common/, -DALIGNMENT=64]
Compiler: clang

131
Makefile
View File

@ -1,120 +1,32 @@
#CONFIGURE BUILD SYSTEM
IDENTIFIER = $(OPT_SCHEME)-$(TAG)-$(ISA)-$(DATA_TYPE)
TARGET = MDBench-$(IDENTIFIER)
BUILD_DIR = ./build-$(IDENTIFIER)
SRC_DIR = ./$(OPT_SCHEME)
ASM_DIR = ./asm
COMMON_DIR = ./common
CUDA_DIR = ./$(SRC_DIR)/cuda
MAKE_DIR = ./
TAG = $(OPT_TAG)-$(TOOLCHAIN)-$(DATA_TYPE)
TARGET = MDBench-$(TAG)
BUILD_DIR = ./build/build-$(TAG)
SRC_ROOT = ./src
SRC_DIR = $(SRC_ROOT)/$(OPT_SCHEME)
COMMON_DIR = $(SRC_ROOT)/common
CUDA_DIR = $(SRC_DIR)/cuda
MAKE_DIR = ./make
Q ?= @
#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
include config.mk
include $(MAKE_DIR)/include_$(TOOLCHAIN).mk
include $(MAKE_DIR)/include_LIKWID.mk
ifneq ($(strip $(ISA)),NONE)
include $(MAKE_DIR)/include_ISA.mk
include $(MAKE_DIR)/include_GROMACS.mk
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
ifeq ($(strip $(DATA_LAYOUT)),AOS)
DEFINES += -DAOS
endif
ifeq ($(strip $(DATA_TYPE)),SP)
DEFINES += -DPRECISION=1
else
DEFINES += -DPRECISION=2
endif
INCLUDES += -I./$(SRC_DIR) -I./$(COMMON_DIR)
ifneq ($(ASM_SYNTAX), ATT)
ASFLAGS += -masm=intel
endif
ifeq ($(strip $(SORT_ATOMS)),true)
DEFINES += -DSORT_ATOMS
endif
ifeq ($(strip $(EXPLICIT_TYPES)),true)
DEFINES += -DEXPLICIT_TYPES
endif
ifeq ($(strip $(MEM_TRACER)),true)
DEFINES += -DMEM_TRACER
endif
ifeq ($(strip $(INDEX_TRACER)),true)
DEFINES += -DINDEX_TRACER
endif
ifeq ($(strip $(COMPUTE_STATS)),true)
DEFINES += -DCOMPUTE_STATS
endif
ifeq ($(strip $(XTC_OUTPUT)),true)
DEFINES += -DXTC_OUTPUT
endif
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
DEFINES += -DUSE_REFERENCE_VERSION
endif
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
endif
ifeq ($(strip $(DEBUG)),true)
DEFINES += -DDEBUG
endif
ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif
ifeq ($(strip $(__SIMD_KERNEL__)),true)
DEFINES += -D__SIMD_KERNEL__
endif
ifeq ($(strip $(__SSE__)),true)
DEFINES += -D__ISA_SSE__
endif
ifeq ($(strip $(__ISA_AVX__)),true)
DEFINES += -D__ISA_AVX__
endif
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
DEFINES += -D__ISA_AVX_FMA__
endif
ifeq ($(strip $(__ISA_AVX2__)),true)
DEFINES += -D__ISA_AVX2__
endif
ifeq ($(strip $(__ISA_AVX512__)),true)
DEFINES += -D__ISA_AVX512__
endif
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
DEFINES += -DENABLE_OMP_SIMD
endif
ifeq ($(strip $(USE_SIMD_KERNEL)),true)
DEFINES += -DUSE_SIMD_KERNEL
endif
VPATH = $(SRC_DIR) $(ASM_DIR) $(CUDA_DIR)
VPATH = $(SRC_DIR) $(COMMON_DIR) $(CUDA_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%-common.o,$(wildcard $(COMMON_DIR)/*.c))
OBJ = $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(COMMON_DIR)/*.c))
ifeq ($(strip $(TAG)),NVCC)
OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu))
endif
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
# $(warning $(OBJ))
ifneq ($(VARIANT),)
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
DEFINES += -DVARIANT=$(VARIANT)
@ -133,11 +45,6 @@ $(BUILD_DIR)/%.o: %.c
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%-common.o: $(COMMON_DIR)/%.c
$(info ===> COMPILE $@)
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%-cuda.o: %.cu
$(info ===> COMPILE $@)
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
@ -156,18 +63,16 @@ $(BUILD_DIR)/%.o: %.s
clean:
$(info ===> CLEAN)
@rm -rf $(BUILD_DIR)
@rm -rf $(TARGET)*
@rm -f tags
cleanall:
$(info ===> CLEAN)
@rm -rf build-*
@rm -rf build
@rm -rf MDBench-*
@rm -f tags
distclean: clean
$(info ===> DIST CLEAN)
@rm -f $(TARGET)*
@rm -f $(TARGET)
@rm -f tags
info:
@ -181,6 +86,6 @@ tags:
$(Q)ctags -R
$(BUILD_DIR):
@mkdir $(BUILD_DIR)
@mkdir -p $(BUILD_DIR)
-include $(OBJ:.o=.d)

View File

@ -1,34 +1,14 @@
# MD-Bench
![Image](figures/features-v3.png "MD-Bench Features")
MD-Bench is a toolbox for the performance engineering of short-range force calculation kernels on molecular-dynamics applications.
It aims at covering all available state-of-the-art algorithms from different community codes such as LAMMPS and GROMACS.
Apart from that, many tools to study and evaluate the in-depth performance of such kernels on distinct hardware are offered, like gather-bench, a standalone benchmark that mimics the data movement from MD kernels and the stubbed force calculation cases that focus on isolating the impacts caused by memory latencies and control flow divergence contributions in the overall performance.
<table>
<thead>
<tr>
<th>Verlet Lists</th>
<th>GROMACS MxN</th>
<th>Stubbed cases</th>
</tr>
</thead>
<tbody>
<tr>
<td><a target="_blank" rel="noopener noreferrer" href="figures/verlet_v2.png"><img src="figures/verlet_v2.png" alt="Image" title="Verlet Lists" style="width: 100%;"></a></td>
<td><a target="_blank" rel="noopener noreferrer" href="figures/gromacs_mxn_v2.png"><img src="figures/gromacs_mxn_v2.png" alt="Image" title="GROMACS MxN" style="width: 90%;"></a></td>
<td><a target="_blank" rel="noopener noreferrer" href="figures/stub_new_v3.png"><img src="figures/stub_new_v3.png" alt="Image" title="Stubbed cases" style="width: 100%;"></a></td>
</tr>
</tbody>
</table>
<!-- ![Image](figures/gather_bench.png "gather-bench") -->
MD-Bench is a toolbox for the performance engineering of short-range force
calculation kernels on molecular-dynamics applications. It aims at covering all
available state-of-the-art algorithms from different community codes such as
LAMMPS and GROMACS.
## Build instructions
Properly configure your building by changing `config.mk` file. The following options are available:
Properly configure your building by changing `config.mk` file. The following
options are available:
- **TAG:** Compiler tag (available options: GCC, CLANG, ICC, ONEAPI, NVCC).
- **ISA:** Instruction set (available options: SSE, AVX, AVX\_FMA, AVX2, AVX512).
@ -45,15 +25,18 @@ Properly configure your building by changing `config.mk` file. The following opt
- **COMPUTE\_STATS:** Compute statistics.
Configurations for LAMMPS Verlet Lists optimization scheme:
- **ENABLE\_OMP\_SIMD:** Use omp simd pragma on half neighbor-lists kernels.
- **USE\_SIMD\_KERNEL:** Compile kernel with explicit SIMD intrinsics.
Configurations for GROMACS MxN optimization scheme:
- **USE\_REFERENCE\_VERSION:** Use reference version (only for correction purposes).
- **XTC\_OUTPUT:** Enable XTC output.
- **HALF\_NEIGHBOR\_LISTS\_CHECK\_CJ:** Check if j-clusters are local when decreasing the reaction force.
Configurations for CUDA:
- **USE\_CUDA\_HOST\_MEMORY:** Use CUDA host memory to optimize host-device transfers.
When done, just use `make` to compile the code.
@ -68,11 +51,14 @@ Use the following command to run a simulation:
./MD-Bench-<TAG>-<OPT_SCHEME> [OPTION]...
```
Where `TAG` and `OPT_SCHEME` correspond to the building options with the same name.
Without any options, a Copper FCC lattice system with size 32x32x32 (131072 atoms) over 200 time-steps using the Lennard-Jones potential (sigma=1.0, epsilon=1.0) is simulated.
Where `TAG` and `OPT_SCHEME` correspond to the building options with the same
name. Without any options, a Copper FCC lattice system with size 32x32x32
(131072 atoms) over 200 time-steps using the Lennard-Jones potential (sigma=1.0,
epsilon=1.0) is simulated.
The default behavior and other options can be changed using the following parameters:
```
```sh
-p <string>: file to read parameters from (can be specified more than once)
-f <string>: force field (lj or eam), default lj
-i <string>: input file with atom positions (dump)
@ -92,11 +78,17 @@ TBD
## Citations
Rafael Ravedutti Lucio Machado, Jan Eitzinger, Harald Köstler, and Gerhard Wellein: MD-Bench: A generic proxy-app toolbox for state-of-the-art molecular dynamics algorithms. Accepted for [PPAM](https://ppam.edu.pl/) 2022, the 14th International Conference on Parallel Processing and Applied Mathematics, Gdansk, Poland, September 11-14, 2022. PPAM 2022 Best Paper Award. Preprint: [arXiv:2207.13094](https://arxiv.org/abs/2207.13094)
Rafael Ravedutti Lucio Machado, Jan Eitzinger, Harald Köstler, and Gerhard
Wellein: MD-Bench: A generic proxy-app toolbox for state-of-the-art molecular
dynamics algorithms. Accepted for [PPAM](https://ppam.edu.pl/) 2022, the 14th
International Conference on Parallel Processing and Applied Mathematics, Gdansk,
Poland, September 11-14, 2022. PPAM 2022 Best Paper Award. Preprint:
[arXiv:2207.13094](https://arxiv.org/abs/2207.13094)
## Credits
MD-Bench is developed by the Erlangen National High Performance Computing Center ([NHR@FAU](https://hpc.fau.de/)) at the University of Erlangen-Nürnberg.
MD-Bench is developed by the Erlangen National High Performance Computing Center
([NHR@FAU](https://hpc.fau.de/)) at the University of Erlangen-Nürnberg.
## License

109
config.mk
View File

@ -1,17 +1,18 @@
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
TAG ?= ICC
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
ISA ?= AVX512
# Optimization scheme (lammps/gromacs/clusters_per_bin)
OPT_SCHEME ?= lammps
# Compiler tool chain (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
TOOLCHAIN ?= CLANG
# Instruction set for instrinsic kernels (NONE/SSE/AVX/AVX_FMA/AVX2/AVX512)
ISA ?= ARM
SIMD ?= NONE
# Optimization scheme (verletlist/clusterpair/clusters_per_bin)
OPT_SCHEME ?= verletlist
# Enable likwid (true or false)
ENABLE_LIKWID ?= true
ENABLE_LIKWID ?= false
# SP or DP
DATA_TYPE ?= DP
# AOS or SOA
DATA_LAYOUT ?= AOS
# Assembly syntax to generate (ATT/INTEL)
ASM_SYNTAX ?= ATT
ASM_SYNTAX ?= INTEL
# Debug
DEBUG ?= false
@ -28,7 +29,7 @@ COMPUTE_STATS ?= true
# Configurations for lammps optimization scheme
# Use omp simd pragma when running with half neighbor-lists
ENABLE_OMP_SIMD ?= true
ENABLE_OMP_SIMD ?= false
# Use kernel with explicit SIMD intrinsics
USE_SIMD_KERNEL ?= false
@ -47,3 +48,93 @@ USE_CUDA_HOST_MEMORY ?= false
#Feature options
OPTIONS = -DALIGNMENT=64
#OPTIONS += More options
#DO NOT EDIT BELOW
ifeq ($(strip $(DATA_LAYOUT)),AOS)
DEFINES += -DAOS
endif
ifeq ($(strip $(DATA_TYPE)),SP)
DEFINES += -DPRECISION=1
else
DEFINES += -DPRECISION=2
endif
ifneq ($(ASM_SYNTAX), ATT)
ASFLAGS += -masm=intel
endif
ifeq ($(strip $(SORT_ATOMS)),true)
DEFINES += -DSORT_ATOMS
endif
ifeq ($(strip $(EXPLICIT_TYPES)),true)
DEFINES += -DEXPLICIT_TYPES
endif
ifeq ($(strip $(MEM_TRACER)),true)
DEFINES += -DMEM_TRACER
endif
ifeq ($(strip $(INDEX_TRACER)),true)
DEFINES += -DINDEX_TRACER
endif
ifeq ($(strip $(COMPUTE_STATS)),true)
DEFINES += -DCOMPUTE_STATS
endif
ifeq ($(strip $(XTC_OUTPUT)),true)
DEFINES += -DXTC_OUTPUT
endif
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
DEFINES += -DUSE_REFERENCE_VERSION
endif
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
endif
ifeq ($(strip $(DEBUG)),true)
DEFINES += -DDEBUG
endif
ifneq ($(VECTOR_WIDTH),)
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif
ifeq ($(strip $(__SIMD_KERNEL__)),true)
DEFINES += -D__SIMD_KERNEL__
endif
ifeq ($(strip $(__SSE__)),true)
DEFINES += -D__ISA_SSE__
endif
ifeq ($(strip $(__ISA_AVX__)),true)
DEFINES += -D__ISA_AVX__
endif
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
DEFINES += -D__ISA_AVX_FMA__
endif
ifeq ($(strip $(__ISA_AVX2__)),true)
DEFINES += -D__ISA_AVX2__
endif
ifeq ($(strip $(__ISA_AVX512__)),true)
DEFINES += -D__ISA_AVX512__
endif
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
DEFINES += -DENABLE_OMP_SIMD
endif
ifeq ($(strip $(OPT_SCHEME)),verletlist)
OPT_TAG = VL
endif
ifneq ($(strip $(SIMD)),NONE)
TOOLCHAIN = $(TOOLCHAIN)-$(ISA)-$(SIMD)
endif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 273 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

View File

@ -1,523 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="297mm"
height="210mm"
viewBox="0 0 297 210"
version="1.1"
id="svg5"
inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
sodipodi:docname="gather_bench.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview7"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:document-units="mm"
showgrid="false"
inkscape:zoom="0.73508842"
inkscape:cx="551.63432"
inkscape:cy="348.25743"
inkscape:window-width="1920"
inkscape:window-height="1011"
inkscape:window-x="0"
inkscape:window-y="165"
inkscape:window-maximized="1"
inkscape:current-layer="layer1" />
<defs
id="defs2">
<rect
x="144.01516"
y="304.36604"
width="248.99777"
height="100.91557"
id="rect79475" />
<rect
x="309.01869"
y="43.698615"
width="552.19421"
height="71.390348"
id="rect65238" />
<rect
x="762.55856"
y="341.3838"
width="277.62756"
height="105.0235"
id="rect47632" />
<linearGradient
inkscape:collect="always"
id="linearGradient40704">
<stop
style="stop-color:#ccffaa;stop-opacity:1;"
offset="0"
id="stop40700" />
<stop
style="stop-color:#ccffaa;stop-opacity:0;"
offset="1"
id="stop40702" />
</linearGradient>
<marker
style="overflow:visible;"
id="Arrow2Mend"
refX="0.0"
refY="0.0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(0.6) rotate(180) translate(0,0)"
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
style="stroke:context-stroke;fill-rule:evenodd;fill:context-stroke;stroke-width:0.62500000;stroke-linejoin:round;"
id="path39486" />
</marker>
<marker
style="overflow:visible;"
id="Arrow1Mend"
refX="0.0"
refY="0.0"
orient="auto"
inkscape:stockid="Arrow1Mend"
inkscape:isstock="true">
<path
transform="scale(0.4) rotate(180) translate(10,0)"
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
id="path39468" />
</marker>
<marker
style="overflow:visible;"
id="Arrow1Lend"
refX="0.0"
refY="0.0"
orient="auto"
inkscape:stockid="Arrow1Lend"
inkscape:isstock="true">
<path
transform="scale(0.8) rotate(180) translate(12.5,0)"
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
id="path39462" />
</marker>
<rect
x="707.09731"
y="616.36746"
width="407.71288"
height="417.08306"
id="rect24254" />
<rect
x="47.404365"
y="100.3268"
width="398.49855"
height="110.16514"
id="rect5050" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5-6" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5-6-1" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0-6" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0-6-2" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-0-6-2-8" />
<marker
style="overflow:visible"
id="Arrow2Mend-2"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
id="path39486-3" />
</marker>
<marker
style="overflow:visible"
id="Arrow2Mend-2-5"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
id="path39486-3-9" />
</marker>
<marker
style="overflow:visible"
id="Arrow2Mend-2-5-2"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
id="path39486-3-9-8" />
</marker>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient40704"
id="linearGradient40706"
x1="324.58157"
y1="127.35331"
x2="363.61096"
y2="98.957848"
gradientUnits="userSpaceOnUse" />
<rect
x="47.404366"
y="100.3268"
width="398.49854"
height="110.16514"
id="rect5050-3-5-6-1-7" />
<rect
x="309.01868"
y="43.698616"
width="552.19421"
height="71.39035"
id="rect65238-1" />
</defs>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<rect
style="fill:#d5d5ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
id="rect55834"
width="250.31726"
height="74.676537"
x="25.257824"
y="97.277718" />
<rect
style="fill:#d5f6ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
id="rect55832"
width="250.35208"
height="64.461151"
x="25.256891"
y="32.817505" />
<rect
style="fill:#ccffaa;stroke:#091600;stroke-width:1.31891"
id="rect6462"
width="82.385742"
height="20.525751"
x="28.355024"
y="48.740646" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,17.244577,26.206534)"
id="text5048"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82948"><tspan
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
id="tspan82946">gather-bench</tspan></tspan></text>
<rect
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9"
width="18.764017"
height="20.965076"
x="39.518955"
y="140.726" />
<text
xml:space="preserve"
transform="matrix(0.33667319,0,0,0.33667319,25.589293,109.42998)"
id="text5048-3"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82950">L1</tspan></text>
<rect
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9-0"
width="21.653919"
height="24.193966"
x="97.687294"
y="138.51564" />
<text
xml:space="preserve"
transform="matrix(0.3885252,0,0,0.3885252,81.212654,102.39964)"
id="text5048-3-6"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82952">L2</tspan></text>
<rect
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9-0-6"
width="27.217058"
height="30.409672"
x="149.19933"
y="134.83977" />
<text
xml:space="preserve"
transform="matrix(0.48834178,0,0,0.48834178,128.49215,89.445174)"
id="text5048-3-6-1"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82954">L3</tspan></text>
<rect
style="fill:#eeaaff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
id="rect6462-9-0-6-7"
width="61.032539"
height="29.96501"
x="204.01265"
y="135.61238" />
<text
xml:space="preserve"
transform="matrix(0.48834178,0,0,0.48834178,182.37007,89.995434)"
id="text5048-3-6-1-9"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2-8);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82956">DRAM</tspan></text>
<rect
style="fill:#ffccaa;stroke:#091600;stroke-width:1.10636"
id="rect6462-6"
width="74.980759"
height="15.869514"
x="126.09525"
y="38.773243" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,115.65481,14.295323)"
id="text5048-7"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82958">Single gather</tspan></text>
<rect
style="fill:#ffccaa;stroke:#091600;stroke-width:1.03971"
id="rect6462-6-3"
width="66.071701"
height="15.904838"
x="126.90776"
y="63.642746" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,116.63325,39.114393)"
id="text5048-7-5"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82960">MD gathers</tspan></text>
<rect
style="fill:#afe9dd;stroke:#091600;stroke-width:1.02848"
id="rect6462-6-3-2"
width="64.479698"
height="15.947394"
x="206.65364"
y="52.98967" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,196.01512,28.482594)"
id="text5048-7-5-9"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82962">Contiguous</tspan></text>
<rect
style="fill:#afe9dd;stroke:#091600;stroke-width:0.987323"
id="rect6462-6-3-2-2"
width="59.269382"
height="15.988551"
x="208.16559"
y="76.856781" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,197.58604,52.220445)"
id="text5048-7-5-9-7"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="47.404297"
y="135.7168"
id="tspan82964">&quot;Random&quot;</tspan></text>
<text
xml:space="preserve"
transform="scale(0.26458333)"
id="text24252"
style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect24254)" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 193.10512,71.273276 206.30683,61.033513"
id="path39049"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="M 193.08841,71.196939 207.86207,84.43804"
id="path39053"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.39816;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 58.548229,151.24436 38.298093,0.25023"
id="path39219"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24847;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 119.19252,150.09399 29.28333,0.26095"
id="path39219-2"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 177.02022,150.44367 26.36623,0.26095"
id="path39219-2-0"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
d="m 48.145458,92.71788 -0.644819,47.57709"
id="path39377"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2)"
d="M 48.121208,92.873762 106.60807,137.41946"
id="path39377-7"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5)"
d="M 48.073928,92.825143 158.88023,133.04546"
id="path39377-7-2"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5-2)"
d="M 48.051946,92.813593 233.0959,134.16596"
id="path39377-7-2-9"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<rect
style="fill:#e9afaf;stroke:#091600;stroke-width:1.34518"
id="rect6462-6-3-2-2-3"
width="65.880661"
height="26.700579"
x="38.104012"
y="80.530182" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.365612,69.678744 h 2e-6"
id="path39808"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 111.64767,59.183009 6.84466,0.03069"
id="path41004"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 119.03378,47.056357 -0.58704,25.198541"
id="path41006"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.02423;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 118.07503,72.254897 7.94998,-0.05784"
id="path41008"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.882836;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 118.26666,47.054814 7.69322,0.173925"
id="path41112"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
d="M 68.213642,69.068864 67.910274,80.302728"
id="path55728"
inkscape:connector-type="polyline"
inkscape:connector-curvature="0" />
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,-1.3782637,4.0412367)"
id="text65236"
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="309.01953"
y="90.886691"
id="tspan82968"><tspan
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
id="tspan82966">Application Level</tspan></tspan></text>
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,2.7015103,160.71919)"
id="text65236-2"
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="309.01953"
y="90.886691"
id="tspan82972"><tspan
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
id="tspan82970">Hardware Level</tspan></tspan></text>
<text
xml:space="preserve"
transform="matrix(0.26458333,0,0,0.26458333,2.3490396,0.57331532)"
id="text79473"
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect79475);fill:#000000;fill-opacity:1;stroke:none"><tspan
x="144.01562"
y="339.75586"
id="tspan82974">vgather </tspan><tspan
x="144.01562"
y="389.75586"
id="tspan82976">instructions</tspan></text>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 128 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

View File

@ -1,11 +0,0 @@
GROMACS_PATH=/apps/Gromacs/2018.1-mkl
GROMACS_INC ?= -I${GROMACS_PATH}/include
GROMACS_DEFINES ?=
GROMACS_LIB ?= -L${GROMACS_PATH}/lib64
ifeq ($(strip $(XTC_OUTPUT)),true)
INCLUDES += ${GROMACS_INC}
DEFINES += ${GROMACS_DEFINES}
LIBS += -lgromacs
LFLAGS += ${GROMACS_LIB}
endif

View File

@ -1,278 +0,0 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
//---
#include <atom.h>
#include <likwid-marker.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
#ifdef __SIMD_KERNEL__
#include <simd.h>
#endif
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for(int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
#endif
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
#ifdef USE_REFERENCE_VERSION
if(numneighs % VECTOR_WIDTH > 0) {
addStat(stats->atoms_outside_cutoff, VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
}
#endif
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("force");
}
double E = getTimeStamp();
return E-S;
}
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for(int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("forceLJ-halfneigh");
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
// Pragma required to vectorize the inner loop
#ifdef ENABLE_OMP_SIMD
#pragma omp simd reduction(+: fix,fiy,fiz)
#endif
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if(rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
// We do not need to update forces for ghost atoms
if(j < Nlocal) {
atom_fx(j) -= delx * force;
atom_fy(j) -= dely * force;
atom_fz(j) -= delz * force;
}
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("forceLJ-halfneigh");
}
double E = getTimeStamp();
return E-S;
}
double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for(int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#ifndef __SIMD_KERNEL__
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
exit(-1);
#else
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
MD_SIMD_FLOAT fix = simd_zero();
MD_SIMD_FLOAT fiy = simd_zero();
MD_SIMD_FLOAT fiz = simd_zero();
for(int k = 0; k < numneighs; k += VECTOR_WIDTH) {
// If the last iteration of this loop is separated from the rest, this mask can be set only there
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(simd_int_add(simd_int_broadcast(k), simd_int_seq()), numneighs_vec);
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
#ifdef AOS
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
MD_SIMD_FLOAT delx = xtmp - simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp - simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp - simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
#else
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
#endif
MD_SIMD_FLOAT rsq = simd_fma(delx, delx, simd_fma(dely, dely, simd_mul(delz, delz)));
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs, simd_mask_cond_lt(rsq, cutforcesq_vec));
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
MD_SIMD_FLOAT sr6 = simd_mul(sr2, simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
MD_SIMD_FLOAT force = simd_mul(c48_vec, simd_mul(sr6, simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
}
atom_fx(i) += simd_h_reduce_sum(fix);
atom_fy(i) += simd_h_reduce_sum(fiy);
atom_fz(i) += simd_h_reduce_sum(fiz);
}
LIKWID_MARKER_STOP("force");
}
#endif
double E = getTimeStamp();
return E-S;
}

View File

@ -1,103 +0,0 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
#ifdef CUDA_TARGET
# define KERNEL_NAME "CUDA"
# define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
# define initialIntegrate initialIntegrate_cuda
# define finalIntegrate finalIntegrate_cuda
# define buildNeighbor buildNeighbor_cuda
# define updatePbc updatePbc_cuda
# define updateAtomsPbc updateAtomsPbc_cuda
#else
# ifdef USE_SIMD_KERNEL
# define KERNEL_NAME "SIMD"
# define computeForceLJFullNeigh computeForceLJFullNeigh_simd
# else
# define KERNEL_NAME "plain-C"
# define computeForceLJFullNeigh computeForceLJFullNeigh_plain_c
# endif
# define initialIntegrate initialIntegrate_cpu
# define finalIntegrate finalIntegrate_cpu
# define buildNeighbor buildNeighbor_cpu
# define updatePbc updatePbc_cpu
# define updateAtomsPbc updateAtomsPbc_cpu
#endif
typedef struct {
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int *border_map;
int *type;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
} DeviceAtom;
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int *border_map;
int *type;
int ntypes;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
// DEM
MD_FLOAT *radius;
MD_FLOAT *av;
MD_FLOAT *r;
// Device data
DeviceAtom d_atom;
} Atom;
extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern int readAtom_in(Atom*, Parameter*);
extern void writeAtom(Atom*, Parameter*);
extern void growAtom(Atom*);
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]
# define atom_y(i) atom->x[(i) * 3 + 1]
# define atom_z(i) atom->x[(i) * 3 + 2]
# define atom_vx(i) atom->vx[(i) * 3 + 0]
# define atom_vy(i) atom->vx[(i) * 3 + 1]
# define atom_vz(i) atom->vx[(i) * 3 + 2]
# define atom_fx(i) atom->fx[(i) * 3 + 0]
# define atom_fy(i) atom->fx[(i) * 3 + 1]
# define atom_fz(i) atom->fx[(i) * 3 + 2]
#else
# define POS_DATA_LAYOUT "SoA"
# define atom_x(i) atom->x[i]
# define atom_y(i) atom->y[i]
# define atom_z(i) atom->z[i]
# define atom_vx(i) atom->vx[i]
# define atom_vy(i) atom->vy[i]
# define atom_vz(i) atom->vz[i]
# define atom_fx(i) atom->fx[i]
# define atom_fy(i) atom->fy[i]
# define atom_fz(i) atom->fz[i]
#endif
#endif

View File

@ -1,171 +0,0 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
//---
#include <pbc.h>
#include <atom.h>
#include <allocate.h>
#define DELTA 20000
int NmaxGhost;
int *PBCx, *PBCy, *PBCz;
static void growPbc(Atom*);
/* exported subroutines */
void initPbc(Atom* atom) {
NmaxGhost = 0;
atom->border_map = NULL;
PBCx = NULL; PBCy = NULL; PBCz = NULL;
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void updatePbc_cpu(Atom *atom, Parameter *param, bool doReneighbor) {
int *border_map = atom->border_map;
int nlocal = atom->Nlocal;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for(int i = 0; i < atom->Nghost; i++) {
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
}
}
/* relocate atoms that have left domain according
* to periodic boundary conditions */
void updateAtomsPbc_cpu(Atom *atom, Parameter *param) {
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for(int i = 0; i < atom->Nlocal; i++) {
if(atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if(atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if(atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if(atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if(atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if(atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
}
/* setup periodic boundary conditions by
* defining ghost atoms around domain
* only creates mapping and coordinate corrections
* that are then enforced in updatePbc */
#define ADDGHOST(dx,dy,dz) \
Nghost++; \
border_map[Nghost] = i; \
PBCx[Nghost] = dx; \
PBCy[Nghost] = dy; \
PBCz[Nghost] = dz; \
atom->type[atom->Nlocal + Nghost] = atom->type[i]
void setupPbc(Atom *atom, Parameter *param) {
int *border_map = atom->border_map;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
MD_FLOAT Cutneigh = param->cutneigh;
int Nghost = -1;
for(int i = 0; i < atom->Nlocal; i++) {
if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
growAtom(atom);
}
if (Nghost + 7 >= NmaxGhost) {
growPbc(atom);
border_map = atom->border_map;
}
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
/* Setup ghost atoms */
/* 6 planes */
if(param->pbc_x != 0) {
if (x < Cutneigh) { ADDGHOST(+1,0,0); }
if (x >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); }
}
if(param->pbc_y != 0) {
if (y < Cutneigh) { ADDGHOST(0,+1,0); }
if (y >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); }
}
if(param->pbc_z != 0) {
if (z < Cutneigh) { ADDGHOST(0,0,+1); }
if (z >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); }
}
/* 8 corners */
if(param->pbc_x != 0 && param->pbc_y != 0 && param->pbc_z != 0) {
if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1,+1,+1); }
if (x < Cutneigh && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(+1,-1,+1); }
if (x < Cutneigh && y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); }
if (x < Cutneigh && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); }
if (x >= (xprd-Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1,+1,+1); }
if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,-1,+1); }
if (x >= (xprd-Cutneigh) && y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); }
if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); }
}
/* 12 edges */
if(param->pbc_x != 0 && param->pbc_z != 0) {
if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1,0,+1); }
if (x < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); }
if (x >= (xprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,0,+1); }
if (x >= (xprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); }
}
if(param->pbc_y != 0 && param->pbc_z != 0) {
if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0,+1,+1); }
if (y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); }
if (y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(0,-1,+1); }
if (y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); }
}
if(param->pbc_x != 0 && param->pbc_y != 0) {
if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1,+1,0); }
if (y < Cutneigh && x >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); }
if (y >= (yprd-Cutneigh) && x < Cutneigh) { ADDGHOST(+1,-1,0); }
if (y >= (yprd-Cutneigh) && x >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
}
}
// increase by one to make it the ghost atom count
atom->Nghost = Nghost + 1;
}
/* internal subroutines */
void growPbc(Atom* atom) {
int nold = NmaxGhost;
NmaxGhost += DELTA;
atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
PBCx = (int*) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
PBCy = (int*) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
PBCz = (int*) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
}

View File

@ -1,17 +1,18 @@
CC = clang
CC = /opt/homebrew/Cellar/llvm/18.1.5/bin/clang
LINKER = $(CC)
ANSI_CFLAGS = -ansi
ANSI_CFLAGS += -std=c99
ANSI_CFLAGS += -pedantic
ANSI_CFLAGS += -Wextra
# ANSI_CFLAGS += -Wextra
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) -Xpreprocessor -fopenmp #-g
#CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
#CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
#CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g
ASFLAGS = -masm=intel
ASFLAGS = #-masm=intel
LFLAGS =
DEFINES = -D_GNU_SOURCE
INCLUDES =
LIBS = -lm #-lomp
# MacOSX with Apple Silicon and homebrew
INCLUDES = -I/opt/homebrew/Cellar/libomp/18.1.5/include/
LIBS = -lm -L/opt/homebrew/Cellar/libomp/18.1.5/lib/ -lomp

View File

@ -1,7 +1,7 @@
CC = icc
LINKER = $(CC)
OPENMP = #-qopenmp
OPENMP = -qopenmp
PROFILE = #-profile-functions -g -pg
ifeq ($(ISA),AVX512)

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
@ -66,7 +66,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
{
LIKWID_MARKER_START("force");
#pragma omp for
#pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
int ci_cj1 = CJ1_FROM_CI(ci);
@ -213,7 +213,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
#endif
*/
#pragma omp for
#pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -427,7 +427,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
{
LIKWID_MARKER_START("force");
#pragma omp for
#pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -595,7 +595,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
{
LIKWID_MARKER_START("force");
#pragma omp for
#pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N
@ -869,7 +869,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
{
LIKWID_MARKER_START("force");
#pragma omp for
#pragma omp for schedule(runtime)
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj0 = CJ0_FROM_CI(ci);
#if CLUSTER_M > CLUSTER_N

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,11 +1,13 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <omp.h>
//--
#include <likwid-marker.h>
//--
@ -308,6 +310,30 @@ int main(int argc, char** argv) {
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
printf(HLINE);
int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel
#pragma omp master
{
omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
}
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,27 +1,21 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <time.h>
double getTimeStamp()
double getTimeStamp(void)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
}
double getTimeResolution()
double getTimeResolution(void)
{
struct timespec ts;
clock_getres(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
}
double getTimeStamp_()
{
return getTimeStamp();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
@ -9,6 +9,5 @@
extern double getTimeStamp(void);
extern double getTimeResolution(void);
extern double getTimeStamp_(void);
#endif

View File

@ -1,11 +1,10 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -19,7 +18,8 @@
#define IR 2836
#define MASK 123459876
double myrandom(int* seed) {
double myrandom(int* seed)
{
int k = (*seed) / IQ;
double ans;
@ -29,7 +29,8 @@ double myrandom(int* seed) {
return ans;
}
void random_reset(int *seed, int ibase, double *coord) {
void random_reset(int* seed, int ibase, double* coord)
{
int i;
char* str = (char*)&ibase;
int n = sizeof(int);
@ -61,30 +62,41 @@ void random_reset(int *seed, int ibase, double *coord) {
// warm up the RNG
for (i = 0; i < 5; i++) myrandom(seed);
for (i = 0; i < 5; i++)
myrandom(seed);
// save = 0;
}
int str2ff(const char *string) {
int str2ff(const char* string)
{
if (strncmp(string, "lj", 2) == 0) return FF_LJ;
if (strncmp(string, "eam", 3) == 0) return FF_EAM;
if (strncmp(string, "dem", 3) == 0) return FF_DEM;
return -1;
}
const char* ff2str(int ff) {
if(ff == FF_LJ) { return "lj"; }
if(ff == FF_EAM) { return "eam"; }
if(ff == FF_DEM) { return "dem"; }
const char* ff2str(int ff)
{
if (ff == FF_LJ) {
return "lj";
}
if (ff == FF_EAM) {
return "eam";
}
if (ff == FF_DEM) {
return "dem";
}
return "invalid";
}
int get_cuda_num_threads() {
int get_cuda_num_threads(void)
{
const char* num_threads_env = getenv("NUM_THREADS");
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
}
void readline(char *line, FILE *fp) {
void readline(char* line, FILE* fp)
{
if (fgets(line, MAXLINE, fp) == NULL) {
if (errno != 0) {
perror("readline()");
@ -93,13 +105,16 @@ void readline(char *line, FILE *fp) {
}
}
void debug_printf(const char *format, ...) {
void debug_printf(const char* format, ...)
{
#ifdef DEBUG
va_list arg;
int ret;
va_start(arg, format);
if((vfprintf(stdout, format, arg)) < 0) { perror("debug_printf()"); }
if ((vfprintf(stdout, format, arg)) < 0) {
perror("debug_printf()");
}
va_end(arg);
#endif
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
@ -7,6 +7,7 @@
#ifndef __UTIL_H_
#define __UTIL_H_
#include <stdio.h>
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
@ -41,6 +42,6 @@ extern int str2ff(const char *string);
extern const char *ff2str(int ff);
extern void readline(char *line, FILE *fp);
extern void debug_printf(const char *format, ...);
extern int get_cuda_num_threads();
extern int get_cuda_num_threads(void);
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

102
src/verletlist/atom.h Normal file
View File

@ -0,0 +1,102 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
#ifdef CUDA_TARGET
#define KERNEL_NAME "CUDA"
#define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
#define initialIntegrate initialIntegrate_cuda
#define finalIntegrate finalIntegrate_cuda
#define buildNeighbor buildNeighbor_cuda
#define updatePbc updatePbc_cuda
#define updateAtomsPbc updateAtomsPbc_cuda
#else
#ifdef USE_SIMD_KERNEL
#define KERNEL_NAME "SIMD"
#define computeForceLJFullNeigh computeForceLJFullNeigh_simd
#else
#define KERNEL_NAME "PLAIN"
#endif
#define initialIntegrate initialIntegrate_cpu
#define finalIntegrate finalIntegrate_cpu
#define buildNeighbor buildNeighbor_cpu
#define updatePbc updatePbc_cpu
#define updateAtomsPbc updateAtomsPbc_cpu
#endif
typedef struct {
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int* border_map;
int* type;
MD_FLOAT* epsilon;
MD_FLOAT* sigma6;
MD_FLOAT* cutforcesq;
MD_FLOAT* cutneighsq;
} DeviceAtom;
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
MD_FLOAT *fx, *fy, *fz;
int* border_map;
int* type;
int ntypes;
MD_FLOAT* epsilon;
MD_FLOAT* sigma6;
MD_FLOAT* cutforcesq;
MD_FLOAT* cutneighsq;
// DEM
MD_FLOAT* radius;
MD_FLOAT* av;
MD_FLOAT* r;
// Device data
DeviceAtom d_atom;
} Atom;
extern void initAtom(Atom*);
extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern int readAtom_in(Atom*, Parameter*);
extern void writeAtom(Atom*, Parameter*);
extern void growAtom(Atom*);
#ifdef AOS
#define POS_DATA_LAYOUT "AoS"
#define atom_x(i) atom->x[(i) * 3 + 0]
#define atom_y(i) atom->x[(i) * 3 + 1]
#define atom_z(i) atom->x[(i) * 3 + 2]
#define atom_vx(i) atom->vx[(i) * 3 + 0]
#define atom_vy(i) atom->vx[(i) * 3 + 1]
#define atom_vz(i) atom->vx[(i) * 3 + 2]
#define atom_fx(i) atom->fx[(i) * 3 + 0]
#define atom_fy(i) atom->fx[(i) * 3 + 1]
#define atom_fz(i) atom->fx[(i) * 3 + 2]
#else
#define POS_DATA_LAYOUT "SoA"
#define atom_x(i) atom->x[i]
#define atom_y(i) atom->y[i]
#define atom_z(i) atom->z[i]
#define atom_vx(i) atom->vx[i]
#define atom_vy(i) atom->vy[i]
#define atom_vz(i) atom->vz[i]
#define atom_fx(i) atom->fx[i]
#define atom_fy(i) atom->fy[i]
#define atom_fz(i) atom->fz[i]
#endif
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -0,0 +1,112 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
//---
#include <atom.h>
#include <likwid-marker.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
#ifdef __SIMD_KERNEL__
#include <simd.h>
#endif
double computeForceLJFullNeigh_simd(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
for (int i = 0; i < Nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double S = getTimeStamp();
#ifndef __SIMD_KERNEL__
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
exit(-1);
#else
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
for (int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
MD_SIMD_FLOAT fix = simd_zero();
MD_SIMD_FLOAT fiy = simd_zero();
MD_SIMD_FLOAT fiz = simd_zero();
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
// If the last iteration of this loop is separated from the rest, this
// mask can be set only there
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
numneighs_vec);
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
#ifdef AOS
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
MD_SIMD_FLOAT delx = xtmp -
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp -
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp -
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
#else
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
#endif
MD_SIMD_FLOAT rsq = simd_fma(delx,
delx,
simd_fma(dely, dely, simd_mul(delz, delz)));
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
simd_mask_cond_lt(rsq, cutforcesq_vec));
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
MD_SIMD_FLOAT force = simd_mul(c48_vec,
simd_mul(sr6,
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
}
atom_fx(i) += simd_h_reduce_sum(fix);
atom_fy(i) += simd_h_reduce_sum(fiy);
atom_fz(i) += simd_h_reduce_sum(fiz);
}
LIKWID_MARKER_STOP("force");
}
#endif
double E = getTimeStamp();
return E - S;
}

198
src/verletlist/force_lj.c Normal file
View File

@ -0,0 +1,198 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <likwid-marker.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
double computeForceLJFullNeigh(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int nLocal = atom->Nlocal;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for (int i = 0; i < nLocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double timeStart = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("force");
#pragma omp for schedule(runtime)
for (int i = 0; i < nLocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
for (int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if (rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
addStat(stats->atoms_within_cutoff, 1);
} else {
addStat(stats->atoms_outside_cutoff, 1);
#endif
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
#ifdef USE_REFERENCE_VERSION
if (numneighs % VECTOR_WIDTH > 0) {
addStat(stats->atoms_outside_cutoff,
VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
}
#endif
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters,
(numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("force");
}
double timeStop = getTimeStamp();
return timeStop - timeStart;
}
double computeForceLJHalfNeigh(
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
int nlocal = atom->Nlocal;
int* neighs;
#ifndef EXPLICIT_TYPES
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
#endif
const MD_FLOAT num1 = 1.0;
const MD_FLOAT num48 = 48.0;
const MD_FLOAT num05 = 0.5;
for (int i = 0; i < nlocal; i++) {
atom_fx(i) = 0.0;
atom_fy(i) = 0.0;
atom_fz(i) = 0.0;
}
double timeStart = getTimeStamp();
#pragma omp parallel
{
LIKWID_MARKER_START("forceLJ-halfneigh");
#pragma omp for schedule(runtime)
for (int i = 0; i < nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
// Pragma required to vectorize the inner loop
#ifdef ENABLE_OMP_SIMD
#pragma omp simd reduction(+ : fix, fiy, fiz)
#endif
for (int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * atom->ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
const MD_FLOAT epsilon = atom->epsilon[type_ij];
#endif
if (rsq < cutforcesq) {
MD_FLOAT sr2 = num1 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
// We do not need to update forces for ghost atoms
if (j < nlocal) {
atom_fx(j) -= delx * force;
atom_fy(j) -= dely * force;
atom_fz(j) -= delz * force;
}
}
}
atom_fx(i) += fix;
atom_fy(i) += fiy;
atom_fz(i) += fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters,
(numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
LIKWID_MARKER_STOP("forceLJ-halfneigh");
}
double timeStop = getTimeStamp();
return timeStop - timeStart;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,39 +1,37 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <math.h>
#include <float.h>
#include <likwid-marker.h>
#include <omp.h>
#include <allocate.h>
#include <atom.h>
#include <device.h>
#include <eam.h>
#include <integrate.h>
#include <thermo.h>
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <pbc.h>
#include <stats.h>
#include <thermo.h>
#include <timers.h>
#include <timing.h>
#include <util.h>
#include <vtk.h>
#define HLINE "----------------------------------------------------------------------------\n"
#define HLINE "------------------------------------------------------------------\n"
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
@ -41,15 +39,18 @@ extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
#endif
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
double setup(Parameter* param, Eam* eam, Atom* atom, Neighbor* neighbor, Stats* stats)
{
if (param->force_field == FF_EAM) {
initEam(eam, param);
}
double timeStart, timeStop;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
timeStart = getTimeStamp();
initAtom(atom);
initPbc(atom);
initStats(stats);
@ -62,7 +63,9 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
setupNeighbor(param);
setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); }
if (param->input_file == NULL) {
adjustThermo(param, atom);
}
#ifdef SORT_ATOMS
atom->Nghost = 0;
sortAtom(atom);
@ -71,13 +74,14 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
initDevice(atom, neighbor);
updatePbc(atom, param, true);
buildNeighbor(atom, neighbor);
E = getTimeStamp();
return E-S;
timeStop = getTimeStamp();
return timeStop - timeStart;
}
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
double S, E;
S = getTimeStamp();
double reneighbour(Parameter* param, Atom* atom, Neighbor* neighbor)
{
double timeStart, timeStop;
timeStart = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
updateAtomsPbc(atom, param);
#ifdef SORT_ATOMS
@ -88,19 +92,26 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
updatePbc(atom, param, true);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp();
return E-S;
timeStop = getTimeStamp();
return timeStop - timeStart;
}
void printAtomState(Atom *atom) {
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
void printAtomState(Atom* atom)
{
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
atom->Natoms,
atom->Nlocal,
atom->Nghost,
atom->Nmax);
// int nall = atom->Nlocal + atom->Nghost;
// for (int i=0; i<nall; i++) {
// printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]);
// }
}
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
double computeForce(
Eam* eam, Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
{
if (param->force_field == FF_EAM) {
return computeForceEam(eam, param, atom, neighbor, stats);
} else if (param->force_field == FF_DEM) {
@ -123,18 +134,27 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
#endif
}
void writeInput(Parameter *param, Atom *atom) {
void writeInput(Parameter* param, Atom* atom)
{
FILE* fpin = fopen("input.in", "w");
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
for (int i = 0; i < atom->Nlocal; i++) {
fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i));
fprintf(fpin,
"1,%f,%f,%f,%f,%f,%f\n",
atom_x(i),
atom_y(i),
atom_z(i),
atom_vx(i),
atom_vy(i),
atom_vz(i));
}
fclose(fpin);
}
int main(int argc, char** argv) {
int main(int argc, char** argv)
{
double timer[NUMTIMER];
Eam eam;
Atom atom;
@ -212,15 +232,22 @@ int main(int argc, char** argv) {
continue;
}
if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf("MD Bench: A performance-oriented prototyping harness for MD "
"algorithms\n");
printf(HLINE);
printf("-p / --params <string>: file to read parameters from (can be specified more than once)\n");
printf("-f <string>: force field (lj, eam or dem), default lj\n");
printf("-i <string>: input file with atom positions (dump)\n");
printf("-p / --params <string>: file to read parameters from (can be "
"specified more than once)\n");
printf("-f <string>: force field (lj, eam or dem), "
"default lj\n");
printf("-i <string>: input file with atom positions "
"(dump)\n");
printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-half <int>: use half (1) or full (0) neighbor lists\n");
printf("-n / --nsteps <int>: set number of timesteps for "
"simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in "
"x/y/z direction\n");
printf("-half <int>: use half (1) or full (0) neighbor "
"lists\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("-w <file>: write input atoms to file\n");
@ -288,10 +315,50 @@ int main(int argc, char** argv) {
computeThermo(-1, &param, &atom);
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("System: %d atoms %d ghost atoms, Steps: %d\n",
atom.Natoms,
atom.Nghost,
param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
timer[TOTAL],
timer[FORCE],
timer[NEIGH],
timer[TOTAL] - timer[FORCE] - timer[NEIGH]);
printf(HLINE);
int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel
#pragma omp master
{
omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind) {
case omp_sched_static:
strcpy(schedType, "static");
break;
case omp_sched_dynamic:
strcpy(schedType, "dynamic");
break;
case omp_sched_guided:
strcpy(schedType, "guided");
break;
case omp_sched_auto:
strcpy(schedType, "auto");
break;
case omp_sched_monotonic:
strcpy(schedType, "auto");
break;
}
nthreads = omp_get_max_threads();
}
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

234
src/verletlist/pbc.c Normal file
View File

@ -0,0 +1,234 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
//---
#include <allocate.h>
#include <atom.h>
#include <pbc.h>
#define DELTA 20000
int nmaxGhost;
int *PBCx, *PBCy, *PBCz;
static void growPbc(Atom*);
/* exported subroutines */
void initPbc(Atom* atom)
{
nmaxGhost = 0;
atom->border_map = NULL;
PBCx = NULL;
PBCy = NULL;
PBCz = NULL;
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void updatePbc_cpu(Atom* atom, Parameter* param, bool doReneighbor)
{
int* borderMap = atom->border_map;
int nlocal = atom->Nlocal;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for (int i = 0; i < atom->Nghost; i++) {
atom_x(nlocal + i) = atom_x(borderMap[i]) + PBCx[i] * xprd;
atom_y(nlocal + i) = atom_y(borderMap[i]) + PBCy[i] * yprd;
atom_z(nlocal + i) = atom_z(borderMap[i]) + PBCz[i] * zprd;
}
}
/* relocate atoms that have left domain according
* to periodic boundary conditions */
void updateAtomsPbc_cpu(Atom* atom, Parameter* param)
{
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for (int i = 0; i < atom->Nlocal; i++) {
if (atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if (atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if (atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if (atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if (atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if (atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
}
/* setup periodic boundary conditions by
* defining ghost atoms around domain
* only creates mapping and coordinate corrections
* that are then enforced in updatePbc */
#define ADDGHOST(dx, dy, dz) \
Nghost++; \
border_map[Nghost] = i; \
PBCx[Nghost] = dx; \
PBCy[Nghost] = dy; \
PBCz[Nghost] = dz; \
atom->type[atom->Nlocal + Nghost] = atom->type[i]
void setupPbc(Atom* atom, Parameter* param)
{
int* border_map = atom->border_map;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
MD_FLOAT cutneigh = param->cutneigh;
int Nghost = -1;
for (int i = 0; i < atom->Nlocal; i++) {
if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
growAtom(atom);
}
if (Nghost + 7 >= nmaxGhost) {
growPbc(atom);
border_map = atom->border_map;
}
MD_FLOAT x = atom_x(i);
MD_FLOAT y = atom_y(i);
MD_FLOAT z = atom_z(i);
/* Setup ghost atoms */
/* 6 planes */
if (param->pbc_x != 0) {
if (x < cutneigh) {
ADDGHOST(+1, 0, 0);
}
if (x >= (xprd - cutneigh)) {
ADDGHOST(-1, 0, 0);
}
}
if (param->pbc_y != 0) {
if (y < cutneigh) {
ADDGHOST(0, +1, 0);
}
if (y >= (yprd - cutneigh)) {
ADDGHOST(0, -1, 0);
}
}
if (param->pbc_z != 0) {
if (z < cutneigh) {
ADDGHOST(0, 0, +1);
}
if (z >= (zprd - cutneigh)) {
ADDGHOST(0, 0, -1);
}
}
/* 8 corners */
if (param->pbc_x != 0 && param->pbc_y != 0 && param->pbc_z != 0) {
if (x < cutneigh && y < cutneigh && z < cutneigh) {
ADDGHOST(+1, +1, +1);
}
if (x < cutneigh && y >= (yprd - cutneigh) && z < cutneigh) {
ADDGHOST(+1, -1, +1);
}
if (x < cutneigh && y < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(+1, +1, -1);
}
if (x < cutneigh && y >= (yprd - cutneigh) && z >= (zprd - cutneigh)) {
ADDGHOST(+1, -1, -1);
}
if (x >= (xprd - cutneigh) && y < cutneigh && z < cutneigh) {
ADDGHOST(-1, +1, +1);
}
if (x >= (xprd - cutneigh) && y >= (yprd - cutneigh) && z < cutneigh) {
ADDGHOST(-1, -1, +1);
}
if (x >= (xprd - cutneigh) && y < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(-1, +1, -1);
}
if (x >= (xprd - cutneigh) && y >= (yprd - cutneigh) &&
z >= (zprd - cutneigh)) {
ADDGHOST(-1, -1, -1);
}
}
/* 12 edges */
if (param->pbc_x != 0 && param->pbc_z != 0) {
if (x < cutneigh && z < cutneigh) {
ADDGHOST(+1, 0, +1);
}
if (x < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(+1, 0, -1);
}
if (x >= (xprd - cutneigh) && z < cutneigh) {
ADDGHOST(-1, 0, +1);
}
if (x >= (xprd - cutneigh) && z >= (zprd - cutneigh)) {
ADDGHOST(-1, 0, -1);
}
}
if (param->pbc_y != 0 && param->pbc_z != 0) {
if (y < cutneigh && z < cutneigh) {
ADDGHOST(0, +1, +1);
}
if (y < cutneigh && z >= (zprd - cutneigh)) {
ADDGHOST(0, +1, -1);
}
if (y >= (yprd - cutneigh) && z < cutneigh) {
ADDGHOST(0, -1, +1);
}
if (y >= (yprd - cutneigh) && z >= (zprd - cutneigh)) {
ADDGHOST(0, -1, -1);
}
}
if (param->pbc_x != 0 && param->pbc_y != 0) {
if (y < cutneigh && x < cutneigh) {
ADDGHOST(+1, +1, 0);
}
if (y < cutneigh && x >= (xprd - cutneigh)) {
ADDGHOST(-1, +1, 0);
}
if (y >= (yprd - cutneigh) && x < cutneigh) {
ADDGHOST(+1, -1, 0);
}
if (y >= (yprd - cutneigh) && x >= (xprd - cutneigh)) {
ADDGHOST(-1, -1, 0);
}
}
}
// increase by one to make it the ghost atom count
atom->Nghost = Nghost + 1;
}
/* internal subroutines */
void growPbc(Atom* atom)
{
int nold = nmaxGhost;
nmaxGhost += DELTA;
atom->border_map = (int*)reallocate(atom->border_map,
ALIGNMENT,
nmaxGhost * sizeof(int),
nold * sizeof(int));
PBCx = (int*)reallocate(PBCx, ALIGNMENT, nmaxGhost * sizeof(int), nold * sizeof(int));
PBCy = (int*)reallocate(PBCy, ALIGNMENT, nmaxGhost * sizeof(int), nold * sizeof(int));
PBCz = (int*)reallocate(PBCz, ALIGNMENT, nmaxGhost * sizeof(int), nold * sizeof(int));
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
@ -11,7 +11,7 @@
#ifndef __PBC_H_
#define __PBC_H_
extern void initPbc();
extern void initPbc(Atom*);
extern void updatePbc_cpu(Atom*, Parameter*, bool);
extern void updateAtomsPbc_cpu(Atom*, Parameter*);
extern void setupPbc(Atom*, Parameter*);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.

View File

@ -1,37 +0,0 @@
# Utility tools for MD-Bench
**mdBench.c:** Single file version for MD-Bench, used mostly for teaching purposes.
**run_stub.sh:** Bash script to run the MD-Bench stubbed force calculation for different configurations and evaluate the performance.
The configuration parameters are:
- **-a <numbers>:** specify the number of atoms per unit cell (the number of neighbors per atom is this value minus 1), the default is 8.
- **-n <numbers>:** timesteps to run the simulation, the default is 200.
- **-nx <numbers>:** number of unit cells in the x dimension, the default is 4.
- **-ny <numbers>:** number of unit cells in the y dimension, the default is 4.
- **-nz <numbers>:** number of unit cells in the z dimension, the default is 2.
Notice that these parameters can also be specified as lists, which executes the stubbed force calculation several times varying the specific parameter to each element of the list, and hence all combinations of parameters will be executed. For example, the following command:
```bash
bash run_stub.sh -a "8 16" -nx "4 8" -ny 8 -nz 4
```
Will execute the stubbed force calculation for the following 4 configurations:
```bash
1> 8 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
2> 16 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
3> 8 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
4> 16 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
```
The following parameters are also available:
- **-f <frequency>:** CPU frequency in GHz (assure your CPU frequency is fixed by disabling Turbo mode), more performance metrics such as cycles per iteration are displayed if this option is defined.
- **-o <file>:** output file (.txt) for the results, the default is *run_results.txt*.
- **-r <runs>:** number of runs for each configuration (only the values for the best run are displayed), the default is 3.
**plot_run_stub_data.py:** Python script to plot the data generated by the *run_stub.sh* script. Just provide the name of the .txt file as a parameter and this script generates a corresponding PDF with the same file name.
**plot_gather_data.py:** Python script to plot the data generated by the gather benchmark. Just provide the name of the .txt file containing the gather output as a parameter and this script generates a corresponding PDF with the same file name. Multiple outputs with different strides can be included in the text file by concatenating the outputs. The script handles output from both standard simple array case and MD variant.
**cache.py:** Python script to run the cache simulator with the data obtained from the memory tracer. Just run it with the tracer output file name as a parameter. The cache specifications can be directly adapted in the script to match those of the target processor of interest.

View File

@ -1,33 +0,0 @@
import sys
from cachesim import CacheSimulator, Cache, MainMemory
filename = sys.argv[1]
mem = MainMemory()
#l3 = Cache("L3", 20480, 16, 64, "LRU") # 20MB: 20480 sets, 16-ways with cacheline size of 64 bytes
#l2 = Cache("L2", 256, 4, 64, "LRU", store_to=l3, load_from=l3) # 256KB
#l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2) # 32KB
# Cascade Lake
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
mem.load_to(l2)
mem.store_from(l3)
cs = CacheSimulator(l1, mem)
with open(filename, 'r') as fp:
for line in fp.readlines():
op, addr = line.split(": ")
op = op[0]
addr = int(addr, 16)
if op == 'W':
cs.store(addr, length=8)
elif op == 'R':
cs.load(addr, length=8)
else:
sys.exit("Invalid operation: {}".format(op))
cs.force_write_back()
cs.print_stats()

View File

@ -1,39 +0,0 @@
import sys
from cachesim import CacheSimulator, Cache, MainMemory
def get_set_id(cache, addr):
return (addr >> cache.cl_bits) % cache.sets
filename = sys.argv[1]
N = sys.argv[2]
mem = MainMemory()
# Cascade Lake
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
mem.load_to(l2)
mem.store_from(l3)
cs = CacheSimulator(l1, mem)
sets_hist = {
'l1': {s: 0 for s in range(l1.sets)},
'l2': {s: 0 for s in range(l2.sets)},
'l3': {s: 0 for s in range(l3.sets)}
}
with open(filename, 'r') as fp:
for line in fp.readlines():
op, addr = line.split(": ")
op = op[0]
addr = int(addr, 16)
sets_hist['l1'][get_set_id(l1, addr)] += 1
sets_hist['l2'][get_set_id(l2, addr)] += 1
sets_hist['l3'][get_set_id(l3, addr)] += 1
for cache_level, data in sets_hist.items():
if cache_level != 'l3':
print(cache_level, ": ")
for set_id in data:
if data[set_id] > 0:
print(set_id, " -> ", data[set_id])

View File

@ -1,116 +0,0 @@
#!/bin/bash
[[ -z "$1" ]] && echo "Use: $0 <binary> [-c <core>] [-f <freq>] [-n <nruns>] [-l <log>] [-s]" && exit
[[ ! -f "$1" ]] && echo "Binary file not found, make sure to use 'make'" && exit
[[ ! -f "$1-stub" ]] && echo "Binary file for stubbed case not found, make sure to use 'make VARIANT=stub'" && exit
MDBENCH_BIN=$1
BIN_INFO="${MDBENCH_BIN#*-}" # $OPT_SCHEME-$TAG-$ISA-$PREC
OPT_SCHEME="${BIN_INFO%%-*}"
PREC="${BIN_INFO##*-}"
BIN_INFO="${BIN_INFO#*-}" # $TAG-$ISA-$PREC
BIN_INFO="${BIN_INFO%-*}" # $TAG-$ISA
TAG="${BIN_INFO%%-*}"
ISA="${BIN_INFO##*-}"
CORE="${CORE:-0}"
FREQ="${FREQ:-2.4}"
NRUNS="${NRUNS:-3}"
LOG="${LOG:-latencies_and_cfds.$(hostname).log}"
STUB_ONLY="${STUB_ONLY:-false}"
SKIP_SET_FREQ="${SKIP_SET_FREQ:-false}"
OPTIND=2
while getopts "c:f:n:l:s" flag; do
case "${flag}" in
c) CORE=${OPTARG};;
f) FREQ=${OPTARG};;
n) NRUNS=${OPTARG};;
l) LOG=${OPTARG};;
s) STUB_ONLY=true;;
esac
done
# Other useful variables
MDBENCH_BIN=./MDBench-$OPT_SCHEME-$TAG-$ISA-$PREC
FIXED_PARAMS="--freq $FREQ"
CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3)
if [ "$CPU_VENDOR" == "GenuineIntel" ]; then
ALL_PREFETCHERS="HW_PREFETCHER,CL_PREFETCHER,DCU_PREFETCHER,IP_PREFETCHER"
DEFAULT_PREFETCHERS=("ALL HW_PREFETCHER CL_PREFETCHER DCU_PREFETCHER IP_PREFETCHER NONE")
else
ALL_PREFETCHERS=""
DEFAULT_PREFETCHERS=("IGNORE")
fi
if [ -z ${PREFETCHERS+x} ]; then
PREFETCHERS=${DEFAULT_PREFETCHERS}
fi
if [ "$OPT_SCHEME" == "gromacs" ]; then
STUB1_NAME=stub-33
STUB1_PARAMS="-na 4 -nn 33"
STUB2_NAME=stub-128
STUB2_PARAMS="-na 4 -nn 128"
else
STUB1_NAME=stub-76
STUB1_PARAMS="-nn 76"
STUB2_NAME=stub-1024
STUB2_PARAMS="-nn 1024"
fi
function run_benchmark() {
BEST=10000000
for i in $(seq $NRUNS); do
RES=$(likwid-pin -c $CORE "$* $FIXED_PARAMS" 2>&1 | grep "Cycles/SIMD iteration" | cut -d ' ' -f3)
if (( $(echo "$BEST > $RES" | bc -l ) )); then
BEST=$RES
fi
done
}
echo "Tag: $TAG" | tee -a $LOG
echo "Optimization scheme: $OPT_SCHEME" | tee -a $LOG
echo "Instruction set: $ISA" | tee -a $LOG
echo "Precision: $PREC" | tee -a $LOG
echo "Binary: $MDBENCH_BIN(-stub)" | tee -a $LOG
echo "Frequency: $FREQ" | tee -a $LOG
echo "Number of runs: $NRUNS" | tee -a $LOG
echo "Run only stubbed cases: $STUB_ONLY" | tee -a $LOG
if [ "$SKIP_SET_FREQ" == "false" ]; then
echo "Fixing frequencies..."
likwid-setFrequencies -f $FREQ -t 0
fi
for p in $PREFETCHERS; do
if [ "$p" != "IGNORE" ]; then
if [ "$p" == "ALL" ]; then
likwid-features -c $CORE -e $ALL_PREFETCHERS
elif [ "$p" == "NONE" ]; then
likwid-features -c $CORE -d $ALL_PREFETCHERS
else
likwid-features -c $CORE -d $ALL_PREFETCHERS
likwid-features -c $CORE -e $p
fi
echo "Prefetcher settings: $p"
likwid-features -c $CORE -l
fi
MSG="$p: "
if [ "$STUB_ONLY" == "false" ]; then
run_benchmark $MDBENCH_BIN
MSG+="standard=$BEST, "
run_benchmark $MDBENCH_BIN -i data/copper_melting/input_lj_cu_one_atomtype_20x20x20.dmp
MSG+="melt=$BEST, "
run_benchmark $MDBENCH_BIN -p data/argon_1000/mdbench_params.conf -i data/argon_1000/tprout.gro
MSG+="argon=$BEST, "
fi
run_benchmark $MDBENCH_BIN-stub $STUB1_PARAMS
MSG+="$STUB1_NAME=$BEST, "
run_benchmark $MDBENCH_BIN-stub $STUB2_PARAMS
MSG+="$STUB2_NAME=$BEST"
echo $MSG | tee -a $LOG
done

View File

@ -1,52 +0,0 @@
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2021 RRZE-HPC
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,126 +0,0 @@
#CONFIGURE BUILD SYSTEM
TARGET = gather-bench-$(TAG)
BUILD_DIR = ./$(TAG)
SRC_DIR = ./src
MAKE_DIR = ./
ISA_DIR = ./src/$(ISA)
Q ?= @
#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
include $(MAKE_DIR)/include_LIKWID.mk
INCLUDES += -I./src/includes
VPATH = $(SRC_DIR) ${ISA_DIR}
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
ASM += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.f90))
OBJ = $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ += $(patsubst $(SRC_DIR)/%.cc, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cc))
OBJ += $(patsubst $(SRC_DIR)/%.cpp, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
OBJ += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
OBJ += $(patsubst $(SRC_DIR)/%.F90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
OBJ += $(patsubst $(SRC_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.s))
OBJ += $(patsubst $(ISA_DIR)/%.S, $(BUILD_DIR)/%.o,$(wildcard $(ISA_DIR)/*.S))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) -DISA_$(ISA)
ifneq ($(VARIANT),)
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
endif
ifeq ($(strip $(DATA_LAYOUT)),AOS)
CPPFLAGS += -DAOS
endif
ifeq ($(strip $(TEST)),true)
CPPFLAGS += -DTEST
endif
ifeq ($(strip $(PADDING)),true)
CPPFLAGS += -DPADDING
endif
ifeq ($(strip $(MEASURE_GATHER_CYCLES)),true)
CPPFLAGS += -DMEASURE_GATHER_CYCLES
endif
ifeq ($(strip $(ONLY_FIRST_DIMENSION)),true)
CPPFLAGS += -DONLY_FIRST_DIMENSION
endif
ifeq ($(strip $(MEM_TRACER)),true)
CPPFLAGS += -DMEM_TRACER
endif
${TARGET}: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main.c
@echo "===> LINKING $(TARGET)"
$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET) $(SRC_DIR)/main.c $(OBJ) $(LIBS)
${TARGET}-%: $(BUILD_DIR) $(OBJ) $(SRC_DIR)/main-%.c
@echo "===> LINKING $(TARGET)-$* "
$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)
asm: $(BUILD_DIR) $(ASM)
$(BUILD_DIR)/%.o: %.c
@echo "===> COMPILE $@"
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
$(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.s: %.c
@echo "===> GENERATE ASM $@"
$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@
$(BUILD_DIR)/%.s: %.f90
@echo "===> COMPILE $@"
$(Q)$(FC) -S $(FCFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.cc
@echo "===> COMPILE $@"
$(Q)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
$(Q)$(CXX) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.o: %.cpp
@echo "===> COMPILE $@"
$(Q)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $< -o $@
$(Q)$(CXX) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
$(BUILD_DIR)/%.o: %.f90
@echo "===> COMPILE $@"
$(Q)$(FC) -c $(FCFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.F90
@echo "===> COMPILE $@"
$(Q)$(FC) -c $(CPPFLAGS) $(FCFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.s
@echo "===> ASSEMBLE $@"
$(Q)$(AS) $(ASFLAGS) $< -o $@
$(BUILD_DIR)/%.o: %.S
@echo "===> ASSEMBLE $@"
$(Q)$(CC) -c $(CPPFLAGS) $< -o $@
tags:
@echo "===> GENERATE TAGS"
$(Q)ctags -R
$(BUILD_DIR):
@mkdir $(BUILD_DIR)
ifeq ($(findstring $(MAKECMDGOALS),clean),)
-include $(OBJ:.o=.d)
endif
.PHONY: clean distclean
clean:
@echo "===> CLEAN"
@rm -rf $(BUILD_DIR)
@rm -f tags
distclean: clean
@echo "===> DIST CLEAN"
@rm -f $(TARGET)
@rm -f tags

Some files were not shown because too many files have changed in this diff Show More