Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
1dc6d12f52 | |||
e60a798f37 | |||
7a62c5c1ff | |||
8d0a8b5f9c | |||
9712d7e2c8 |
176
.clang-format
Normal file
176
.clang-format
Normal file
@ -0,0 +1,176 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: WebKit
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: DontAlign
|
||||
AlignArrayOfStructures: None
|
||||
AlignConsecutiveAssignments: Consecutive
|
||||
AlignConsecutiveBitFields: None
|
||||
AlignConsecutiveDeclarations: None
|
||||
AlignConsecutiveMacros: Consecutive
|
||||
AlignEscapedNewlines: Right
|
||||
AlignOperands: Align
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortBlocksOnASingleLine: Never
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: OnlyFirstIf
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: MultiLine
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: true
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: true
|
||||
SplitEmptyRecord: true
|
||||
SplitEmptyNamespace: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeBraces: WebKit
|
||||
BreakBeforeInheritanceComma: false
|
||||
BreakInheritanceList: BeforeColon
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 90
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: false
|
||||
DeriveLineEnding: true
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
EmptyLineAfterAccessModifier: Never
|
||||
EmptyLineBeforeAccessModifier: LogicalBlock
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
BasedOnStyle: ''
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
FixNamespaceComments: false
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
IfMacros:
|
||||
- KJ_IF_MAYBE
|
||||
IncludeBlocks: Preserve
|
||||
IncludeCategories:
|
||||
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '(Test)?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseLabels: false
|
||||
IndentCaseBlocks: false
|
||||
IndentGotoLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentExternBlock: AfterExternBlock
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertTrailingCommas: None
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||
LambdaBodyIndentation: Signature
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: Inner
|
||||
ObjCBinPackProtocolList: Auto
|
||||
ObjCBlockIndentWidth: 4
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: true
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 200
|
||||
PenaltyBreakBeforeFirstCallParameter: 19
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 60
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Left
|
||||
PPIndentWidth: -1
|
||||
ReferenceAlignment: Pointer
|
||||
ReflowComments: true
|
||||
ShortNamespaceLines: 1
|
||||
SortIncludes: CaseSensitive
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 1
|
||||
SpacesInAngles: Never
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeSquareBrackets: false
|
||||
BitFieldColonSpacing: Both
|
||||
Standard: Latest
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 8
|
||||
UseCRLF: false
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
...
|
14
.clang-tidy
Normal file
14
.clang-tidy
Normal file
@ -0,0 +1,14 @@
|
||||
---
|
||||
# Enabled check groups. The bug-prone checks live in the `bugprone-*` group;
# a `clang-bugprone-*` glob matches no clang-tidy check, so that group was
# silently never enabled.
Checks: 'clang-diagnostic-*,clang-analyzer-*,bugprone-*,readability-identifier-naming'
|
||||
# WarningsAsErrors takes a glob list in the same format as Checks, not a
# boolean: 'true' would be read as a check-name pattern that matches nothing.
# '*' promotes the warnings of every enabled check to errors.
WarningsAsErrors: '*'
|
||||
HeaderFilterRegex: '.*'
|
||||
AnalyzeTemporaryDtors: false
|
||||
CheckOptions:
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: 'CamelCase'
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: 'camelBack'
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: 'camelBack'
|
||||
- key: readability-identifier-naming.GlobalConstantCase
|
||||
value: 'UPPER_CASE'
|
3
.clangd
Normal file
3
.clangd
Normal file
@ -0,0 +1,3 @@
|
||||
CompileFlags:
|
||||
Add: [-I/Users/jan/prg/MD-Bench/src/verletlist/, -I/Users/jan/prg/MD-Bench/src/common/, -DALIGNMENT=64]
|
||||
Compiler: clang
|
131
Makefile
131
Makefile
@ -1,120 +1,32 @@
|
||||
#CONFIGURE BUILD SYSTEM
|
||||
IDENTIFIER = $(OPT_SCHEME)-$(TAG)-$(ISA)-$(DATA_TYPE)
|
||||
TARGET = MDBench-$(IDENTIFIER)
|
||||
BUILD_DIR = ./build-$(IDENTIFIER)
|
||||
SRC_DIR = ./$(OPT_SCHEME)
|
||||
ASM_DIR = ./asm
|
||||
COMMON_DIR = ./common
|
||||
CUDA_DIR = ./$(SRC_DIR)/cuda
|
||||
MAKE_DIR = ./
|
||||
TAG = $(OPT_TAG)-$(TOOLCHAIN)-$(DATA_TYPE)
|
||||
TARGET = MDBench-$(TAG)
|
||||
BUILD_DIR = ./build/build-$(TAG)
|
||||
SRC_ROOT = ./src
|
||||
SRC_DIR = $(SRC_ROOT)/$(OPT_SCHEME)
|
||||
COMMON_DIR = $(SRC_ROOT)/common
|
||||
CUDA_DIR = $(SRC_DIR)/cuda
|
||||
MAKE_DIR = ./make
|
||||
Q ?= @
|
||||
|
||||
#DO NOT EDIT BELOW
|
||||
include $(MAKE_DIR)/config.mk
|
||||
include $(MAKE_DIR)/include_$(TAG).mk
|
||||
include config.mk
|
||||
include $(MAKE_DIR)/include_$(TOOLCHAIN).mk
|
||||
include $(MAKE_DIR)/include_LIKWID.mk
|
||||
ifneq ($(strip $(ISA)),NONE)
|
||||
include $(MAKE_DIR)/include_ISA.mk
|
||||
include $(MAKE_DIR)/include_GROMACS.mk
|
||||
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
|
||||
|
||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||
DEFINES += -DAOS
|
||||
endif
|
||||
ifeq ($(strip $(DATA_TYPE)),SP)
|
||||
DEFINES += -DPRECISION=1
|
||||
else
|
||||
DEFINES += -DPRECISION=2
|
||||
endif
|
||||
INCLUDES += -I./$(SRC_DIR) -I./$(COMMON_DIR)
|
||||
|
||||
ifneq ($(ASM_SYNTAX), ATT)
|
||||
ASFLAGS += -masm=intel
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(SORT_ATOMS)),true)
|
||||
DEFINES += -DSORT_ATOMS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||
DEFINES += -DEXPLICIT_TYPES
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEM_TRACER)),true)
|
||||
DEFINES += -DMEM_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(INDEX_TRACER)),true)
|
||||
DEFINES += -DINDEX_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(COMPUTE_STATS)),true)
|
||||
DEFINES += -DCOMPUTE_STATS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||
DEFINES += -DXTC_OUTPUT
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
||||
DEFINES += -DUSE_REFERENCE_VERSION
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
|
||||
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(DEBUG)),true)
|
||||
DEFINES += -DDEBUG
|
||||
endif
|
||||
|
||||
ifneq ($(VECTOR_WIDTH),)
|
||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SIMD_KERNEL__)),true)
|
||||
DEFINES += -D__SIMD_KERNEL__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SSE__)),true)
|
||||
DEFINES += -D__ISA_SSE__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX__)),true)
|
||||
DEFINES += -D__ISA_AVX__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
||||
DEFINES += -D__ISA_AVX_FMA__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX2__)),true)
|
||||
DEFINES += -D__ISA_AVX2__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX512__)),true)
|
||||
DEFINES += -D__ISA_AVX512__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
|
||||
DEFINES += -DENABLE_OMP_SIMD
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(USE_SIMD_KERNEL)),true)
|
||||
DEFINES += -DUSE_SIMD_KERNEL
|
||||
endif
|
||||
|
||||
VPATH = $(SRC_DIR) $(ASM_DIR) $(CUDA_DIR)
|
||||
VPATH = $(SRC_DIR) $(COMMON_DIR) $(CUDA_DIR)
|
||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
||||
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
|
||||
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%-common.o,$(wildcard $(COMMON_DIR)/*.c))
|
||||
OBJ = $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
||||
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(COMMON_DIR)/*.c))
|
||||
# Add the CUDA objects only when building with the NVCC toolchain.
# TAG is the composite build tag ($(OPT_TAG)-$(TOOLCHAIN)-$(DATA_TYPE)) and can
# never equal NVCC, so the compiler is identified through TOOLCHAIN, the same
# variable that selects include_$(TOOLCHAIN).mk. The NVCC-% pattern also
# matches a TOOLCHAIN that carries an -ISA-SIMD suffix.
ifneq ($(filter NVCC NVCC-%,$(strip $(TOOLCHAIN))),)
OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu))
endif
|
||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
||||
|
||||
# $(warning $(OBJ))
|
||||
|
||||
ifneq ($(VARIANT),)
|
||||
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
||||
DEFINES += -DVARIANT=$(VARIANT)
|
||||
@ -133,11 +45,6 @@ $(BUILD_DIR)/%.o: %.c
|
||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
||||
|
||||
$(BUILD_DIR)/%-common.o: $(COMMON_DIR)/%.c
|
||||
$(info ===> COMPILE $@)
|
||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
||||
|
||||
$(BUILD_DIR)/%-cuda.o: %.cu
|
||||
$(info ===> COMPILE $@)
|
||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
@ -156,18 +63,16 @@ $(BUILD_DIR)/%.o: %.s
|
||||
clean:
|
||||
$(info ===> CLEAN)
|
||||
@rm -rf $(BUILD_DIR)
|
||||
@rm -rf $(TARGET)*
|
||||
@rm -f tags
|
||||
|
||||
cleanall:
|
||||
$(info ===> CLEAN)
|
||||
@rm -rf build-*
|
||||
@rm -rf build
|
||||
@rm -rf MDBench-*
|
||||
@rm -f tags
|
||||
|
||||
distclean: clean
|
||||
$(info ===> DIST CLEAN)
|
||||
@rm -f $(TARGET)*
|
||||
@rm -f $(TARGET)
|
||||
@rm -f tags
|
||||
|
||||
info:
|
||||
@ -181,6 +86,6 @@ tags:
|
||||
$(Q)ctags -R
|
||||
|
||||
$(BUILD_DIR):
|
||||
@mkdir $(BUILD_DIR)
|
||||
@mkdir -p $(BUILD_DIR)
|
||||
|
||||
-include $(OBJ:.o=.d)
|
||||
|
54
README.md
54
README.md
@ -1,34 +1,14 @@
|
||||
# MD-Bench
|
||||
|
||||
![Image](figures/features-v3.png "MD-Bench Features")
|
||||
|
||||
MD-Bench is a toolbox for the performance engineering of short-range force calculation kernels on molecular-dynamics applications.
|
||||
It aims at covering all available state-of-the-art algorithms from different community codes such as LAMMPS and GROMACS.
|
||||
|
||||
Apart from that, many tools to study and evaluate the in-depth performance of such kernels on distinct hardware are offered, like gather-bench, a standalone benchmark that mimics the data movement from MD kernels and the stubbed force calculation cases that focus on isolating the impacts caused by memory latencies and control flow divergence contributions in the overall performance.
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Verlet Lists</th>
|
||||
<th>GROMACS MxN</th>
|
||||
<th>Stubbed cases</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><a target="_blank" rel="noopener noreferrer" href="figures/verlet_v2.png"><img src="figures/verlet_v2.png" alt="Image" title="Verlet Lists" style="width: 100%;"></a></td>
|
||||
<td><a target="_blank" rel="noopener noreferrer" href="figures/gromacs_mxn_v2.png"><img src="figures/gromacs_mxn_v2.png" alt="Image" title="GROMACS MxN" style="width: 90%;"></a></td>
|
||||
<td><a target="_blank" rel="noopener noreferrer" href="figures/stub_new_v3.png"><img src="figures/stub_new_v3.png" alt="Image" title="Stubbed cases" style="width: 100%;"></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<!-- ![Image](figures/gather_bench.png "gather-bench") -->
|
||||
MD-Bench is a toolbox for the performance engineering of short-range force
|
||||
calculation kernels on molecular-dynamics applications. It aims at covering all
|
||||
available state-of-the-art algorithms from different community codes such as
|
||||
LAMMPS and GROMACS.
|
||||
|
||||
## Build instructions
|
||||
|
||||
Configure your build by editing the `config.mk` file. The following options are available:
|
||||
Configure your build by editing the `config.mk` file. The following
|
||||
options are available:
|
||||
|
||||
- **TAG:** Compiler tag (available options: GCC, CLANG, ICC, ONEAPI, NVCC).
|
||||
- **ISA:** Instruction set (available options: SSE, AVX, AVX\_FMA, AVX2, AVX512).
|
||||
@ -45,15 +25,18 @@ Properly configure your building by changing `config.mk` file. The following opt
|
||||
- **COMPUTE\_STATS:** Compute statistics.
|
||||
|
||||
Configurations for LAMMPS Verlet Lists optimization scheme:
|
||||
|
||||
- **ENABLE\_OMP\_SIMD:** Use omp simd pragma on half neighbor-lists kernels.
|
||||
- **USE\_SIMD\_KERNEL:** Compile kernel with explicit SIMD intrinsics.
|
||||
|
||||
Configurations for GROMACS MxN optimization scheme:
|
||||
|
||||
- **USE\_REFERENCE\_VERSION:** Use reference version (only for correctness checks).
|
||||
- **XTC\_OUTPUT:** Enable XTC output.
|
||||
- **HALF\_NEIGHBOR\_LISTS\_CHECK\_CJ:** Check if j-clusters are local when decreasing the reaction force.
|
||||
|
||||
Configurations for CUDA:
|
||||
|
||||
- **USE\_CUDA\_HOST\_MEMORY:** Use CUDA host memory to optimize host-device transfers.
|
||||
|
||||
When done, just use `make` to compile the code.
|
||||
@ -68,11 +51,14 @@ Use the following command to run a simulation:
|
||||
./MD-Bench-<TAG>-<OPT_SCHEME> [OPTION]...
|
||||
```
|
||||
|
||||
Where `TAG` and `OPT_SCHEME` correspond to the building options with the same name.
|
||||
Without any options, a Copper FCC lattice system with size 32x32x32 (131072 atoms) over 200 time-steps using the Lennard-Jones potential (sigma=1.0, epsilon=1.0) is simulated.
|
||||
Where `TAG` and `OPT_SCHEME` correspond to the building options with the same
|
||||
name. Without any options, a Copper FCC lattice system with size 32x32x32
|
||||
(131072 atoms) over 200 time-steps using the Lennard-Jones potential (sigma=1.0,
|
||||
epsilon=1.0) is simulated.
|
||||
|
||||
The default behavior and other options can be changed using the following parameters:
|
||||
```
|
||||
|
||||
```sh
|
||||
-p <string>: file to read parameters from (can be specified more than once)
|
||||
-f <string>: force field (lj or eam), default lj
|
||||
-i <string>: input file with atom positions (dump)
|
||||
@ -92,11 +78,17 @@ TBD
|
||||
|
||||
## Citations
|
||||
|
||||
Rafael Ravedutti Lucio Machado, Jan Eitzinger, Harald Köstler, and Gerhard Wellein: MD-Bench: A generic proxy-app toolbox for state-of-the-art molecular dynamics algorithms. Accepted for [PPAM](https://ppam.edu.pl/) 2022, the 14th International Conference on Parallel Processing and Applied Mathematics, Gdansk, Poland, September 11-14, 2022. PPAM 2022 Best Paper Award. Preprint: [arXiv:2207.13094](https://arxiv.org/abs/2207.13094)
|
||||
Rafael Ravedutti Lucio Machado, Jan Eitzinger, Harald Köstler, and Gerhard
|
||||
Wellein: MD-Bench: A generic proxy-app toolbox for state-of-the-art molecular
|
||||
dynamics algorithms. Accepted for [PPAM](https://ppam.edu.pl/) 2022, the 14th
|
||||
International Conference on Parallel Processing and Applied Mathematics, Gdansk,
|
||||
Poland, September 11-14, 2022. PPAM 2022 Best Paper Award. Preprint:
|
||||
[arXiv:2207.13094](https://arxiv.org/abs/2207.13094)
|
||||
|
||||
## Credits
|
||||
|
||||
MD-Bench is developed by the Erlangen National High Performance Computing Center ([NHR@FAU](https://hpc.fau.de/)) at the University of Erlangen-Nürnberg.
|
||||
MD-Bench is developed by the Erlangen National High Performance Computing Center
|
||||
([NHR@FAU](https://hpc.fau.de/)) at the University of Erlangen-Nürnberg.
|
||||
|
||||
## License
|
||||
|
||||
|
109
config.mk
109
config.mk
@ -1,17 +1,18 @@
|
||||
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
||||
TAG ?= ICC
|
||||
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
|
||||
ISA ?= AVX512
|
||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
||||
OPT_SCHEME ?= lammps
|
||||
# Compiler tool chain (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
||||
TOOLCHAIN ?= CLANG
|
||||
# Instruction set for intrinsic kernels (NONE/SSE/AVX/AVX_FMA/AVX2/AVX512)
|
||||
ISA ?= ARM
|
||||
SIMD ?= NONE
|
||||
# Optimization scheme (verletlist/clusterpair/clusters_per_bin)
|
||||
OPT_SCHEME ?= verletlist
|
||||
# Enable likwid (true or false)
|
||||
ENABLE_LIKWID ?= true
|
||||
ENABLE_LIKWID ?= false
|
||||
# SP or DP
|
||||
DATA_TYPE ?= DP
|
||||
# AOS or SOA
|
||||
DATA_LAYOUT ?= AOS
|
||||
# Assembly syntax to generate (ATT/INTEL)
|
||||
ASM_SYNTAX ?= ATT
|
||||
ASM_SYNTAX ?= INTEL
|
||||
# Debug
|
||||
DEBUG ?= false
|
||||
|
||||
@ -28,7 +29,7 @@ COMPUTE_STATS ?= true
|
||||
|
||||
# Configurations for lammps optimization scheme
|
||||
# Use omp simd pragma when running with half neighbor-lists
|
||||
ENABLE_OMP_SIMD ?= true
|
||||
ENABLE_OMP_SIMD ?= false
|
||||
# Use kernel with explicit SIMD intrinsics
|
||||
USE_SIMD_KERNEL ?= false
|
||||
|
||||
@ -47,3 +48,93 @@ USE_CUDA_HOST_MEMORY ?= false
|
||||
#Feature options
|
||||
OPTIONS = -DALIGNMENT=64
|
||||
#OPTIONS += More options
|
||||
|
||||
#DO NOT EDIT BELOW
|
||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||
DEFINES += -DAOS
|
||||
endif
|
||||
ifeq ($(strip $(DATA_TYPE)),SP)
|
||||
DEFINES += -DPRECISION=1
|
||||
else
|
||||
DEFINES += -DPRECISION=2
|
||||
endif
|
||||
|
||||
ifneq ($(ASM_SYNTAX), ATT)
|
||||
ASFLAGS += -masm=intel
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(SORT_ATOMS)),true)
|
||||
DEFINES += -DSORT_ATOMS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||
DEFINES += -DEXPLICIT_TYPES
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEM_TRACER)),true)
|
||||
DEFINES += -DMEM_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(INDEX_TRACER)),true)
|
||||
DEFINES += -DINDEX_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(COMPUTE_STATS)),true)
|
||||
DEFINES += -DCOMPUTE_STATS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||
DEFINES += -DXTC_OUTPUT
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
||||
DEFINES += -DUSE_REFERENCE_VERSION
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
|
||||
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(DEBUG)),true)
|
||||
DEFINES += -DDEBUG
|
||||
endif
|
||||
|
||||
ifneq ($(VECTOR_WIDTH),)
|
||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SIMD_KERNEL__)),true)
|
||||
DEFINES += -D__SIMD_KERNEL__
|
||||
endif
|
||||
|
||||
# Emit the SSE define. The sibling ISA switches are all keyed on an
# __ISA_<name>__ variable, so accept __ISA_SSE__ in addition to the historical
# __SSE__ spelling; either one set to 'true' enables the define.
ifneq ($(filter true,$(strip $(__SSE__)) $(strip $(__ISA_SSE__))),)
DEFINES += -D__ISA_SSE__
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX__)),true)
|
||||
DEFINES += -D__ISA_AVX__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
||||
DEFINES += -D__ISA_AVX_FMA__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX2__)),true)
|
||||
DEFINES += -D__ISA_AVX2__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX512__)),true)
|
||||
DEFINES += -D__ISA_AVX512__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
|
||||
DEFINES += -DENABLE_OMP_SIMD
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(OPT_SCHEME)),verletlist)
|
||||
OPT_TAG = VL
|
||||
endif
|
||||
|
||||
# When an intrinsics kernel is requested (SIMD != NONE), append the ISA and
# SIMD flavour to the toolchain name.
# The assignment must be simply expanded (:=): with '=' the variable would
# reference itself and GNU Make stops with "Recursive variable 'TOOLCHAIN'
# references itself" as soon as it is expanded.
ifneq ($(strip $(SIMD)),NONE)
TOOLCHAIN := $(TOOLCHAIN)-$(ISA)-$(SIMD)
endif
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 273 KiB |
Binary file not shown.
Before Width: | Height: | Size: 98 KiB |
@ -1,523 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
width="297mm"
|
||||
height="210mm"
|
||||
viewBox="0 0 297 210"
|
||||
version="1.1"
|
||||
id="svg5"
|
||||
inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
|
||||
sodipodi:docname="gather_bench.svg"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg">
|
||||
<sodipodi:namedview
|
||||
id="namedview7"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pagecheckerboard="0"
|
||||
inkscape:document-units="mm"
|
||||
showgrid="false"
|
||||
inkscape:zoom="0.73508842"
|
||||
inkscape:cx="551.63432"
|
||||
inkscape:cy="348.25743"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1011"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="165"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="layer1" />
|
||||
<defs
|
||||
id="defs2">
|
||||
<rect
|
||||
x="144.01516"
|
||||
y="304.36604"
|
||||
width="248.99777"
|
||||
height="100.91557"
|
||||
id="rect79475" />
|
||||
<rect
|
||||
x="309.01869"
|
||||
y="43.698615"
|
||||
width="552.19421"
|
||||
height="71.390348"
|
||||
id="rect65238" />
|
||||
<rect
|
||||
x="762.55856"
|
||||
y="341.3838"
|
||||
width="277.62756"
|
||||
height="105.0235"
|
||||
id="rect47632" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient40704">
|
||||
<stop
|
||||
style="stop-color:#ccffaa;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop40700" />
|
||||
<stop
|
||||
style="stop-color:#ccffaa;stop-opacity:0;"
|
||||
offset="1"
|
||||
id="stop40702" />
|
||||
</linearGradient>
|
||||
<marker
|
||||
style="overflow:visible;"
|
||||
id="Arrow2Mend"
|
||||
refX="0.0"
|
||||
refY="0.0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(0.6) rotate(180) translate(0,0)"
|
||||
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
|
||||
style="stroke:context-stroke;fill-rule:evenodd;fill:context-stroke;stroke-width:0.62500000;stroke-linejoin:round;"
|
||||
id="path39486" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible;"
|
||||
id="Arrow1Mend"
|
||||
refX="0.0"
|
||||
refY="0.0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow1Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(0.4) rotate(180) translate(10,0)"
|
||||
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
|
||||
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
|
||||
id="path39468" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible;"
|
||||
id="Arrow1Lend"
|
||||
refX="0.0"
|
||||
refY="0.0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow1Lend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(0.8) rotate(180) translate(12.5,0)"
|
||||
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
|
||||
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
|
||||
id="path39462" />
|
||||
</marker>
|
||||
<rect
|
||||
x="707.09731"
|
||||
y="616.36746"
|
||||
width="407.71288"
|
||||
height="417.08306"
|
||||
id="rect24254" />
|
||||
<rect
|
||||
x="47.404365"
|
||||
y="100.3268"
|
||||
width="398.49855"
|
||||
height="110.16514"
|
||||
id="rect5050" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5-6" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5-6-1" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0-6" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0-6-2" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0-6-2-8" />
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow2Mend-2"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(-0.6)"
|
||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
||||
id="path39486-3" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow2Mend-2-5"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(-0.6)"
|
||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
||||
id="path39486-3-9" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow2Mend-2-5-2"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(-0.6)"
|
||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
||||
id="path39486-3-9-8" />
|
||||
</marker>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient40704"
|
||||
id="linearGradient40706"
|
||||
x1="324.58157"
|
||||
y1="127.35331"
|
||||
x2="363.61096"
|
||||
y2="98.957848"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5-6-1-7" />
|
||||
<rect
|
||||
x="309.01868"
|
||||
y="43.698616"
|
||||
width="552.19421"
|
||||
height="71.39035"
|
||||
id="rect65238-1" />
|
||||
</defs>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1">
|
||||
<rect
|
||||
style="fill:#d5d5ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
|
||||
id="rect55834"
|
||||
width="250.31726"
|
||||
height="74.676537"
|
||||
x="25.257824"
|
||||
y="97.277718" />
|
||||
<rect
|
||||
style="fill:#d5f6ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
|
||||
id="rect55832"
|
||||
width="250.35208"
|
||||
height="64.461151"
|
||||
x="25.256891"
|
||||
y="32.817505" />
|
||||
<rect
|
||||
style="fill:#ccffaa;stroke:#091600;stroke-width:1.31891"
|
||||
id="rect6462"
|
||||
width="82.385742"
|
||||
height="20.525751"
|
||||
x="28.355024"
|
||||
y="48.740646" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,17.244577,26.206534)"
|
||||
id="text5048"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82948"><tspan
|
||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
||||
id="tspan82946">gather-bench</tspan></tspan></text>
|
||||
<rect
|
||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9"
|
||||
width="18.764017"
|
||||
height="20.965076"
|
||||
x="39.518955"
|
||||
y="140.726" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.33667319,0,0,0.33667319,25.589293,109.42998)"
|
||||
id="text5048-3"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82950">L1</tspan></text>
|
||||
<rect
|
||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9-0"
|
||||
width="21.653919"
|
||||
height="24.193966"
|
||||
x="97.687294"
|
||||
y="138.51564" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.3885252,0,0,0.3885252,81.212654,102.39964)"
|
||||
id="text5048-3-6"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82952">L2</tspan></text>
|
||||
<rect
|
||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9-0-6"
|
||||
width="27.217058"
|
||||
height="30.409672"
|
||||
x="149.19933"
|
||||
y="134.83977" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.48834178,0,0,0.48834178,128.49215,89.445174)"
|
||||
id="text5048-3-6-1"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82954">L3</tspan></text>
|
||||
<rect
|
||||
style="fill:#eeaaff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9-0-6-7"
|
||||
width="61.032539"
|
||||
height="29.96501"
|
||||
x="204.01265"
|
||||
y="135.61238" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.48834178,0,0,0.48834178,182.37007,89.995434)"
|
||||
id="text5048-3-6-1-9"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2-8);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82956">DRAM</tspan></text>
|
||||
<rect
|
||||
style="fill:#ffccaa;stroke:#091600;stroke-width:1.10636"
|
||||
id="rect6462-6"
|
||||
width="74.980759"
|
||||
height="15.869514"
|
||||
x="126.09525"
|
||||
y="38.773243" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,115.65481,14.295323)"
|
||||
id="text5048-7"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82958">Single gather</tspan></text>
|
||||
<rect
|
||||
style="fill:#ffccaa;stroke:#091600;stroke-width:1.03971"
|
||||
id="rect6462-6-3"
|
||||
width="66.071701"
|
||||
height="15.904838"
|
||||
x="126.90776"
|
||||
y="63.642746" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,116.63325,39.114393)"
|
||||
id="text5048-7-5"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82960">MD gathers</tspan></text>
|
||||
<rect
|
||||
style="fill:#afe9dd;stroke:#091600;stroke-width:1.02848"
|
||||
id="rect6462-6-3-2"
|
||||
width="64.479698"
|
||||
height="15.947394"
|
||||
x="206.65364"
|
||||
y="52.98967" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,196.01512,28.482594)"
|
||||
id="text5048-7-5-9"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82962">Contiguous</tspan></text>
|
||||
<rect
|
||||
style="fill:#afe9dd;stroke:#091600;stroke-width:0.987323"
|
||||
id="rect6462-6-3-2-2"
|
||||
width="59.269382"
|
||||
height="15.988551"
|
||||
x="208.16559"
|
||||
y="76.856781" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,197.58604,52.220445)"
|
||||
id="text5048-7-5-9-7"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82964">"Random"</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text24252"
|
||||
style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect24254)" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="M 193.10512,71.273276 206.30683,61.033513"
|
||||
id="path39049"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="M 193.08841,71.196939 207.86207,84.43804"
|
||||
id="path39053"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.39816;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 58.548229,151.24436 38.298093,0.25023"
|
||||
id="path39219"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24847;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 119.19252,150.09399 29.28333,0.26095"
|
||||
id="path39219-2"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 177.02022,150.44367 26.36623,0.26095"
|
||||
id="path39219-2-0"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
|
||||
d="m 48.145458,92.71788 -0.644819,47.57709"
|
||||
id="path39377"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2)"
|
||||
d="M 48.121208,92.873762 106.60807,137.41946"
|
||||
id="path39377-7"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5)"
|
||||
d="M 48.073928,92.825143 158.88023,133.04546"
|
||||
id="path39377-7-2"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5-2)"
|
||||
d="M 48.051946,92.813593 233.0959,134.16596"
|
||||
id="path39377-7-2-9"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<rect
|
||||
style="fill:#e9afaf;stroke:#091600;stroke-width:1.34518"
|
||||
id="rect6462-6-3-2-2-3"
|
||||
width="65.880661"
|
||||
height="26.700579"
|
||||
x="38.104012"
|
||||
y="80.530182" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 77.365612,69.678744 h 2e-6"
|
||||
id="path39808"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 111.64767,59.183009 6.84466,0.03069"
|
||||
id="path41004"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 119.03378,47.056357 -0.58704,25.198541"
|
||||
id="path41006"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.02423;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 118.07503,72.254897 7.94998,-0.05784"
|
||||
id="path41008"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.882836;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 118.26666,47.054814 7.69322,0.173925"
|
||||
id="path41112"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
d="M 68.213642,69.068864 67.910274,80.302728"
|
||||
id="path55728"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,-1.3782637,4.0412367)"
|
||||
id="text65236"
|
||||
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="309.01953"
|
||||
y="90.886691"
|
||||
id="tspan82968"><tspan
|
||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
||||
id="tspan82966">Application Level</tspan></tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,2.7015103,160.71919)"
|
||||
id="text65236-2"
|
||||
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="309.01953"
|
||||
y="90.886691"
|
||||
id="tspan82972"><tspan
|
||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
||||
id="tspan82970">Hardware Level</tspan></tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,2.3490396,0.57331532)"
|
||||
id="text79473"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect79475);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="144.01562"
|
||||
y="339.75586"
|
||||
id="tspan82974">vgather </tspan><tspan
|
||||
x="144.01562"
|
||||
y="389.75586"
|
||||
id="tspan82976">instructions</tspan></text>
|
||||
</g>
|
||||
</svg>
|
Before Width: | Height: | Size: 21 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 128 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 52 KiB |
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 62 KiB |
@ -1,11 +0,0 @@
|
||||
# Optional GROMACS support, used only when XTC_OUTPUT is set to "true".
#
# Every knob below uses ?= so it can be overridden from the environment or the
# make command line. GROMACS_PATH is the installation prefix; the include and
# library search paths are derived from it unless set explicitly.
GROMACS_PATH    ?= /apps/Gromacs/2018.1-mkl
GROMACS_INC     ?= -I${GROMACS_PATH}/include
GROMACS_DEFINES ?=
GROMACS_LIB     ?= -L${GROMACS_PATH}/lib64

# $(strip ...) tolerates stray whitespace around the XTC_OUTPUT value.
ifeq ($(strip $(XTC_OUTPUT)),true)
    INCLUDES += ${GROMACS_INC}
    DEFINES  += ${GROMACS_DEFINES}
    LIBS     += -lgromacs
    LFLAGS   += ${GROMACS_LIB}
endif
|
@ -1,278 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
//---
|
||||
#include <atom.h>
|
||||
#include <likwid-marker.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timing.h>
|
||||
|
||||
#ifdef __SIMD_KERNEL__
|
||||
#include <simd.h>
|
||||
#endif
|
||||
|
||||
/*
 * Scalar Lennard-Jones force kernel operating on full neighbor lists.
 *
 * Zeroes the force of every local atom, then for each local atom i sums the
 * pair force over all entries of its neighbor list that lie inside the cutoff
 * and adds the result to atom i only.
 *
 * param    - supplies cutforce, sigma6 and epsilon when EXPLICIT_TYPES is not
 *            defined; otherwise the per-type-pair tables in atom are used
 * atom     - positions are read and forces are written via the atom_* macros
 * neighbor - per-atom neighbor lists (neighbors, numneigh, maxneighs)
 * stats    - counters updated through addStat
 *
 * Returns the difference of the two getTimeStamp() values taken around the
 * parallel region.
 */
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    int Nlocal = atom->Nlocal;
#ifndef EXPLICIT_TYPES
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6     = param->sigma6;
    MD_FLOAT epsilon    = param->epsilon;
#endif
    const MD_FLOAT num1  = 1.0;
    const MD_FLOAT num48 = 48.0;
    const MD_FLOAT num05 = 0.5;

    for(int i = 0; i < Nlocal; i++) {
        atom_fx(i) = 0.0;
        atom_fy(i) = 0.0;
        atom_fz(i) = 0.0;
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
        LIKWID_MARKER_START("force");

        #pragma omp for schedule(runtime)
        for(int i = 0; i < Nlocal; i++) {
            // Declared inside the loop so that every thread owns its pointer;
            // a function-scope variable would be shared in the parallel region.
            int* neighs   = &neighbor->neighbors[i * neighbor->maxneighs];
            int numneighs = neighbor->numneigh[i];
            MD_FLOAT xtmp = atom_x(i);
            MD_FLOAT ytmp = atom_y(i);
            MD_FLOAT ztmp = atom_z(i);
            MD_FLOAT fix  = 0;
            MD_FLOAT fiy  = 0;
            MD_FLOAT fiz  = 0;

#ifdef EXPLICIT_TYPES
            const int type_i = atom->type[i];
#endif

            for(int k = 0; k < numneighs; k++) {
                int j         = neighs[k];
                MD_FLOAT delx = xtmp - atom_x(j);
                MD_FLOAT dely = ytmp - atom_y(j);
                MD_FLOAT delz = ztmp - atom_z(j);
                MD_FLOAT rsq  = delx * delx + dely * dely + delz * delz;

#ifdef EXPLICIT_TYPES
                // Pair parameters are looked up in ntypes x ntypes tables.
                const int type_j          = atom->type[j];
                const int type_ij         = type_i * atom->ntypes + type_j;
                const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
                const MD_FLOAT sigma6     = atom->sigma6[type_ij];
                const MD_FLOAT epsilon    = atom->epsilon[type_ij];
#endif

                if(rsq < cutforcesq) {
                    MD_FLOAT sr2   = num1 / rsq;
                    MD_FLOAT sr6   = sr2 * sr2 * sr2 * sigma6;
                    MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
                    fix += delx * force;
                    fiy += dely * force;
                    fiz += delz * force;
#ifdef USE_REFERENCE_VERSION
                    addStat(stats->atoms_within_cutoff, 1);
                } else {
                    addStat(stats->atoms_outside_cutoff, 1);
#endif
                }
            }

            atom_fx(i) += fix;
            atom_fy(i) += fiy;
            atom_fz(i) += fiz;

#ifdef USE_REFERENCE_VERSION
            // Count the unused lanes of the last, partially filled vector
            // iteration as "outside cutoff".
            if(numneighs % VECTOR_WIDTH > 0) {
                addStat(stats->atoms_outside_cutoff, VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
            }
#endif

            addStat(stats->total_force_neighs, numneighs);
            addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
        }

        LIKWID_MARKER_STOP("force");
    }

    double E = getTimeStamp();
    return E - S;
}
|
||||
|
||||
/*
 * Scalar Lennard-Jones force kernel operating on half neighbor lists.
 *
 * Zeroes the force of every local atom, then for each local atom i walks its
 * neighbor list; for every pair inside the cutoff the force is added to atom i
 * and subtracted from atom j when j is a local atom (j < Nlocal).
 *
 * param    - supplies cutforce, sigma6 and epsilon when EXPLICIT_TYPES is not
 *            defined; otherwise the per-type-pair tables in atom are used
 * atom     - positions are read and forces are written via the atom_* macros
 * neighbor - per-atom neighbor lists (neighbors, numneigh, maxneighs)
 * stats    - counters updated through addStat
 *
 * Returns the difference of the two getTimeStamp() values taken around the
 * parallel region.
 *
 * NOTE(review): the updates of atom_f*(j) below, and the final atom_f*(i)
 * accumulation, are not synchronised between threads. With more than one
 * OpenMP thread two iterations can touch the same atom concurrently - confirm
 * whether this kernel is meant to run multi-threaded before relying on it.
 */
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    int Nlocal = atom->Nlocal;
#ifndef EXPLICIT_TYPES
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6     = param->sigma6;
    MD_FLOAT epsilon    = param->epsilon;
#endif
    const MD_FLOAT num1  = 1.0;
    const MD_FLOAT num48 = 48.0;
    const MD_FLOAT num05 = 0.5;

    for(int i = 0; i < Nlocal; i++) {
        atom_fx(i) = 0.0;
        atom_fy(i) = 0.0;
        atom_fz(i) = 0.0;
    }

    double S = getTimeStamp();

    #pragma omp parallel
    {
        LIKWID_MARKER_START("forceLJ-halfneigh");

        #pragma omp for schedule(runtime)
        for(int i = 0; i < Nlocal; i++) {
            // Declared inside the loop so that every thread owns its pointer;
            // a function-scope variable would be shared in the parallel region.
            int* neighs   = &neighbor->neighbors[i * neighbor->maxneighs];
            int numneighs = neighbor->numneigh[i];
            MD_FLOAT xtmp = atom_x(i);
            MD_FLOAT ytmp = atom_y(i);
            MD_FLOAT ztmp = atom_z(i);
            MD_FLOAT fix  = 0;
            MD_FLOAT fiy  = 0;
            MD_FLOAT fiz  = 0;

#ifdef EXPLICIT_TYPES
            const int type_i = atom->type[i];
#endif

            // Pragma required to vectorize the inner loop
#ifdef ENABLE_OMP_SIMD
            #pragma omp simd reduction(+: fix,fiy,fiz)
#endif
            for(int k = 0; k < numneighs; k++) {
                int j         = neighs[k];
                MD_FLOAT delx = xtmp - atom_x(j);
                MD_FLOAT dely = ytmp - atom_y(j);
                MD_FLOAT delz = ztmp - atom_z(j);
                MD_FLOAT rsq  = delx * delx + dely * dely + delz * delz;

#ifdef EXPLICIT_TYPES
                // Pair parameters are looked up in ntypes x ntypes tables.
                const int type_j          = atom->type[j];
                const int type_ij         = type_i * atom->ntypes + type_j;
                const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
                const MD_FLOAT sigma6     = atom->sigma6[type_ij];
                const MD_FLOAT epsilon    = atom->epsilon[type_ij];
#endif

                if(rsq < cutforcesq) {
                    MD_FLOAT sr2   = num1 / rsq;
                    MD_FLOAT sr6   = sr2 * sr2 * sr2 * sigma6;
                    MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
                    fix += delx * force;
                    fiy += dely * force;
                    fiz += delz * force;

                    // We do not need to update forces for ghost atoms
                    if(j < Nlocal) {
                        atom_fx(j) -= delx * force;
                        atom_fy(j) -= dely * force;
                        atom_fz(j) -= delz * force;
                    }
                }
            }

            atom_fx(i) += fix;
            atom_fy(i) += fiy;
            atom_fz(i) += fiz;

            addStat(stats->total_force_neighs, numneighs);
            addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
        }

        LIKWID_MARKER_STOP("forceLJ-halfneigh");
    }

    double E = getTimeStamp();
    return E - S;
}
|
||||
|
||||
/*
 * Lennard-Jones force kernel over full neighbor lists, written with the
 * project's simd_* wrappers. Neighbors are processed VECTOR_WIDTH at a time
 * and lanes are masked out when they are past the end of the list or outside
 * the cutoff.
 *
 * When __SIMD_KERNEL__ is not defined the function prints an error to stderr
 * and terminates the process with exit(-1).
 *
 * param    - supplies cutforce, sigma6 and epsilon
 * atom     - positions are gathered from atom->x (and atom->y, atom->z in the
 *            non-AOS layout); forces are written via the atom_f* macros
 * neighbor - per-atom neighbor lists (neighbors, numneigh, maxneighs)
 * stats    - not used by this kernel
 *
 * Returns the difference of the two getTimeStamp() values taken around the
 * kernel.
 */
double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
    int Nlocal          = atom->Nlocal;
    MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
    MD_FLOAT sigma6     = param->sigma6;
    MD_FLOAT epsilon    = param->epsilon;

    for(int i = 0; i < Nlocal; i++) {
        atom_fx(i) = 0.0;
        atom_fy(i) = 0.0;
        atom_fz(i) = 0.0;
    }

    double S = getTimeStamp();

#ifndef __SIMD_KERNEL__
    // Terminate the message with a newline so it is not glued to later output.
    fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!\n");
    exit(-1);
#else
    MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
    MD_SIMD_FLOAT sigma6_vec     = simd_broadcast(sigma6);
    MD_SIMD_FLOAT eps_vec        = simd_broadcast(epsilon);
    MD_SIMD_FLOAT c48_vec        = simd_broadcast(48.0);
    MD_SIMD_FLOAT c05_vec        = simd_broadcast(0.5);

    #pragma omp parallel
    {
        LIKWID_MARKER_START("force");

        #pragma omp for schedule(runtime)
        for(int i = 0; i < Nlocal; i++) {
            // Declared inside the loop so that every thread owns its pointer;
            // a function-scope variable would be shared in the parallel region.
            int* neighs               = &neighbor->neighbors[i * neighbor->maxneighs];
            int numneighs             = neighbor->numneigh[i];
            MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
            MD_SIMD_FLOAT xtmp        = simd_broadcast(atom_x(i));
            MD_SIMD_FLOAT ytmp        = simd_broadcast(atom_y(i));
            MD_SIMD_FLOAT ztmp        = simd_broadcast(atom_z(i));
            MD_SIMD_FLOAT fix         = simd_zero();
            MD_SIMD_FLOAT fiy         = simd_zero();
            MD_SIMD_FLOAT fiz         = simd_zero();

            for(int k = 0; k < numneighs; k += VECTOR_WIDTH) {
                // If the last iteration of this loop is separated from the rest, this mask can be set only there
                MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(simd_int_add(simd_int_broadcast(k), simd_int_seq()), numneighs_vec);
                MD_SIMD_INT j               = simd_int_mask_load(&neighs[k], mask_numneighs);
#ifdef AOS
                MD_SIMD_INT j3     = simd_int_add(simd_int_add(j, j), j); // j * 3
                MD_SIMD_FLOAT delx = xtmp - simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
                MD_SIMD_FLOAT dely = ytmp - simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
                MD_SIMD_FLOAT delz = ztmp - simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
#else
                MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
                MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
                MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
#endif
                MD_SIMD_FLOAT rsq        = simd_fma(delx, delx, simd_fma(dely, dely, simd_mul(delz, delz)));
                // A lane contributes only if it is a real neighbor and inside the cutoff.
                MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs, simd_mask_cond_lt(rsq, cutforcesq_vec));
                MD_SIMD_FLOAT sr2        = simd_reciprocal(rsq);
                MD_SIMD_FLOAT sr6        = simd_mul(sr2, simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
                MD_SIMD_FLOAT force      = simd_mul(c48_vec, simd_mul(sr6, simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));

                fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
                fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
                fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
            }

            atom_fx(i) += simd_h_reduce_sum(fix);
            atom_fy(i) += simd_h_reduce_sum(fiy);
            atom_fz(i) += simd_h_reduce_sum(fiz);
        }

        LIKWID_MARKER_STOP("force");
    }
#endif

    double E = getTimeStamp();
    return E - S;
}
|
@ -1,103 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
|
||||
/*
 * Compile-time dispatch: map the generic routine names used by callers onto
 * the CUDA or CPU implementations, and pick the full-neighbor-list force
 * kernel (SIMD or plain C) for CPU builds. KERNEL_NAME is a printable label
 * for the selected variant.
 */
#if defined(CUDA_TARGET)
#define KERNEL_NAME             "CUDA"
#define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
#define initialIntegrate        initialIntegrate_cuda
#define finalIntegrate          finalIntegrate_cuda
#define buildNeighbor           buildNeighbor_cuda
#define updatePbc               updatePbc_cuda
#define updateAtomsPbc          updateAtomsPbc_cuda
#else
/* Routines shared by every CPU build. */
#define initialIntegrate        initialIntegrate_cpu
#define finalIntegrate          finalIntegrate_cpu
#define buildNeighbor           buildNeighbor_cpu
#define updatePbc               updatePbc_cpu
#define updateAtomsPbc          updateAtomsPbc_cpu
/* Force kernel flavour for CPU builds. */
#if defined(USE_SIMD_KERNEL)
#define KERNEL_NAME             "SIMD"
#define computeForceLJFullNeigh computeForceLJFullNeigh_simd
#else
#define KERNEL_NAME             "plain-C"
#define computeForceLJFullNeigh computeForceLJFullNeigh_plain_c
#endif
#endif
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int *border_map;
|
||||
int *type;
|
||||
MD_FLOAT *epsilon;
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
} DeviceAtom;
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int *border_map;
|
||||
int *type;
|
||||
int ntypes;
|
||||
MD_FLOAT *epsilon;
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
|
||||
// DEM
|
||||
MD_FLOAT *radius;
|
||||
MD_FLOAT *av;
|
||||
MD_FLOAT *r;
|
||||
|
||||
// Device data
|
||||
DeviceAtom d_atom;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
extern void createAtom(Atom*, Parameter*);
|
||||
extern int readAtom(Atom*, Parameter*);
|
||||
extern int readAtom_pdb(Atom*, Parameter*);
|
||||
extern int readAtom_gro(Atom*, Parameter*);
|
||||
extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern int readAtom_in(Atom*, Parameter*);
|
||||
extern void writeAtom(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
|
||||
/*
 * Element accessors for positions, velocities and forces. They expand to
 * lvalues and require a variable named `atom` (pointer to Atom) in scope.
 *
 * AoS: the three components of atom i are interleaved in the first array of
 *      each group (x, vx, fx) at offsets 3*i + 0, 1, 2.
 * SoA: every component has its own array, indexed by i.
 */
#if defined(AOS)
#define POS_DATA_LAYOUT "AoS"
#define atom_x(i)       atom->x[(i) * 3 + 0]
#define atom_y(i)       atom->x[(i) * 3 + 1]
#define atom_z(i)       atom->x[(i) * 3 + 2]
#define atom_vx(i)      atom->vx[(i) * 3 + 0]
#define atom_vy(i)      atom->vx[(i) * 3 + 1]
#define atom_vz(i)      atom->vx[(i) * 3 + 2]
#define atom_fx(i)      atom->fx[(i) * 3 + 0]
#define atom_fy(i)      atom->fx[(i) * 3 + 1]
#define atom_fz(i)      atom->fx[(i) * 3 + 2]
#else
#define POS_DATA_LAYOUT "SoA"
#define atom_x(i)       atom->x[(i)]
#define atom_y(i)       atom->y[(i)]
#define atom_z(i)       atom->z[(i)]
#define atom_vx(i)      atom->vx[(i)]
#define atom_vy(i)      atom->vy[(i)]
#define atom_vz(i)      atom->vz[(i)]
#define atom_fx(i)      atom->fx[(i)]
#define atom_fy(i)      atom->fy[(i)]
#define atom_fz(i)      atom->fz[(i)]
#endif
|
||||
|
||||
#endif
|
171
lammps/pbc.c
171
lammps/pbc.c
@ -1,171 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
//---
|
||||
#include <pbc.h>
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
int NmaxGhost;
|
||||
int *PBCx, *PBCy, *PBCz;
|
||||
|
||||
static void growPbc(Atom*);
|
||||
|
||||
/* exported subroutines */
|
||||
/* Reset the ghost-atom bookkeeping: zero capacity and no arrays allocated. */
void initPbc(Atom* atom) {
    PBCx = NULL;
    PBCy = NULL;
    PBCz = NULL;
    atom->border_map = NULL;
    NmaxGhost        = 0;
}
|
||||
|
||||
/* update coordinates of ghost atoms */
|
||||
/* uses mapping created in setupPbc */
|
||||
void updatePbc_cpu(Atom *atom, Parameter *param, bool doReneighbor) {
|
||||
int *border_map = atom->border_map;
|
||||
int nlocal = atom->Nlocal;
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for(int i = 0; i < atom->Nghost; i++) {
|
||||
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
|
||||
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
|
||||
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
|
||||
}
|
||||
}
|
||||
|
||||
/* relocate atoms that have left domain according
|
||||
* to periodic boundary conditions */
|
||||
void updateAtomsPbc_cpu(Atom *atom, Parameter *param) {
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
if(atom_x(i) < 0.0) {
|
||||
atom_x(i) += xprd;
|
||||
} else if(atom_x(i) >= xprd) {
|
||||
atom_x(i) -= xprd;
|
||||
}
|
||||
|
||||
if(atom_y(i) < 0.0) {
|
||||
atom_y(i) += yprd;
|
||||
} else if(atom_y(i) >= yprd) {
|
||||
atom_y(i) -= yprd;
|
||||
}
|
||||
|
||||
if(atom_z(i) < 0.0) {
|
||||
atom_z(i) += zprd;
|
||||
} else if(atom_z(i) >= zprd) {
|
||||
atom_z(i) -= zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* setup periodic boundary conditions by
|
||||
* defining ghost atoms around domain
|
||||
* only creates mapping and coordinate corrections
|
||||
* that are then enforced in updatePbc */
|
||||
/*
 * Append one ghost image of local atom `i` with image offsets (dx, dy, dz).
 * Relies on `Nghost`, `border_map`, `i` and `atom` being in scope at the
 * expansion site. Nghost is the index of the last ghost written, so it is
 * incremented before use.
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe in unbraced if/else bodies; arguments are parenthesised.
 */
#define ADDGHOST(dx, dy, dz)                                       \
    do {                                                           \
        Nghost++;                                                  \
        border_map[Nghost] = i;                                    \
        PBCx[Nghost]       = (dx);                                 \
        PBCy[Nghost]       = (dy);                                 \
        PBCz[Nghost]       = (dz);                                 \
        atom->type[atom->Nlocal + Nghost] = atom->type[i];         \
    } while (0)
|
||||
|
||||
/*
 * Build the ghost-atom mapping for periodic boundaries.
 *
 * For every local atom closer than cutneigh to a periodic box face, one ghost
 * entry is recorded per face, corner and edge image (border_map, PBCx/y/z and
 * the ghost's type). Coordinates are not written here; updatePbc applies the
 * recorded offsets. On return atom->Nghost holds the number of ghosts.
 *
 * param - box lengths (xprd, yprd, zprd), cutneigh and the pbc_x/y/z switches
 */
void setupPbc(Atom *atom, Parameter *param) {
    /* Upper bound on ghosts appended for one atom: 6 faces + 8 corners +
     * 12 edges. When a box length is below 2 * cutneigh an atom can satisfy
     * both the low and the high test of an axis, so more than the usual 7
     * images are possible; reserving the full bound keeps the writes below
     * inside the allocated arrays. */
    const int maxGhostsPerAtom = 26;

    int *border_map   = atom->border_map;
    MD_FLOAT xprd     = param->xprd;
    MD_FLOAT yprd     = param->yprd;
    MD_FLOAT zprd     = param->zprd;
    MD_FLOAT Cutneigh = param->cutneigh;
    int Nghost        = -1; /* index of the last ghost written so far */

    for(int i = 0; i < atom->Nlocal; i++) {
        if(atom->Nlocal + Nghost + maxGhostsPerAtom >= atom->Nmax) {
            growAtom(atom);
        }

        if(Nghost + maxGhostsPerAtom >= NmaxGhost) {
            growPbc(atom);
            /* growPbc reallocates the map, refresh the cached pointer */
            border_map = atom->border_map;
        }

        MD_FLOAT x = atom_x(i);
        MD_FLOAT y = atom_y(i);
        MD_FLOAT z = atom_z(i);

        /* Proximity of atom i to the low / high face along each axis. */
        const int xlo = (x < Cutneigh);
        const int xhi = (x >= (xprd - Cutneigh));
        const int ylo = (y < Cutneigh);
        const int yhi = (y >= (yprd - Cutneigh));
        const int zlo = (z < Cutneigh);
        const int zhi = (z >= (zprd - Cutneigh));

        /* Setup ghost atoms */
        /* 6 planes */
        if(param->pbc_x != 0) {
            if(xlo) { ADDGHOST(+1,0,0); }
            if(xhi) { ADDGHOST(-1,0,0); }
        }

        if(param->pbc_y != 0) {
            if(ylo) { ADDGHOST(0,+1,0); }
            if(yhi) { ADDGHOST(0,-1,0); }
        }

        if(param->pbc_z != 0) {
            if(zlo) { ADDGHOST(0,0,+1); }
            if(zhi) { ADDGHOST(0,0,-1); }
        }

        /* 8 corners */
        if(param->pbc_x != 0 && param->pbc_y != 0 && param->pbc_z != 0) {
            if(xlo && ylo && zlo) { ADDGHOST(+1,+1,+1); }
            if(xlo && yhi && zlo) { ADDGHOST(+1,-1,+1); }
            if(xlo && ylo && zhi) { ADDGHOST(+1,+1,-1); }
            if(xlo && yhi && zhi) { ADDGHOST(+1,-1,-1); }
            if(xhi && ylo && zlo) { ADDGHOST(-1,+1,+1); }
            if(xhi && yhi && zlo) { ADDGHOST(-1,-1,+1); }
            if(xhi && ylo && zhi) { ADDGHOST(-1,+1,-1); }
            if(xhi && yhi && zhi) { ADDGHOST(-1,-1,-1); }
        }

        /* 12 edges */
        if(param->pbc_x != 0 && param->pbc_z != 0) {
            if(xlo && zlo) { ADDGHOST(+1,0,+1); }
            if(xlo && zhi) { ADDGHOST(+1,0,-1); }
            if(xhi && zlo) { ADDGHOST(-1,0,+1); }
            if(xhi && zhi) { ADDGHOST(-1,0,-1); }
        }

        if(param->pbc_y != 0 && param->pbc_z != 0) {
            if(ylo && zlo) { ADDGHOST(0,+1,+1); }
            if(ylo && zhi) { ADDGHOST(0,+1,-1); }
            if(yhi && zlo) { ADDGHOST(0,-1,+1); }
            if(yhi && zhi) { ADDGHOST(0,-1,-1); }
        }

        if(param->pbc_x != 0 && param->pbc_y != 0) {
            if(ylo && xlo) { ADDGHOST(+1,+1,0); }
            if(ylo && xhi) { ADDGHOST(-1,+1,0); }
            if(yhi && xlo) { ADDGHOST(+1,-1,0); }
            if(yhi && xhi) { ADDGHOST(-1,-1,0); }
        }
    }

    // increase by one to make it the ghost atom count
    atom->Nghost = Nghost + 1;
}
|
||||
|
||||
/* internal subroutines */
|
||||
/*
 * Enlarge the ghost bookkeeping arrays (atom->border_map, PBCx, PBCy, PBCz)
 * by DELTA entries each and update NmaxGhost to the new capacity.
 */
void growPbc(Atom* atom) {
    const size_t oldBytes = NmaxGhost * sizeof(int);
    NmaxGhost += DELTA;
    const size_t newBytes = NmaxGhost * sizeof(int);

    atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, newBytes, oldBytes);
    PBCx             = (int*) reallocate(PBCx, ALIGNMENT, newBytes, oldBytes);
    PBCy             = (int*) reallocate(PBCy, ALIGNMENT, newBytes, oldBytes);
    PBCz             = (int*) reallocate(PBCz, ALIGNMENT, newBytes, oldBytes);
}
|
@ -1,17 +1,18 @@
|
||||
CC = clang
|
||||
CC = /opt/homebrew/Cellar/llvm/18.1.5/bin/clang
|
||||
LINKER = $(CC)
|
||||
|
||||
ANSI_CFLAGS = -ansi
|
||||
ANSI_CFLAGS += -std=c99
|
||||
ANSI_CFLAGS += -pedantic
|
||||
ANSI_CFLAGS += -Wextra
|
||||
# ANSI_CFLAGS += -Wextra
|
||||
|
||||
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) -Xpreprocessor -fopenmp #-g
|
||||
#CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||
#CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||
#CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g
|
||||
ASFLAGS = -masm=intel
|
||||
ASFLAGS = #-masm=intel
|
||||
LFLAGS =
|
||||
DEFINES = -D_GNU_SOURCE
|
||||
INCLUDES =
|
||||
LIBS = -lm #-lomp
|
||||
# MacOSX with Apple Silicon and homebrew
|
||||
INCLUDES = -I/opt/homebrew/Cellar/libomp/18.1.5/include/
|
||||
LIBS = -lm -L/opt/homebrew/Cellar/libomp/18.1.5/lib/ -lomp
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,27 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
double getTimeStamp()
|
||||
double getTimeStamp(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeResolution()
|
||||
double getTimeResolution(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_getres(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeStamp_()
|
||||
{
|
||||
return getTimeStamp();
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@ -9,6 +9,5 @@
|
||||
|
||||
extern double getTimeStamp(void);
|
||||
extern double getTimeResolution(void);
|
||||
extern double getTimeStamp_(void);
|
||||
|
||||
#endif
|
@ -1,38 +1,39 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <util.h>
|
||||
|
||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
|
||||
#define IA 16807
|
||||
#define IM 2147483647
|
||||
#define AM (1.0/IM)
|
||||
#define IQ 127773
|
||||
#define IR 2836
|
||||
#define IA 16807
|
||||
#define IM 2147483647
|
||||
#define AM (1.0 / IM)
|
||||
#define IQ 127773
|
||||
#define IR 2836
|
||||
#define MASK 123459876
|
||||
|
||||
double myrandom(int* seed) {
|
||||
int k= (*seed) / IQ;
|
||||
double myrandom(int* seed)
|
||||
{
|
||||
int k = (*seed) / IQ;
|
||||
double ans;
|
||||
|
||||
*seed = IA * (*seed - k * IQ) - IR * k;
|
||||
if(*seed < 0) *seed += IM;
|
||||
if (*seed < 0) *seed += IM;
|
||||
ans = AM * (*seed);
|
||||
return ans;
|
||||
}
|
||||
|
||||
void random_reset(int *seed, int ibase, double *coord) {
|
||||
void random_reset(int* seed, int ibase, double* coord)
|
||||
{
|
||||
int i;
|
||||
char *str = (char *) &ibase;
|
||||
int n = sizeof(int);
|
||||
char* str = (char*)&ibase;
|
||||
int n = sizeof(int);
|
||||
unsigned int hash = 0;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
@ -41,8 +42,8 @@ void random_reset(int *seed, int ibase, double *coord) {
|
||||
hash ^= (hash >> 6);
|
||||
}
|
||||
|
||||
str = (char *) coord;
|
||||
n = 3 * sizeof(double);
|
||||
str = (char*)coord;
|
||||
n = 3 * sizeof(double);
|
||||
for (i = 0; i < n; i++) {
|
||||
hash += str[i];
|
||||
hash += (hash << 10);
|
||||
@ -61,45 +62,59 @@ void random_reset(int *seed, int ibase, double *coord) {
|
||||
|
||||
// warm up the RNG
|
||||
|
||||
for (i = 0; i < 5; i++) myrandom(seed);
|
||||
//save = 0;
|
||||
for (i = 0; i < 5; i++)
|
||||
myrandom(seed);
|
||||
// save = 0;
|
||||
}
|
||||
|
||||
int str2ff(const char *string) {
|
||||
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
|
||||
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
|
||||
if(strncmp(string, "dem", 3) == 0) return FF_DEM;
|
||||
int str2ff(const char* string)
|
||||
{
|
||||
if (strncmp(string, "lj", 2) == 0) return FF_LJ;
|
||||
if (strncmp(string, "eam", 3) == 0) return FF_EAM;
|
||||
if (strncmp(string, "dem", 3) == 0) return FF_DEM;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* ff2str(int ff) {
|
||||
if(ff == FF_LJ) { return "lj"; }
|
||||
if(ff == FF_EAM) { return "eam"; }
|
||||
if(ff == FF_DEM) { return "dem"; }
|
||||
const char* ff2str(int ff)
|
||||
{
|
||||
if (ff == FF_LJ) {
|
||||
return "lj";
|
||||
}
|
||||
if (ff == FF_EAM) {
|
||||
return "eam";
|
||||
}
|
||||
if (ff == FF_DEM) {
|
||||
return "dem";
|
||||
}
|
||||
return "invalid";
|
||||
}
|
||||
|
||||
int get_cuda_num_threads() {
|
||||
const char *num_threads_env = getenv("NUM_THREADS");
|
||||
int get_cuda_num_threads(void)
|
||||
{
|
||||
const char* num_threads_env = getenv("NUM_THREADS");
|
||||
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
|
||||
}
|
||||
|
||||
void readline(char *line, FILE *fp) {
|
||||
if(fgets(line, MAXLINE, fp) == NULL) {
|
||||
if(errno != 0) {
|
||||
void readline(char* line, FILE* fp)
|
||||
{
|
||||
if (fgets(line, MAXLINE, fp) == NULL) {
|
||||
if (errno != 0) {
|
||||
perror("readline()");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void debug_printf(const char *format, ...) {
|
||||
#ifdef DEBUG
|
||||
void debug_printf(const char* format, ...)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
va_list arg;
|
||||
int ret;
|
||||
|
||||
va_start(arg, format);
|
||||
if((vfprintf(stdout, format, arg)) < 0) { perror("debug_printf()"); }
|
||||
if ((vfprintf(stdout, format, arg)) < 0) {
|
||||
perror("debug_printf()");
|
||||
}
|
||||
va_end(arg);
|
||||
#endif
|
||||
#endif
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@ -7,40 +7,41 @@
|
||||
#ifndef __UTIL_H_
|
||||
#define __UTIL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#ifndef MIN
|
||||
# define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
# define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#ifndef ABS
|
||||
# define ABS(a) ((a) >= 0 ? (a) : -(a))
|
||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
|
||||
#endif
|
||||
|
||||
#define DEBUG_MESSAGE debug_printf
|
||||
|
||||
#ifndef MAXLINE
|
||||
# define MAXLINE 4096
|
||||
#define MAXLINE 4096
|
||||
#endif
|
||||
|
||||
#define FF_LJ 0
|
||||
#define FF_EAM 1
|
||||
#define FF_DEM 2
|
||||
#define FF_LJ 0
|
||||
#define FF_EAM 1
|
||||
#define FF_DEM 2
|
||||
|
||||
#if PRECISION == 1
|
||||
# define PRECISION_STRING "single"
|
||||
#define PRECISION_STRING "single"
|
||||
#else
|
||||
# define PRECISION_STRING "double"
|
||||
#define PRECISION_STRING "double"
|
||||
#endif
|
||||
|
||||
extern double myrandom(int*);
|
||||
extern double myrandom(int *);
|
||||
extern void random_reset(int *seed, int ibase, double *coord);
|
||||
extern int str2ff(const char *string);
|
||||
extern const char* ff2str(int ff);
|
||||
extern const char *ff2str(int ff);
|
||||
extern void readline(char *line, FILE *fp);
|
||||
extern void debug_printf(const char *format, ...);
|
||||
extern int get_cuda_num_threads();
|
||||
extern int get_cuda_num_threads(void);
|
||||
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
102
src/verletlist/atom.h
Normal file
102
src/verletlist/atom.h
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
#define KERNEL_NAME "CUDA"
|
||||
#define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
|
||||
#define initialIntegrate initialIntegrate_cuda
|
||||
#define finalIntegrate finalIntegrate_cuda
|
||||
#define buildNeighbor buildNeighbor_cuda
|
||||
#define updatePbc updatePbc_cuda
|
||||
#define updateAtomsPbc updateAtomsPbc_cuda
|
||||
#else
|
||||
#ifdef USE_SIMD_KERNEL
|
||||
#define KERNEL_NAME "SIMD"
|
||||
#define computeForceLJFullNeigh computeForceLJFullNeigh_simd
|
||||
#else
|
||||
#define KERNEL_NAME "PLAIN"
|
||||
#endif
|
||||
#define initialIntegrate initialIntegrate_cpu
|
||||
#define finalIntegrate finalIntegrate_cpu
|
||||
#define buildNeighbor buildNeighbor_cpu
|
||||
#define updatePbc updatePbc_cpu
|
||||
#define updateAtomsPbc updateAtomsPbc_cpu
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int* border_map;
|
||||
int* type;
|
||||
MD_FLOAT* epsilon;
|
||||
MD_FLOAT* sigma6;
|
||||
MD_FLOAT* cutforcesq;
|
||||
MD_FLOAT* cutneighsq;
|
||||
} DeviceAtom;
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int* border_map;
|
||||
int* type;
|
||||
int ntypes;
|
||||
MD_FLOAT* epsilon;
|
||||
MD_FLOAT* sigma6;
|
||||
MD_FLOAT* cutforcesq;
|
||||
MD_FLOAT* cutneighsq;
|
||||
|
||||
// DEM
|
||||
MD_FLOAT* radius;
|
||||
MD_FLOAT* av;
|
||||
MD_FLOAT* r;
|
||||
|
||||
// Device data
|
||||
DeviceAtom d_atom;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
extern void createAtom(Atom*, Parameter*);
|
||||
extern int readAtom(Atom*, Parameter*);
|
||||
extern int readAtom_pdb(Atom*, Parameter*);
|
||||
extern int readAtom_gro(Atom*, Parameter*);
|
||||
extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern int readAtom_in(Atom*, Parameter*);
|
||||
extern void writeAtom(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
|
||||
#ifdef AOS
|
||||
#define POS_DATA_LAYOUT "AoS"
|
||||
#define atom_x(i) atom->x[(i) * 3 + 0]
|
||||
#define atom_y(i) atom->x[(i) * 3 + 1]
|
||||
#define atom_z(i) atom->x[(i) * 3 + 2]
|
||||
#define atom_vx(i) atom->vx[(i) * 3 + 0]
|
||||
#define atom_vy(i) atom->vx[(i) * 3 + 1]
|
||||
#define atom_vz(i) atom->vx[(i) * 3 + 2]
|
||||
#define atom_fx(i) atom->fx[(i) * 3 + 0]
|
||||
#define atom_fy(i) atom->fx[(i) * 3 + 1]
|
||||
#define atom_fz(i) atom->fx[(i) * 3 + 2]
|
||||
#else
|
||||
#define POS_DATA_LAYOUT "SoA"
|
||||
#define atom_x(i) atom->x[i]
|
||||
#define atom_y(i) atom->y[i]
|
||||
#define atom_z(i) atom->z[i]
|
||||
#define atom_vx(i) atom->vx[i]
|
||||
#define atom_vy(i) atom->vy[i]
|
||||
#define atom_vz(i) atom->vz[i]
|
||||
#define atom_fx(i) atom->fx[i]
|
||||
#define atom_fy(i) atom->fy[i]
|
||||
#define atom_fz(i) atom->fz[i]
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
112
src/verletlist/force_lj-x86.c
Normal file
112
src/verletlist/force_lj-x86.c
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
//---
|
||||
#include <atom.h>
|
||||
#include <likwid-marker.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timing.h>
|
||||
|
||||
#ifdef __SIMD_KERNEL__
|
||||
#include <simd.h>
|
||||
#endif
|
||||
|
||||
double computeForceLJFullNeigh_simd(
|
||||
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||
{
|
||||
int Nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for (int i = 0; i < Nlocal; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#ifndef __SIMD_KERNEL__
|
||||
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
|
||||
exit(-1);
|
||||
#else
|
||||
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
|
||||
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
|
||||
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
|
||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
for (int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
|
||||
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
|
||||
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
|
||||
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
|
||||
MD_SIMD_FLOAT fix = simd_zero();
|
||||
MD_SIMD_FLOAT fiy = simd_zero();
|
||||
MD_SIMD_FLOAT fiz = simd_zero();
|
||||
|
||||
for (int k = 0; k < numneighs; k += VECTOR_WIDTH) {
|
||||
// If the last iteration of this loop is separated from the rest, this
|
||||
// mask can be set only there
|
||||
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(
|
||||
simd_int_add(simd_int_broadcast(k), simd_int_seq()),
|
||||
numneighs_vec);
|
||||
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
|
||||
#ifdef AOS
|
||||
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
|
||||
MD_SIMD_FLOAT delx = xtmp -
|
||||
simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT dely = ytmp -
|
||||
simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT delz = ztmp -
|
||||
simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
|
||||
#else
|
||||
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
|
||||
#endif
|
||||
MD_SIMD_FLOAT rsq = simd_fma(delx,
|
||||
delx,
|
||||
simd_fma(dely, dely, simd_mul(delz, delz)));
|
||||
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs,
|
||||
simd_mask_cond_lt(rsq, cutforcesq_vec));
|
||||
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
|
||||
MD_SIMD_FLOAT sr6 = simd_mul(sr2,
|
||||
simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
|
||||
MD_SIMD_FLOAT force = simd_mul(c48_vec,
|
||||
simd_mul(sr6,
|
||||
simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
|
||||
|
||||
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
|
||||
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
|
||||
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
|
||||
}
|
||||
|
||||
atom_fx(i) += simd_h_reduce_sum(fix);
|
||||
atom_fy(i) += simd_h_reduce_sum(fiy);
|
||||
atom_fz(i) += simd_h_reduce_sum(fiz);
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
#endif
|
||||
|
||||
double E = getTimeStamp();
|
||||
return E - S;
|
||||
}
|
198
src/verletlist/force_lj.c
Normal file
198
src/verletlist/force_lj.c
Normal file
@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <likwid-marker.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timing.h>
|
||||
|
||||
double computeForceLJFullNeigh(
|
||||
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||
{
|
||||
int nLocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
const MD_FLOAT num1 = 1.0;
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for (int i = 0; i < nLocal; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
double timeStart = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
for (int i = 0; i < nLocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
for (int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
#endif
|
||||
|
||||
if (rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
#ifdef USE_REFERENCE_VERSION
|
||||
addStat(stats->atoms_within_cutoff, 1);
|
||||
} else {
|
||||
addStat(stats->atoms_outside_cutoff, 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
atom_fx(i) += fix;
|
||||
atom_fy(i) += fiy;
|
||||
atom_fz(i) += fiz;
|
||||
|
||||
#ifdef USE_REFERENCE_VERSION
|
||||
if (numneighs % VECTOR_WIDTH > 0) {
|
||||
addStat(stats->atoms_outside_cutoff,
|
||||
VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
|
||||
}
|
||||
#endif
|
||||
|
||||
addStat(stats->total_force_neighs, numneighs);
|
||||
addStat(stats->total_force_iters,
|
||||
(numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
double timeStop = getTimeStamp();
|
||||
return timeStop - timeStart;
|
||||
}
|
||||
|
||||
double computeForceLJHalfNeigh(
|
||||
Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||
{
|
||||
int nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
const MD_FLOAT num1 = 1.0;
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
|
||||
double timeStart = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("forceLJ-halfneigh");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
// Pragma required to vectorize the inner loop
|
||||
#ifdef ENABLE_OMP_SIMD
|
||||
#pragma omp simd reduction(+ : fix, fiy, fiz)
|
||||
#endif
|
||||
for (int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
#endif
|
||||
|
||||
if (rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
// We do not need to update forces for ghost atoms
|
||||
if (j < nlocal) {
|
||||
atom_fx(j) -= delx * force;
|
||||
atom_fy(j) -= dely * force;
|
||||
atom_fz(j) -= delz * force;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
atom_fx(i) += fix;
|
||||
atom_fy(i) += fiy;
|
||||
atom_fz(i) += fiz;
|
||||
|
||||
addStat(stats->total_force_neighs, numneighs);
|
||||
addStat(stats->total_force_iters,
|
||||
(numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("forceLJ-halfneigh");
|
||||
}
|
||||
|
||||
double timeStop = getTimeStamp();
|
||||
return timeStop - timeStart;
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,40 +1,37 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include <likwid-marker.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <device.h>
|
||||
#include <eam.h>
|
||||
#include <integrate.h>
|
||||
#include <thermo.h>
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <pbc.h>
|
||||
#include <stats.h>
|
||||
#include <thermo.h>
|
||||
#include <timers.h>
|
||||
#include <timing.h>
|
||||
#include <util.h>
|
||||
#include <vtk.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
#define HLINE "------------------------------------------------------------------\n"
|
||||
|
||||
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
|
||||
@ -42,20 +39,23 @@ extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
|
||||
#endif
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
double setup(Parameter* param, Eam* eam, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||
{
|
||||
if (param->force_field == FF_EAM) {
|
||||
initEam(eam, param);
|
||||
}
|
||||
double timeStart, timeStop;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
|
||||
S = getTimeStamp();
|
||||
timeStart = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initPbc(atom);
|
||||
initStats(stats);
|
||||
initNeighbor(neighbor, param);
|
||||
if(param->input_file == NULL) {
|
||||
if (param->input_file == NULL) {
|
||||
createAtom(atom, param);
|
||||
} else {
|
||||
readAtom(atom, param);
|
||||
@ -63,49 +63,59 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
|
||||
setupNeighbor(param);
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
#ifdef SORT_ATOMS
|
||||
if (param->input_file == NULL) {
|
||||
adjustThermo(param, atom);
|
||||
}
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
#endif
|
||||
setupPbc(atom, param);
|
||||
initDevice(atom, neighbor);
|
||||
updatePbc(atom, param, true);
|
||||
buildNeighbor(atom, neighbor);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
timeStop = getTimeStamp();
|
||||
return timeStop - timeStart;
|
||||
}
|
||||
|
||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
double reneighbour(Parameter* param, Atom* atom, Neighbor* neighbor)
|
||||
{
|
||||
double timeStart, timeStop;
|
||||
timeStart = getTimeStamp();
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
updateAtomsPbc(atom, param);
|
||||
#ifdef SORT_ATOMS
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
#endif
|
||||
setupPbc(atom, param);
|
||||
updatePbc(atom, param, true);
|
||||
buildNeighbor(atom, neighbor);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
timeStop = getTimeStamp();
|
||||
return timeStop - timeStart;
|
||||
}
|
||||
|
||||
void printAtomState(Atom *atom) {
|
||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n", atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
||||
void printAtomState(Atom* atom)
|
||||
{
|
||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
|
||||
atom->Natoms,
|
||||
atom->Nlocal,
|
||||
atom->Nghost,
|
||||
atom->Nmax);
|
||||
// int nall = atom->Nlocal + atom->Nghost;
|
||||
// for (int i=0; i<nall; i++) {
|
||||
// printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]);
|
||||
// }
|
||||
}
|
||||
|
||||
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
if(param->force_field == FF_EAM) {
|
||||
double computeForce(
|
||||
Eam* eam, Parameter* param, Atom* atom, Neighbor* neighbor, Stats* stats)
|
||||
{
|
||||
if (param->force_field == FF_EAM) {
|
||||
return computeForceEam(eam, param, atom, neighbor, stats);
|
||||
} else if(param->force_field == FF_DEM) {
|
||||
if(param->half_neigh) {
|
||||
} else if (param->force_field == FF_DEM) {
|
||||
if (param->half_neigh) {
|
||||
fprintf(stderr, "Error: DEM cannot use half neighbor-lists!\n");
|
||||
return 0.0;
|
||||
} else {
|
||||
@ -113,29 +123,38 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
}
|
||||
}
|
||||
|
||||
if(param->half_neigh) {
|
||||
if (param->half_neigh) {
|
||||
return computeForceLJHalfNeigh(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
#ifdef CUDA_TARGET
|
||||
return computeForceLJFullNeigh(param, atom, neighbor);
|
||||
#else
|
||||
#else
|
||||
return computeForceLJFullNeigh(param, atom, neighbor, stats);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void writeInput(Parameter *param, Atom *atom) {
|
||||
FILE *fpin = fopen("input.in", "w");
|
||||
void writeInput(Parameter* param, Atom* atom)
|
||||
{
|
||||
FILE* fpin = fopen("input.in", "w");
|
||||
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i));
|
||||
for (int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fpin,
|
||||
"1,%f,%f,%f,%f,%f,%f\n",
|
||||
atom_x(i),
|
||||
atom_y(i),
|
||||
atom_z(i),
|
||||
atom_vx(i),
|
||||
atom_vy(i),
|
||||
atom_vz(i));
|
||||
}
|
||||
|
||||
fclose(fpin);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
double timer[NUMTIMER];
|
||||
Eam eam;
|
||||
Atom atom;
|
||||
@ -147,81 +166,88 @@ int main(int argc, char** argv) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_REGISTER("force");
|
||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||
//LIKWID_MARKER_REGISTER("pbc");
|
||||
// LIKWID_MARKER_REGISTER("reneighbour");
|
||||
// LIKWID_MARKER_REGISTER("pbc");
|
||||
}
|
||||
|
||||
initParameter(¶m);
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
if ((strcmp(argv[i], "-p") == 0) || strcmp(argv[i], "--params") == 0) {
|
||||
readParameter(¶m, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-f") == 0)) {
|
||||
if((param.force_field = str2ff(argv[++i])) < 0) {
|
||||
if ((strcmp(argv[i], "-f") == 0)) {
|
||||
if ((param.force_field = str2ff(argv[++i])) < 0) {
|
||||
fprintf(stderr, "Invalid force field!\n");
|
||||
exit(-1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-i") == 0)) {
|
||||
if ((strcmp(argv[i], "-i") == 0)) {
|
||||
param.input_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-e") == 0)) {
|
||||
if ((strcmp(argv[i], "-e") == 0)) {
|
||||
param.eam_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
|
||||
if ((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
|
||||
param.ntimes = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nx") == 0)) {
|
||||
if ((strcmp(argv[i], "-nx") == 0)) {
|
||||
param.nx = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-ny") == 0)) {
|
||||
if ((strcmp(argv[i], "-ny") == 0)) {
|
||||
param.ny = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nz") == 0)) {
|
||||
if ((strcmp(argv[i], "-nz") == 0)) {
|
||||
param.nz = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-half") == 0)) {
|
||||
if ((strcmp(argv[i], "-half") == 0)) {
|
||||
param.half_neigh = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
|
||||
if ((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
|
||||
param.cutforce = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
|
||||
if ((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
|
||||
param.skin = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "--freq") == 0)) {
|
||||
if ((strcmp(argv[i], "--freq") == 0)) {
|
||||
param.proc_freq = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "--vtk") == 0)) {
|
||||
if ((strcmp(argv[i], "--vtk") == 0)) {
|
||||
param.vtk_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-w") == 0)) {
|
||||
if ((strcmp(argv[i], "-w") == 0)) {
|
||||
param.write_atom_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||
printf("MD Bench: A performance-oriented prototyping harness for MD "
|
||||
"algorithms\n");
|
||||
printf(HLINE);
|
||||
printf("-p / --params <string>: file to read parameters from (can be specified more than once)\n");
|
||||
printf("-f <string>: force field (lj, eam or dem), default lj\n");
|
||||
printf("-i <string>: input file with atom positions (dump)\n");
|
||||
printf("-p / --params <string>: file to read parameters from (can be "
|
||||
"specified more than once)\n");
|
||||
printf("-f <string>: force field (lj, eam or dem), "
|
||||
"default lj\n");
|
||||
printf("-i <string>: input file with atom positions "
|
||||
"(dump)\n");
|
||||
printf("-e <string>: input file for EAM\n");
|
||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||
printf("-half <int>: use half (1) or full (0) neighbor lists\n");
|
||||
printf("-n / --nsteps <int>: set number of timesteps for "
|
||||
"simulation\n");
|
||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in "
|
||||
"x/y/z direction\n");
|
||||
printf("-half <int>: use half (1) or full (0) neighbor "
|
||||
"lists\n");
|
||||
printf("-r / --radius <real>: set cutoff radius\n");
|
||||
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
||||
printf("-w <file>: write input atoms to file\n");
|
||||
@ -239,48 +265,48 @@ int main(int argc, char** argv) {
|
||||
|
||||
printf("step\ttemp\t\tpressure\n");
|
||||
computeThermo(0, ¶m, &atom);
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if(param.write_atom_file != NULL) {
|
||||
if (param.write_atom_file != NULL) {
|
||||
writeAtom(&atom, ¶m);
|
||||
}
|
||||
|
||||
//writeInput(¶m, &atom);
|
||||
// writeInput(¶m, &atom);
|
||||
|
||||
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
if (param.vtk_file != NULL) {
|
||||
write_atoms_to_vtk_file(param.vtk_file, &atom, 0);
|
||||
}
|
||||
|
||||
for(int n = 0; n < param.ntimes; n++) {
|
||||
for (int n = 0; n < param.ntimes; n++) {
|
||||
bool reneigh = (n + 1) % param.reneigh_every == 0;
|
||||
initialIntegrate(reneigh, ¶m, &atom);
|
||||
if((n + 1) % param.reneigh_every) {
|
||||
if ((n + 1) % param.reneigh_every) {
|
||||
updatePbc(&atom, ¶m, false);
|
||||
} else {
|
||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
||||
}
|
||||
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
timer[FORCE] += computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
finalIntegrate(reneigh, ¶m, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
#ifdef CUDA_TARGET
|
||||
if (!((n + 1) % param.nstat) && (n + 1) < param.ntimes) {
|
||||
#ifdef CUDA_TARGET
|
||||
memcpyFromGPU(atom.x, atom.d_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3);
|
||||
#endif
|
||||
#endif
|
||||
computeThermo(n + 1, ¶m, &atom);
|
||||
}
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
if (param.vtk_file != NULL) {
|
||||
write_atoms_to_vtk_file(param.vtk_file, &atom, n + 1);
|
||||
}
|
||||
}
|
||||
@ -289,36 +315,52 @@ int main(int argc, char** argv) {
|
||||
computeThermo(-1, ¶m, &atom);
|
||||
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n",
|
||||
atom.Natoms,
|
||||
atom.Nghost,
|
||||
param.ntimes);
|
||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
||||
timer[TOTAL],
|
||||
timer[FORCE],
|
||||
timer[NEIGH],
|
||||
timer[TOTAL] - timer[FORCE] - timer[NEIGH]);
|
||||
printf(HLINE);
|
||||
|
||||
int nthreads = 0;
|
||||
int nthreads = 0;
|
||||
int chunkSize = 0;
|
||||
omp_sched_t schedKind;
|
||||
char schedType[10];
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
{
|
||||
omp_get_schedule(&schedKind, &chunkSize);
|
||||
omp_get_schedule(&schedKind, &chunkSize);
|
||||
|
||||
switch (schedKind)
|
||||
{
|
||||
case omp_sched_static: strcpy(schedType, "static"); break;
|
||||
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
|
||||
case omp_sched_guided: strcpy(schedType, "guided"); break;
|
||||
case omp_sched_auto: strcpy(schedType, "auto"); break;
|
||||
}
|
||||
switch (schedKind) {
|
||||
case omp_sched_static:
|
||||
strcpy(schedType, "static");
|
||||
break;
|
||||
case omp_sched_dynamic:
|
||||
strcpy(schedType, "dynamic");
|
||||
break;
|
||||
case omp_sched_guided:
|
||||
strcpy(schedType, "guided");
|
||||
break;
|
||||
case omp_sched_auto:
|
||||
strcpy(schedType, "auto");
|
||||
break;
|
||||
case omp_sched_monotonic:
|
||||
strcpy(schedType, "auto");
|
||||
break;
|
||||
}
|
||||
|
||||
nthreads = omp_get_max_threads();
|
||||
nthreads = omp_get_max_threads();
|
||||
}
|
||||
|
||||
printf("Num threads: %d\n", nthreads);
|
||||
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
1e-6 * (double)atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
#ifdef COMPUTE_STATS
|
||||
displayStatistics(&atom, ¶m, &stats, timer);
|
||||
#endif
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
234
src/verletlist/pbc.c
Normal file
234
src/verletlist/pbc.c
Normal file
@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
//---
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <pbc.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
int nmaxGhost;
|
||||
int *PBCx, *PBCy, *PBCz;
|
||||
|
||||
static void growPbc(Atom*);
|
||||
|
||||
/* exported subroutines */
|
||||
/* Reset the ghost-atom bookkeeping to the "nothing allocated" state: zero
 * capacity and NULL for the border map and the three PBC offset arrays. */
void initPbc(Atom* atom)
{
    PBCx = PBCy = PBCz = NULL;
    atom->border_map   = NULL;
    nmaxGhost          = 0;
}
|
||||
|
||||
/* update coordinates of ghost atoms */
|
||||
/* uses mapping created in setupPbc */
|
||||
/* Recompute the position of every ghost atom from the local atom it mirrors
 * (atom->border_map) shifted by the stored per-ghost offsets PBCx/PBCy/PBCz
 * times the box extents. Ghost g is stored at index Nlocal + g.
 * doReneighbor is not used by this implementation. */
void updatePbc_cpu(Atom* atom, Parameter* param, bool doReneighbor)
{
    const int firstGhost    = atom->Nlocal;
    const int* sourceOf     = atom->border_map;
    const MD_FLOAT boxLenX  = param->xprd;
    const MD_FLOAT boxLenY  = param->yprd;
    const MD_FLOAT boxLenZ  = param->zprd;

    for (int g = 0; g < atom->Nghost; g++) {
        const int src = sourceOf[g];
        const int dst = firstGhost + g;

        atom_x(dst) = atom_x(src) + PBCx[g] * boxLenX;
        atom_y(dst) = atom_y(src) + PBCy[g] * boxLenY;
        atom_z(dst) = atom_z(src) + PBCz[g] * boxLenZ;
    }
}
|
||||
|
||||
/* relocate atoms that have left domain according
|
||||
* to periodic boundary conditions */
|
||||
/* Wrap local atoms back into the box: a coordinate below zero is shifted up
 * by one box length, a coordinate at or beyond the box length is shifted
 * down by one box length. Each coordinate is corrected by at most one box
 * length per call. */
void updateAtomsPbc_cpu(Atom* atom, Parameter* param)
{
    const MD_FLOAT boxLenX = param->xprd;
    const MD_FLOAT boxLenY = param->yprd;
    const MD_FLOAT boxLenZ = param->zprd;

    for (int a = 0; a < atom->Nlocal; a++) {
        /* x direction */
        if (atom_x(a) < 0.0) {
            atom_x(a) += boxLenX;
        } else if (atom_x(a) >= boxLenX) {
            atom_x(a) -= boxLenX;
        }

        /* y direction */
        if (atom_y(a) < 0.0) {
            atom_y(a) += boxLenY;
        } else if (atom_y(a) >= boxLenY) {
            atom_y(a) -= boxLenY;
        }

        /* z direction */
        if (atom_z(a) < 0.0) {
            atom_z(a) += boxLenZ;
        } else if (atom_z(a) >= boxLenZ) {
            atom_z(a) -= boxLenZ;
        }
    }
}
|
||||
|
||||
/* setup periodic boundary conditions by
|
||||
* defining ghost atoms around domain
|
||||
* only creates mapping and coordinate corrections
|
||||
* that are then enforced in updatePbc */
|
||||
/* Append one ghost atom mirroring local atom `i`, shifted by (dx, dy, dz) box
 * lengths. Relies on `Nghost`, `border_map`, `i` and `atom` being in scope at
 * the point of use. `Nghost` holds the index of the last ghost written, so it
 * is incremented first. Wrapped in do { } while (0) so that the expansion is
 * a single statement and stays correct inside an unbraced if/else. */
#define ADDGHOST(dx, dy, dz)                                                   \
    do {                                                                       \
        Nghost++;                                                              \
        border_map[Nghost]                = i;                                 \
        PBCx[Nghost]                      = (dx);                              \
        PBCy[Nghost]                      = (dy);                              \
        PBCz[Nghost]                      = (dz);                              \
        atom->type[atom->Nlocal + Nghost] = atom->type[i];                     \
    } while (0)
|
||||
|
||||
/*
 * Build the ghost-atom mapping for periodic boundary conditions.
 *
 * For every local atom that lies within `cutneigh` of a periodic box face,
 * one ghost is registered per face, corner and edge image it needs. Only the
 * mapping (atom->border_map), the integer box offsets (PBCx/PBCy/PBCz) and
 * the ghost's type are written here; no ghost coordinates are computed in
 * this function. On return atom->Nghost holds the number of ghosts.
 *
 * The order in which ghosts are emitted (faces, then corners, then edges)
 * determines their indices and is kept stable.
 */
void setupPbc(Atom* atom, Parameter* param)
{
    /* Upper bound on the ghosts a single atom can create: 6 faces + 8 corners
     * + 12 edges. All of them can fire when the box is narrower than
     * 2 * cutneigh, so capacity is reserved for the full 26 before each atom
     * is processed. */
    enum { MAX_GHOSTS_PER_ATOM = 26 };

    int* border_map   = atom->border_map;
    MD_FLOAT xprd     = param->xprd;
    MD_FLOAT yprd     = param->yprd;
    MD_FLOAT zprd     = param->zprd;
    MD_FLOAT cutneigh = param->cutneigh;
    /* Index of the last ghost written; -1 while there is none. */
    int Nghost = -1;

    const bool periodicX = (param->pbc_x != 0);
    const bool periodicY = (param->pbc_y != 0);
    const bool periodicZ = (param->pbc_z != 0);

    for (int i = 0; i < atom->Nlocal; i++) {
        if (atom->Nlocal + Nghost + MAX_GHOSTS_PER_ATOM >= atom->Nmax) {
            growAtom(atom);
        }

        if (Nghost + MAX_GHOSTS_PER_ATOM >= nmaxGhost) {
            growPbc(atom);
            /* growPbc reallocates the map, so refresh the cached pointer. */
            border_map = atom->border_map;
        }

        MD_FLOAT x = atom_x(i);
        MD_FLOAT y = atom_y(i);
        MD_FLOAT z = atom_z(i);

        /* Is the atom within cutneigh of the lower / upper face per axis? */
        const bool xLo = (x < cutneigh);
        const bool xHi = (x >= (xprd - cutneigh));
        const bool yLo = (y < cutneigh);
        const bool yHi = (y >= (yprd - cutneigh));
        const bool zLo = (z < cutneigh);
        const bool zHi = (z >= (zprd - cutneigh));

        /* Setup ghost atoms */
        /* 6 planes */
        if (periodicX) {
            if (xLo) {
                ADDGHOST(+1, 0, 0);
            }
            if (xHi) {
                ADDGHOST(-1, 0, 0);
            }
        }

        if (periodicY) {
            if (yLo) {
                ADDGHOST(0, +1, 0);
            }
            if (yHi) {
                ADDGHOST(0, -1, 0);
            }
        }

        if (periodicZ) {
            if (zLo) {
                ADDGHOST(0, 0, +1);
            }
            if (zHi) {
                ADDGHOST(0, 0, -1);
            }
        }

        /* 8 corners */
        if (periodicX && periodicY && periodicZ) {
            if (xLo && yLo && zLo) {
                ADDGHOST(+1, +1, +1);
            }
            if (xLo && yHi && zLo) {
                ADDGHOST(+1, -1, +1);
            }
            if (xLo && yLo && zHi) {
                ADDGHOST(+1, +1, -1);
            }
            if (xLo && yHi && zHi) {
                ADDGHOST(+1, -1, -1);
            }
            if (xHi && yLo && zLo) {
                ADDGHOST(-1, +1, +1);
            }
            if (xHi && yHi && zLo) {
                ADDGHOST(-1, -1, +1);
            }
            if (xHi && yLo && zHi) {
                ADDGHOST(-1, +1, -1);
            }
            if (xHi && yHi && zHi) {
                ADDGHOST(-1, -1, -1);
            }
        }

        /* 12 edges */
        if (periodicX && periodicZ) {
            if (xLo && zLo) {
                ADDGHOST(+1, 0, +1);
            }
            if (xLo && zHi) {
                ADDGHOST(+1, 0, -1);
            }
            if (xHi && zLo) {
                ADDGHOST(-1, 0, +1);
            }
            if (xHi && zHi) {
                ADDGHOST(-1, 0, -1);
            }
        }

        if (periodicY && periodicZ) {
            if (yLo && zLo) {
                ADDGHOST(0, +1, +1);
            }
            if (yLo && zHi) {
                ADDGHOST(0, +1, -1);
            }
            if (yHi && zLo) {
                ADDGHOST(0, -1, +1);
            }
            if (yHi && zHi) {
                ADDGHOST(0, -1, -1);
            }
        }

        if (periodicX && periodicY) {
            if (yLo && xLo) {
                ADDGHOST(+1, +1, 0);
            }
            if (yLo && xHi) {
                ADDGHOST(-1, +1, 0);
            }
            if (yHi && xLo) {
                ADDGHOST(+1, -1, 0);
            }
            if (yHi && xHi) {
                ADDGHOST(-1, -1, 0);
            }
        }
    }
    // increase by one to make it the ghost atom count
    atom->Nghost = Nghost + 1;
}
|
||||
|
||||
/* internal subroutines */
|
||||
/* Enlarge the ghost bookkeeping arrays (atom->border_map and PBCx/PBCy/PBCz)
 * by DELTA entries each, passing both the new and the previous size in bytes
 * to reallocate(). */
void growPbc(Atom* atom)
{
    const size_t oldBytes = nmaxGhost * sizeof(int);

    nmaxGhost += DELTA;

    const size_t newBytes = nmaxGhost * sizeof(int);

    atom->border_map = (int*)reallocate(atom->border_map, ALIGNMENT, newBytes, oldBytes);
    PBCx             = (int*)reallocate(PBCx, ALIGNMENT, newBytes, oldBytes);
    PBCy             = (int*)reallocate(PBCy, ALIGNMENT, newBytes, oldBytes);
    PBCz             = (int*)reallocate(PBCz, ALIGNMENT, newBytes, oldBytes);
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@ -11,7 +11,7 @@
|
||||
|
||||
#ifndef __PBC_H_
|
||||
#define __PBC_H_
|
||||
extern void initPbc();
|
||||
extern void initPbc(Atom*);
|
||||
extern void updatePbc_cpu(Atom*, Parameter*, bool);
|
||||
extern void updateAtomsPbc_cpu(Atom*, Parameter*);
|
||||
extern void setupPbc(Atom*, Parameter*);
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@ -1,37 +0,0 @@
|
||||
# Utility tools for MD-Bench
|
||||
|
||||
**mdBench.c:** Single file version for MD-Bench, used mostly for teaching purposes.
|
||||
|
||||
**run_stub.sh:** Bash script to run the MD-Bench stubbed force calculation for different configurations and evaluate the performance.
|
||||
The configuration parameters are:
|
||||
- **-a <numbers>:** specify the number of atoms per unit cell (the number of neighbors per atom is this value minus 1), the default is 8.
|
||||
- **-n <numbers>:** timesteps to run the simulation, the default is 200.
|
||||
- **-nx <numbers>:** number of unit cells in the x dimension, the default is 4.
|
||||
- **-ny <numbers>:** number of unit cells in the y dimension, the default is 4.
|
||||
- **-nz <numbers>:** number of unit cells in the z dimension, the default is 2.
|
||||
|
||||
Note that each of these parameters can also be given as a list. The stubbed force calculation is then executed once for every combination of the listed values. For example, the following command:
|
||||
|
||||
```bash
|
||||
bash run_stub.sh -a "8 16" -nx "4 8" -ny 8 -nz 4
|
||||
```
|
||||
|
||||
Will execute the stubbed force calculation for the following 4 configurations:
|
||||
|
||||
```bash
|
||||
1> 8 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
|
||||
2> 16 atoms per unit cell on a 4x8x4 grid of unit cells, 200 timesteps
|
||||
3> 8 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
|
||||
4> 16 atoms per unit cell on a 8x8x4 grid of unit cells, 200 timesteps
|
||||
```
|
||||
|
||||
The following parameters are also available:
|
||||
- **-f <frequency>:** CPU frequency in GHz (ensure your CPU frequency is fixed by disabling Turbo mode). If this option is set, additional performance metrics such as cycles per iteration are displayed.
|
||||
- **-o <file>:** output file (.txt) for the results, the default is *run_results.txt*.
|
||||
- **-r <runs>:** number of runs for each configuration (only the values for the best run are displayed), the default is 3.
|
||||
|
||||
**plot_run_stub_data.py:** Python script to plot the data generated by the *run_stub.sh* script. Just provide the name of the .txt file as a parameter and this script generates a corresponding PDF with the same file name.
|
||||
|
||||
**plot_gather_data.py:** Python script to plot the data generated by the gather benchmark. Just provide the name of the .txt file containing the gather output as a parameter and this script generates a corresponding PDF with the same file name. Multiple outputs with different strides can be included in the text file by concatenating the outputs. The script handles output from both the standard simple-array case and the MD variant.
|
||||
|
||||
**cache.py:** Python script to run the cache simulator with the data obtained from the memory tracer. Just run it with the tracer output file name as a parameter. The cache specifications can be directly adapted in the script to match those of the target processor of interest.
|
@ -1,33 +0,0 @@
|
||||
import sys
|
||||
from cachesim import CacheSimulator, Cache, MainMemory
|
||||
|
||||
# Replays a memory trace through a simulated cache hierarchy and prints the
# resulting statistics.
#
# Usage: python cache.py <trace-file>
# Each non-empty trace line must have the form "<op>: <hex address>", where the
# first character of <op> is 'R' (load) or 'W' (store). Every access is
# simulated with a length of 8 bytes.
filename = sys.argv[1]
mem = MainMemory()

# Alternative hierarchy kept for reference:
#l3 = Cache("L3", 20480, 16, 64, "LRU")                           # 20MB: 20480 sets, 16-ways with cacheline size of 64 bytes
#l2 = Cache("L2", 256, 4, 64, "LRU", store_to=l3, load_from=l3)   # 256KB
#l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)    # 32KB

# Cascade Lake
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
mem.load_to(l2)
mem.store_from(l3)
cs = CacheSimulator(l1, mem)

with open(filename, 'r') as fp:
    # Iterate the file lazily: traces can be large and readlines() would load
    # the whole file into memory first.
    for line in fp:
        # Tolerate blank lines (e.g. a trailing newline at the end of the trace)
        # instead of failing on the tuple unpacking below.
        if not line.strip():
            continue

        op, addr = line.split(": ")
        op = op[0]
        addr = int(addr, 16)

        if op == 'W':
            cs.store(addr, length=8)
        elif op == 'R':
            cs.load(addr, length=8)
        else:
            sys.exit("Invalid operation: {}".format(op))

cs.force_write_back()
cs.print_stats()
|
@ -1,39 +0,0 @@
|
||||
import sys
|
||||
from cachesim import CacheSimulator, Cache, MainMemory
|
||||
|
||||
def get_set_id(cache, addr):
    """Return the index of the set in `cache` that the address `addr` maps to.

    The low `cache.cl_bits` bits of the address are dropped and the remainder
    is reduced modulo `cache.sets`.
    """
    line_number = addr >> cache.cl_bits
    return line_number % cache.sets
|
||||
|
||||
# Counts, for every cache level, how many trace accesses map to each cache set
# and prints the non-empty sets of L1 and L2.
#
# Usage: python <script> <trace-file>
# Each non-empty trace line must have the form "<op>: <hex address>".
# A second command-line argument was previously read but never used; it is no
# longer required (passing it is still harmless).
filename = sys.argv[1]
mem = MainMemory()

# Cascade Lake
l3 = Cache("L3", 14336, 16, 64, "LRU", write_allocate=False)
l2 = Cache("L2", 1024, 16, 64, "LRU", store_to=l3, victims_to=l3)
l1 = Cache("L1", 64, 8, 64, "LRU", store_to=l2, load_from=l2)
mem.load_to(l2)
mem.store_from(l3)
cs = CacheSimulator(l1, mem)

# One access counter per set and cache level
sets_hist = {
    'l1': {s: 0 for s in range(l1.sets)},
    'l2': {s: 0 for s in range(l2.sets)},
    'l3': {s: 0 for s in range(l3.sets)}
}

with open(filename, 'r') as fp:
    # Iterate lazily instead of loading the whole trace with readlines()
    for line in fp:
        # Skip blank lines rather than failing on the unpacking below
        if not line.strip():
            continue

        # Only the address matters here; the operation field is ignored but the
        # two-field line format is still enforced by the unpacking.
        _, addr = line.split(": ")
        addr = int(addr, 16)
        sets_hist['l1'][get_set_id(l1, addr)] += 1
        sets_hist['l2'][get_set_id(l2, addr)] += 1
        sets_hist['l3'][get_set_id(l3, addr)] += 1

for cache_level, data in sets_hist.items():
    # L3 counts are collected but not reported
    if cache_level != 'l3':
        print(cache_level, ": ")
        for set_id in data:
            if data[set_id] > 0:
                print(set_id, " -> ", data[set_id])
|
@ -1,116 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Validate the positional <binary> argument. Diagnostics go to stderr and the
# script exits with a non-zero status so that callers can detect the failure.
USAGE="Use: $0 <binary> [-c <core>] [-f <freq>] [-n <nruns>] [-l <log>] [-s]"
if [[ -z "$1" ]]; then
    echo "$USAGE" >&2
    exit 1
fi
if [[ ! -f "$1" ]]; then
    echo "Binary file not found, make sure to use 'make'" >&2
    exit 1
fi
if [[ ! -f "$1-stub" ]]; then
    echo "Binary file for stubbed case not found, make sure to use 'make VARIANT=stub'" >&2
    exit 1
fi

# Split the binary name into its components; everything up to the first '-'
# is dropped, the rest is expected to be $OPT_SCHEME-$TAG-$ISA-$PREC.
MDBENCH_BIN=$1
BIN_INFO="${MDBENCH_BIN#*-}" # $OPT_SCHEME-$TAG-$ISA-$PREC
OPT_SCHEME="${BIN_INFO%%-*}"
PREC="${BIN_INFO##*-}"
BIN_INFO="${BIN_INFO#*-}" # $TAG-$ISA-$PREC
BIN_INFO="${BIN_INFO%-*}" # $TAG-$ISA
TAG="${BIN_INFO%%-*}"
ISA="${BIN_INFO##*-}"

# Defaults; each can be preset through the environment and overridden by a flag
CORE="${CORE:-0}"
FREQ="${FREQ:-2.4}"
NRUNS="${NRUNS:-3}"
LOG="${LOG:-latencies_and_cfds.$(hostname).log}"
STUB_ONLY="${STUB_ONLY:-false}"
SKIP_SET_FREQ="${SKIP_SET_FREQ:-false}"

# Start option parsing at $2, because $1 is the binary
OPTIND=2
while getopts "c:f:n:l:s" flag; do
    case "${flag}" in
        c) CORE=${OPTARG};;
        f) FREQ=${OPTARG};;
        n) NRUNS=${OPTARG};;
        l) LOG=${OPTARG};;
        s) STUB_ONLY=true;;
        # getopts has already reported the offending option; stop instead of
        # silently running with a configuration the user did not ask for.
        *) echo "$USAGE" >&2; exit 1;;
    esac
done
|
||||
|
||||
# Settings derived from the parsed binary name and the host CPU
MDBENCH_BIN=./MDBench-$OPT_SCHEME-$TAG-$ISA-$PREC
FIXED_PARAMS="--freq $FREQ"
CPU_VENDOR=$(lscpu | grep "Vendor ID" | tr -s ' ' | cut -d ' ' -f3)

# Prefetcher configurations are only cycled through on Intel CPUs; on any
# other vendor a single "IGNORE" pass is made.
case "$CPU_VENDOR" in
    GenuineIntel)
        ALL_PREFETCHERS="HW_PREFETCHER,CL_PREFETCHER,DCU_PREFETCHER,IP_PREFETCHER"
        DEFAULT_PREFETCHERS=("ALL HW_PREFETCHER CL_PREFETCHER DCU_PREFETCHER IP_PREFETCHER NONE")
        ;;
    *)
        ALL_PREFETCHERS=""
        DEFAULT_PREFETCHERS=("IGNORE")
        ;;
esac

# Keep a PREFETCHERS list supplied through the environment (even an empty
# one); fall back to the defaults only when the variable is unset.
: "${PREFETCHERS=${DEFAULT_PREFETCHERS}}"

# Parameters for the two stubbed runs depend on the optimization scheme
case "$OPT_SCHEME" in
    gromacs)
        STUB1_NAME=stub-33
        STUB1_PARAMS="-na 4 -nn 33"
        STUB2_NAME=stub-128
        STUB2_PARAMS="-na 4 -nn 128"
        ;;
    *)
        STUB1_NAME=stub-76
        STUB1_PARAMS="-nn 76"
        STUB2_NAME=stub-1024
        STUB2_PARAMS="-nn 1024"
        ;;
esac
|
||||
|
||||
# Runs the given command (plus $FIXED_PARAMS) $NRUNS times under likwid-pin on
# core $CORE and leaves the smallest reported "Cycles/SIMD iteration" value in
# the global variable BEST. BEST keeps its initial value of 10000000 if no run
# produced a result.
function run_benchmark() {
    BEST=10000000
    for i in $(seq $NRUNS); do
        RES=$(likwid-pin -c $CORE "$* $FIXED_PARAMS" 2>&1 | grep "Cycles/SIMD iteration" | cut -d ' ' -f3)
        # A failed run prints no matching line. Skip it explicitly, otherwise
        # bc receives the incomplete expression "$BEST > " and reports a
        # syntax error.
        if [ -z "$RES" ]; then
            echo "Warning: no 'Cycles/SIMD iteration' value reported by: $*" >&2
            continue
        fi
        if (( $(echo "$BEST > $RES" | bc -l ) )); then
            BEST=$RES
        fi
    done
}
|
||||
|
||||
# Print the run configuration and append it to the log file
cat <<EOF | tee -a $LOG
Tag: $TAG
Optimization scheme: $OPT_SCHEME
Instruction set: $ISA
Precision: $PREC
Binary: $MDBENCH_BIN(-stub)
Frequency: $FREQ
Number of runs: $NRUNS
Run only stubbed cases: $STUB_ONLY
EOF

# Pin the CPU frequency unless the caller opted out through SKIP_SET_FREQ
if [[ "$SKIP_SET_FREQ" == "false" ]]; then
    echo "Fixing frequencies..."
    likwid-setFrequencies -f $FREQ -t 0
fi
|
||||
|
||||
# For every prefetcher configuration: apply it on $CORE, run the benchmarks and
# log one summary line "<config>: <case>=<best cycles>, ...".
for p in $PREFETCHERS; do
    if [ "$p" != "IGNORE" ]; then
        case "$p" in
            ALL)
                likwid-features -c $CORE -e $ALL_PREFETCHERS
                ;;
            NONE)
                likwid-features -c $CORE -d $ALL_PREFETCHERS
                ;;
            *)
                # Single prefetcher: disable all of them, then enable this one
                likwid-features -c $CORE -d $ALL_PREFETCHERS
                likwid-features -c $CORE -e $p
                ;;
        esac

        echo "Prefetcher settings: $p"
        likwid-features -c $CORE -l
    fi

    MSG="$p: "
    if [ "$STUB_ONLY" == "false" ]; then
        run_benchmark $MDBENCH_BIN
        MSG+="standard=$BEST, "

        run_benchmark $MDBENCH_BIN -i data/copper_melting/input_lj_cu_one_atomtype_20x20x20.dmp
        MSG+="melt=$BEST, "

        run_benchmark $MDBENCH_BIN -p data/argon_1000/mdbench_params.conf -i data/argon_1000/tprout.gro
        MSG+="argon=$BEST, "
    fi

    # The stubbed cases are always run
    run_benchmark $MDBENCH_BIN-stub $STUB1_PARAMS
    MSG+="$STUB1_NAME=$BEST, "

    run_benchmark $MDBENCH_BIN-stub $STUB2_PARAMS
    MSG+="$STUB2_NAME=$BEST"

    echo $MSG | tee -a $LOG
done
|
52
util/gather-bench/.gitignore
vendored
52
util/gather-bench/.gitignore
vendored
@ -1,52 +0,0 @@
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
*.elf
|
||||
|
||||
# Linker output
|
||||
*.ilk
|
||||
*.map
|
||||
*.exp
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
*.la
|
||||
*.lo
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
|
||||
# Debug files
|
||||
*.dSYM/
|
||||
*.su
|
||||
*.idb
|
||||
*.pdb
|
||||
|
||||
# Kernel Module Compile Results
|
||||
*.mod*
|
||||
*.cmd
|
||||
.tmp_versions/
|
||||
modules.order
|
||||
Module.symvers
|
||||
Mkfile.old
|
||||
dkms.conf
|
@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 RRZE-HPC
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -1,126 +0,0 @@
|
||||
#CONFIGURE BUILD SYSTEM
# These stay recursive (=) because they reference TAG and ISA, which are not
# defined in this file before this point (presumably they come from config.mk
# or the command line -- verify).
TARGET	   = gather-bench-$(TAG)
BUILD_DIR  = ./$(TAG)
SRC_DIR    = ./src
MAKE_DIR   = ./
ISA_DIR    = ./src/$(ISA)
Q         ?= @

#DO NOT EDIT BELOW
include $(MAKE_DIR)/config.mk
include $(MAKE_DIR)/include_$(TAG).mk
include $(MAKE_DIR)/include_LIKWID.mk
INCLUDES  += -I./src/includes

# Sources are looked up in the generic and the ISA-specific source directory
VPATH     = $(SRC_DIR) ${ISA_DIR}

# The file lists are expanded once (:=) now that all includes have been read;
# with recursive assignment every reference would re-run the wildcard globs.
# main*.c is excluded from OBJ because the link rules compile it directly.
ASM       := $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
ASM       += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.f90))
OBJ       := $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
OBJ       += $(patsubst $(SRC_DIR)/%.cc, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cc))
OBJ       += $(patsubst $(SRC_DIR)/%.cpp, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
OBJ       += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
OBJ       += $(patsubst $(SRC_DIR)/%.F90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
OBJ       += $(patsubst $(SRC_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.s))
OBJ       += $(patsubst $(ISA_DIR)/%.S, $(BUILD_DIR)/%.o,$(wildcard $(ISA_DIR)/*.S))
CPPFLAGS  := $(CPPFLAGS) $(DEFINES) $(INCLUDES) -DISA_$(ISA)
|
||||
|
||||
# With VARIANT set, a plain `make` builds $(TARGET)-$(VARIANT)
ifneq ($(VARIANT),)
    .DEFAULT_GOAL := ${TARGET}-$(VARIANT)
endif

# DATA_LAYOUT=AOS selects the -DAOS code path
ifeq ($(strip $(DATA_LAYOUT)),AOS)
    CPPFLAGS += -DAOS
endif

# Boolean switches: an option whose value is exactly "true" (surrounding
# whitespace ignored) adds -D<OPTION> to the preprocessor flags.
feature_switches := TEST PADDING MEASURE_GATHER_CYCLES ONLY_FIRST_DIMENSION MEM_TRACER
CPPFLAGS += $(foreach sw,$(feature_switches),$(if $(and $(filter 1,$(words $($(sw)))),$(filter true,$($(sw)))),-D$(sw)))
|
||||
|
||||
# Link the default binary from main.c and all objects.
# The build directory is an order-only prerequisite: it must exist, but its
# timestamp (which changes whenever a file is created in it) must not trigger
# a relink.
${TARGET}: $(OBJ) $(SRC_DIR)/main.c | $(BUILD_DIR)
	@echo "===> LINKING  $(TARGET)"
	$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET) $(SRC_DIR)/main.c $(OBJ) $(LIBS)

# Link a variant binary $(TARGET)-<variant> from main-<variant>.c
${TARGET}-%: $(OBJ) $(SRC_DIR)/main-%.c | $(BUILD_DIR)
	@echo "===> LINKING  $(TARGET)-$* "
	$(Q)${LINKER} ${CPPFLAGS} ${LFLAGS} -o $(TARGET)-$* $(SRC_DIR)/main-$*.c $(OBJ) $(LIBS)

# Generate assembly listings only; `asm` is a command, not a file
.PHONY: asm
asm: $(ASM) | $(BUILD_DIR)
|
||||
|
||||
# Compile rules. Sources are located through VPATH. Every rule lists the build
# directory as an order-only prerequisite so that it exists before any output
# is written, including under `make -j`.

# C: compile, then write the header dependencies to <stem>.d
$(BUILD_DIR)/%.o:  %.c | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
	$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM  $< > $(BUILD_DIR)/$*.d

$(BUILD_DIR)/%.s:  %.c | $(BUILD_DIR)
	@echo "===> GENERATE ASM  $@"
	$(Q)$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

$(BUILD_DIR)/%.s:  %.f90 | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(FC) -S  $(FCFLAGS) $< -o $@

# C++ (.cc / .cpp): compile, then write the header dependencies to <stem>.d
$(BUILD_DIR)/%.o:  %.cc | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(CXX) -c  $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	$(Q)$(CXX) $(CPPFLAGS) -MT $@ -MM  $< > $(BUILD_DIR)/$*.d

$(BUILD_DIR)/%.o:  %.cpp | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(CXX) -c  $(CPPFLAGS) $(CXXFLAGS) $< -o $@
	$(Q)$(CXX) $(CPPFLAGS) -MT $@ -MM  $< > $(BUILD_DIR)/$*.d

# Fortran: .F90 is passed CPPFLAGS as well, .f90 is not
$(BUILD_DIR)/%.o:  %.f90 | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(FC) -c  $(FCFLAGS) $< -o $@

$(BUILD_DIR)/%.o:  %.F90 | $(BUILD_DIR)
	@echo "===> COMPILE  $@"
	$(Q)$(FC) -c  $(CPPFLAGS)  $(FCFLAGS) $< -o $@

# Assembly: .s goes straight to the assembler, .S through the C compiler driver
$(BUILD_DIR)/%.o:  %.s | $(BUILD_DIR)
	@echo "===> ASSEMBLE  $@"
	$(Q)$(AS) $(ASFLAGS) $< -o $@

$(BUILD_DIR)/%.o:  %.S | $(BUILD_DIR)
	@echo "===> ASSEMBLE  $@"
	$(Q)$(CC) -c $(CPPFLAGS) $< -o $@
|
||||
|
||||
# `tags` has no prerequisites, so once a file named tags exists the target
# would always be considered up to date; declare it phony to regenerate on
# every request.
.PHONY: tags
tags:
	@echo "===> GENERATE  TAGS"
	$(Q)ctags -R

# Create the build directory; -p keeps this from failing when it already exists
$(BUILD_DIR):
	@mkdir -p $@
|
||||
|
||||
# Pull in the generated header dependencies unless a cleaning goal was
# requested. $(filter) matches whole goal names, so both `clean` and
# `distclean` are recognised and unrelated goals never match by accident.
ifeq ($(filter clean distclean,$(MAKECMDGOALS)),)
-include $(OBJ:.o=.d)
endif
|
||||
|
||||
.PHONY: clean distclean

# Remove the build directory and the ctags index
clean:
	@echo "===> CLEAN"
	@$(RM) -r $(BUILD_DIR)
	@$(RM) tags

# Additionally remove the linked default binary
distclean: clean
	@echo "===> DIST CLEAN"
	@$(RM) $(TARGET) tags
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user