Compare commits
2 Commits
main
...
0094c3c4e1
Author | SHA1 | Date | |
---|---|---|---|
|
0094c3c4e1 | ||
|
a13a0f3bae |
176
.clang-format
176
.clang-format
@@ -1,176 +0,0 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: WebKit
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: DontAlign
|
||||
AlignArrayOfStructures: None
|
||||
AlignConsecutiveAssignments: Consecutive
|
||||
AlignConsecutiveBitFields: None
|
||||
AlignConsecutiveDeclarations: None
|
||||
AlignConsecutiveMacros: Consecutive
|
||||
AlignEscapedNewlines: Right
|
||||
AlignOperands: Align
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortBlocksOnASingleLine: Never
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: OnlyFirstIf
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
AlwaysBreakTemplateDeclarations: MultiLine
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: true
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: true
|
||||
SplitEmptyRecord: true
|
||||
SplitEmptyNamespace: true
|
||||
BreakBeforeBinaryOperators: None
|
||||
BreakBeforeBraces: WebKit
|
||||
BreakBeforeInheritanceComma: false
|
||||
BreakInheritanceList: BeforeColon
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 90
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: false
|
||||
DeriveLineEnding: true
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
EmptyLineAfterAccessModifier: Never
|
||||
EmptyLineBeforeAccessModifier: LogicalBlock
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
BasedOnStyle: ''
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
FixNamespaceComments: false
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
IfMacros:
|
||||
- KJ_IF_MAYBE
|
||||
IncludeBlocks: Preserve
|
||||
IncludeCategories:
|
||||
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '(Test)?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
IndentAccessModifiers: false
|
||||
IndentCaseLabels: false
|
||||
IndentCaseBlocks: false
|
||||
IndentGotoLabels: true
|
||||
IndentPPDirectives: None
|
||||
IndentExternBlock: AfterExternBlock
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
InsertTrailingCommas: None
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||
LambdaBodyIndentation: Signature
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: Inner
|
||||
ObjCBinPackProtocolList: Auto
|
||||
ObjCBlockIndentWidth: 4
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: true
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 200
|
||||
PenaltyBreakBeforeFirstCallParameter: 19
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 60
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Left
|
||||
PPIndentWidth: -1
|
||||
ReferenceAlignment: Pointer
|
||||
ReflowComments: true
|
||||
ShortNamespaceLines: 1
|
||||
SortIncludes: CaseSensitive
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 1
|
||||
SpacesInAngles: Never
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInLineCommentPrefix:
|
||||
Minimum: 1
|
||||
Maximum: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeSquareBrackets: false
|
||||
BitFieldColonSpacing: Both
|
||||
Standard: Latest
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 8
|
||||
UseCRLF: false
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
...
|
14
.clang-tidy
14
.clang-tidy
@@ -1,14 +0,0 @@
|
||||
---
|
||||
Checks: 'clang-diagnostic-*,clang-analyzer-*,clang-bugprone-*,readability-identifier-naming'
|
||||
WarningsAsErrors: true
|
||||
HeaderFilterRegex: '.*'
|
||||
AnalyzeTemporaryDtors: false
|
||||
CheckOptions:
|
||||
- key: readability-identifier-naming.StructCase
|
||||
value: 'CamelCase'
|
||||
- key: readability-identifier-naming.FunctionCase
|
||||
value: 'camelBack'
|
||||
- key: readability-identifier-naming.VariableCase
|
||||
value: 'camelBack'
|
||||
- key: readability-identifier-naming.GlobalConstantCase
|
||||
value: 'UPPER_CASE'
|
3
.clangd
3
.clangd
@@ -1,3 +0,0 @@
|
||||
CompileFlags:
|
||||
Add: [-I/Users/jan/prg/MD-Bench/src/verletlist/, -I/Users/jan/prg/MD-Bench/src/common/, -DALIGNMENT=64]
|
||||
Compiler: clang
|
136
Makefile
136
Makefile
@@ -1,32 +1,123 @@
|
||||
#CONFIGURE BUILD SYSTEM
|
||||
TAG = $(OPT_TAG)-$(TOOLCHAIN)-$(DATA_TYPE)
|
||||
TARGET = MDBench-$(TAG)
|
||||
BUILD_DIR = ./build/build-$(TAG)
|
||||
SRC_ROOT = ./src
|
||||
SRC_DIR = $(SRC_ROOT)/$(OPT_SCHEME)
|
||||
COMMON_DIR = $(SRC_ROOT)/common
|
||||
CUDA_DIR = $(SRC_DIR)/cuda
|
||||
MAKE_DIR = ./make
|
||||
IDENTIFIER = $(OPT_SCHEME)-$(TAG)-$(ISA)-$(DATA_TYPE)
|
||||
TARGET = MDBench-$(IDENTIFIER)
|
||||
BUILD_DIR = ./build-$(IDENTIFIER)
|
||||
SRC_DIR = ./$(OPT_SCHEME)
|
||||
ASM_DIR = ./asm
|
||||
COMMON_DIR = ./common
|
||||
CUDA_DIR = ./$(SRC_DIR)/cuda
|
||||
MAKE_DIR = ./
|
||||
Q ?= @
|
||||
|
||||
#DO NOT EDIT BELOW
|
||||
include config.mk
|
||||
include $(MAKE_DIR)/include_$(TOOLCHAIN).mk
|
||||
include $(MAKE_DIR)/config.mk
|
||||
include $(MAKE_DIR)/include_$(TAG).mk
|
||||
include $(MAKE_DIR)/include_LIKWID.mk
|
||||
ifneq ($(strip $(ISA)),NONE)
|
||||
include $(MAKE_DIR)/include_ISA.mk
|
||||
endif
|
||||
INCLUDES += -I./$(SRC_DIR) -I./$(COMMON_DIR)
|
||||
include $(MAKE_DIR)/include_GROMACS.mk
|
||||
INCLUDES += -I./$(SRC_DIR)/includes -I./$(COMMON_DIR)/includes
|
||||
|
||||
VPATH = $(SRC_DIR) $(COMMON_DIR) $(CUDA_DIR)
|
||||
ifeq ($(strip $(OPT_SCHEME)),gromacs)
|
||||
DEFINES += -DGROMACS
|
||||
endif
|
||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||
DEFINES += -DAOS
|
||||
endif
|
||||
ifeq ($(strip $(DATA_TYPE)),SP)
|
||||
DEFINES += -DPRECISION=1
|
||||
else
|
||||
DEFINES += -DPRECISION=2
|
||||
endif
|
||||
|
||||
ifneq ($(ASM_SYNTAX), ATT)
|
||||
ASFLAGS += -masm=intel
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(SORT_ATOMS)),true)
|
||||
DEFINES += -DSORT_ATOMS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||
DEFINES += -DEXPLICIT_TYPES
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEM_TRACER)),true)
|
||||
DEFINES += -DMEM_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(INDEX_TRACER)),true)
|
||||
DEFINES += -DINDEX_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(COMPUTE_STATS)),true)
|
||||
DEFINES += -DCOMPUTE_STATS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||
DEFINES += -DXTC_OUTPUT
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
||||
DEFINES += -DUSE_REFERENCE_VERSION
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
|
||||
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(DEBUG)),true)
|
||||
DEFINES += -DDEBUG
|
||||
endif
|
||||
|
||||
ifneq ($(VECTOR_WIDTH),)
|
||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SIMD_KERNEL__)),true)
|
||||
DEFINES += -D__SIMD_KERNEL__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SSE__)),true)
|
||||
DEFINES += -D__ISA_SSE__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX__)),true)
|
||||
DEFINES += -D__ISA_AVX__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
||||
DEFINES += -D__ISA_AVX_FMA__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX2__)),true)
|
||||
DEFINES += -D__ISA_AVX2__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX512__)),true)
|
||||
DEFINES += -D__ISA_AVX512__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
|
||||
DEFINES += -DENABLE_OMP_SIMD
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(USE_SIMD_KERNEL)),true)
|
||||
DEFINES += -DUSE_SIMD_KERNEL
|
||||
endif
|
||||
|
||||
VPATH = $(SRC_DIR) $(ASM_DIR) $(CUDA_DIR)
|
||||
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
|
||||
OBJ = $(filter-out $(BUILD_DIR)/main%, $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
||||
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(COMMON_DIR)/*.c))
|
||||
OVERWRITE:= $(patsubst $(ASM_DIR)/%-new.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*-new.s))
|
||||
OBJ = $(filter-out $(BUILD_DIR)/main% $(OVERWRITE),$(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c)))
|
||||
OBJ += $(patsubst $(ASM_DIR)/%.s, $(BUILD_DIR)/%.o,$(wildcard $(ASM_DIR)/*.s))
|
||||
OBJ += $(patsubst $(COMMON_DIR)/%.c, $(BUILD_DIR)/%-common.o,$(wildcard $(COMMON_DIR)/*.c))
|
||||
ifeq ($(strip $(TAG)),NVCC)
|
||||
OBJ += $(patsubst $(CUDA_DIR)/%.cu, $(BUILD_DIR)/%-cuda.o,$(wildcard $(CUDA_DIR)/*.cu))
|
||||
endif
|
||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
||||
|
||||
# $(warning $(OBJ))
|
||||
|
||||
ifneq ($(VARIANT),)
|
||||
.DEFAULT_GOAL := ${TARGET}-$(VARIANT)
|
||||
DEFINES += -DVARIANT=$(VARIANT)
|
||||
@@ -45,6 +136,11 @@ $(BUILD_DIR)/%.o: %.c
|
||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
||||
|
||||
$(BUILD_DIR)/%-common.o: $(COMMON_DIR)/%.c
|
||||
$(info ===> COMPILE $@)
|
||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
$(Q)$(CC) $(CPPFLAGS) -MT $@ -MM $< > $(BUILD_DIR)/$*.d
|
||||
|
||||
$(BUILD_DIR)/%-cuda.o: %.cu
|
||||
$(info ===> COMPILE $@)
|
||||
$(Q)$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
|
||||
@@ -63,16 +159,18 @@ $(BUILD_DIR)/%.o: %.s
|
||||
clean:
|
||||
$(info ===> CLEAN)
|
||||
@rm -rf $(BUILD_DIR)
|
||||
@rm -rf $(TARGET)*
|
||||
@rm -f tags
|
||||
|
||||
cleanall:
|
||||
$(info ===> CLEAN)
|
||||
@rm -rf build
|
||||
@rm -rf build-*
|
||||
@rm -rf MDBench-*
|
||||
@rm -f tags
|
||||
|
||||
distclean: clean
|
||||
$(info ===> DIST CLEAN)
|
||||
@rm -f $(TARGET)
|
||||
@rm -f $(TARGET)*
|
||||
@rm -f tags
|
||||
|
||||
info:
|
||||
@@ -86,6 +184,6 @@ tags:
|
||||
$(Q)ctags -R
|
||||
|
||||
$(BUILD_DIR):
|
||||
@mkdir -p $(BUILD_DIR)
|
||||
@mkdir $(BUILD_DIR)
|
||||
|
||||
-include $(OBJ:.o=.d)
|
||||
|
54
README.md
54
README.md
@@ -1,14 +1,34 @@
|
||||
# MD-Bench
|
||||
|
||||
MD-Bench is a toolbox for the performance engineering of short-range force
|
||||
calculation kernels on molecular-dynamics applications. It aims at covering all
|
||||
available state-of-the-art algorithms from different community codes such as
|
||||
LAMMPS and GROMACS.
|
||||

|
||||
|
||||
MD-Bench is a toolbox for the performance engineering of short-range force calculation kernels on molecular-dynamics applications.
|
||||
It aims at covering all available state-of-the-art algorithms from different community codes such as LAMMPS and GROMACS.
|
||||
|
||||
Apart from that, many tools to study and evaluate the in-depth performance of such kernels on distinct hardware are offered, like gather-bench, a standalone benchmark that mimics the data movement from MD kernels and the stubbed force calculation cases that focus on isolating the impacts caused by memory latencies and control flow divergence contributions in the overall performance.
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Verlet Lists</th>
|
||||
<th>GROMACS MxN</th>
|
||||
<th>Stubbed cases</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><a target="_blank" rel="noopener noreferrer" href="figures/verlet_v2.png"><img src="figures/verlet_v2.png" alt="Image" title="Verlet Lists" style="width: 100%;"></a></td>
|
||||
<td><a target="_blank" rel="noopener noreferrer" href="figures/gromacs_mxn_v2.png"><img src="figures/gromacs_mxn_v2.png" alt="Image" title="GROMACS MxN" style="width: 90%;"></a></td>
|
||||
<td><a target="_blank" rel="noopener noreferrer" href="figures/stub_new_v3.png"><img src="figures/stub_new_v3.png" alt="Image" title="Stubbed cases" style="width: 100%;"></a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<!--  -->
|
||||
|
||||
## Build instructions
|
||||
|
||||
Properly configure your building by changing `config.mk` file. The following
|
||||
options are available:
|
||||
Properly configure your building by changing `config.mk` file. The following options are available:
|
||||
|
||||
- **TAG:** Compiler tag (available options: GCC, CLANG, ICC, ONEAPI, NVCC).
|
||||
- **ISA:** Instruction set (available options: SSE, AVX, AVX\_FMA, AVX2, AVX512).
|
||||
@@ -25,18 +45,15 @@ options are available:
|
||||
- **COMPUTE\_STATS:** Compute statistics.
|
||||
|
||||
Configurations for LAMMPS Verlet Lists optimization scheme:
|
||||
|
||||
- **ENABLE\_OMP\_SIMD:** Use omp simd pragma on half neighbor-lists kernels.
|
||||
- **USE\_SIMD\_KERNEL:** Compile kernel with explicit SIMD intrinsics.
|
||||
|
||||
Configurations for GROMACS MxN optimization scheme:
|
||||
|
||||
- **USE\_REFERENCE\_VERSION:** Use reference version (only for correction purposes).
|
||||
- **XTC\_OUTPUT:** Enable XTC output.
|
||||
- **HALF\_NEIGHBOR\_LISTS\_CHECK\_CJ:** Check if j-clusters are local when decreasing the reaction force.
|
||||
|
||||
Configurations for CUDA:
|
||||
|
||||
- **USE\_CUDA\_HOST\_MEMORY:** Use CUDA host memory to optimize host-device transfers.
|
||||
|
||||
When done, just use `make` to compile the code.
|
||||
@@ -51,14 +68,11 @@ Use the following command to run a simulation:
|
||||
./MD-Bench-<TAG>-<OPT_SCHEME> [OPTION]...
|
||||
```
|
||||
|
||||
Where `TAG` and `OPT_SCHEME` correspond to the building options with the same
|
||||
name. Without any options, a Copper FCC lattice system with size 32x32x32
|
||||
(131072 atoms) over 200 time-steps using the Lennard-Jones potential (sigma=1.0,
|
||||
epsilon=1.0) is simulated.
|
||||
Where `TAG` and `OPT_SCHEME` correspond to the building options with the same name.
|
||||
Without any options, a Copper FCC lattice system with size 32x32x32 (131072 atoms) over 200 time-steps using the Lennard-Jones potential (sigma=1.0, epsilon=1.0) is simulated.
|
||||
|
||||
The default behavior and other options can be changed using the following parameters:
|
||||
|
||||
```sh
|
||||
```
|
||||
-p <string>: file to read parameters from (can be specified more than once)
|
||||
-f <string>: force field (lj or eam), default lj
|
||||
-i <string>: input file with atom positions (dump)
|
||||
@@ -78,17 +92,11 @@ TBD
|
||||
|
||||
## Citations
|
||||
|
||||
Rafael Ravedutti Lucio Machado, Jan Eitzinger, Harald Köstler, and Gerhard
|
||||
Wellein: MD-Bench: A generic proxy-app toolbox for state-of-the-art molecular
|
||||
dynamics algorithms. Accepted for [PPAM](https://ppam.edu.pl/) 2022, the 14th
|
||||
International Conference on Parallel Processing and Applied Mathematics, Gdansk,
|
||||
Poland, September 11-14, 2022. PPAM 2022 Best Paper Award. Preprint:
|
||||
[arXiv:2207.13094](https://arxiv.org/abs/2207.13094)
|
||||
Rafael Ravedutti Lucio Machado, Jan Eitzinger, Harald Köstler, and Gerhard Wellein: MD-Bench: A generic proxy-app toolbox for state-of-the-art molecular dynamics algorithms. Accepted for [PPAM](https://ppam.edu.pl/) 2022, the 14th International Conference on Parallel Processing and Applied Mathematics, Gdansk, Poland, September 11-14, 2022. PPAM 2022 Best Paper Award. Preprint: [arXiv:2207.13094](https://arxiv.org/abs/2207.13094)
|
||||
|
||||
## Credits
|
||||
|
||||
MD-Bench is developed by the Erlangen National High Performance Computing Center
|
||||
([NHR@FAU](https://hpc.fau.de/)) at the University of Erlangen-Nürnberg.
|
||||
MD-Bench is developed by the Erlangen National High Performance Computing Center ([NHR@FAU](https://hpc.fau.de/)) at the University of Erlangen-Nürnberg.
|
||||
|
||||
## License
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
97
common/box.c
Normal file
97
common/box.c
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
#include <box.h>
|
||||
#include <mpi.h>
|
||||
|
||||
int overlapBox(int dim, int dir, const Box* mybox, const Box* other, Box* cut, MD_FLOAT xprd, MD_FLOAT cutneigh)
|
||||
{
|
||||
int pbc = -100;
|
||||
MD_FLOAT min[3], max[3];
|
||||
int same = (mybox->id == other->id) ? 1 : 0;
|
||||
|
||||
//projections
|
||||
min[_x] = MAX(mybox->lo[_x], other->lo[_x]); max[_x] = MIN(mybox->hi[_x], other->hi[_x]);
|
||||
min[_y] = MAX(mybox->lo[_y], other->lo[_y]); max[_y] = MIN(mybox->hi[_y], other->hi[_y]);
|
||||
min[_z] = MAX(mybox->lo[_z], other->lo[_z]); max[_z] = MIN(mybox->hi[_z], other->hi[_z]);
|
||||
|
||||
//Intersection no periodic case
|
||||
if(!same){
|
||||
if (dir == 0) max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ cutneigh);
|
||||
if (dir == 1) min[dim] = MAX(mybox->lo[dim], other->lo[dim]- cutneigh);
|
||||
if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z])) pbc = 0;
|
||||
}
|
||||
|
||||
//Intersection periodic case
|
||||
if(pbc < 0)
|
||||
{
|
||||
if(dir == 0){
|
||||
min[dim] = MAX(mybox->lo[dim] , other->lo[dim]- xprd);
|
||||
max[dim] = MIN(mybox->hi[dim] , other->hi[dim]- xprd + cutneigh);
|
||||
|
||||
} else {
|
||||
min[dim] = MAX(mybox->lo[dim], other->lo[dim]+ xprd - cutneigh);
|
||||
max[dim] = MIN(mybox->hi[dim], other->hi[dim]+ xprd);
|
||||
|
||||
}
|
||||
if((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z]))
|
||||
pbc = (dir == 0) ? 1:-1;
|
||||
}
|
||||
|
||||
//storing the cuts
|
||||
cut->lo[_x] = min[_x]; cut->hi[_x] = max[_x];
|
||||
cut->lo[_y] = min[_y]; cut->hi[_y] = max[_y];
|
||||
cut->lo[_z] = min[_z]; cut->hi[_z] = max[_z];
|
||||
|
||||
return pbc;
|
||||
}
|
||||
|
||||
int overlapFullBox(Parameter* param, MD_FLOAT *cutneigh ,const Box* mybox, const Box* other)
|
||||
{
|
||||
MD_FLOAT min[3], max[3];
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for(int k = -1; k < 2; k++)
|
||||
{
|
||||
for(int j = -1; j < 2; j++)
|
||||
{
|
||||
for(int i= -1; i < 2; i++)
|
||||
{
|
||||
min[_x] = MAX(mybox->lo[_x], other->lo[_x]-cutneigh[_x] + i*xprd);
|
||||
min[_y] = MAX(mybox->lo[_y], other->lo[_y]-cutneigh[_y] + j*yprd);
|
||||
min[_z] = MAX(mybox->lo[_z], other->lo[_z]-cutneigh[_z] + k*zprd);
|
||||
max[_x] = MIN(mybox->hi[_x], other->hi[_x]+cutneigh[_x] + i*xprd);
|
||||
max[_y] = MIN(mybox->hi[_y], other->hi[_y]+cutneigh[_y] + j*yprd);
|
||||
max[_z] = MIN(mybox->hi[_z], other->hi[_z]+cutneigh[_z] + k*zprd);
|
||||
if ((min[_x]<max[_x]) && (min[_y]<max[_y]) && (min[_z]<max[_z]))
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void expandBox(int iswap, const Box* me, const Box* other, Box* cut, MD_FLOAT cutneigh)
|
||||
{
|
||||
if(iswap==2 || iswap==3){
|
||||
if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
|
||||
if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
|
||||
}
|
||||
|
||||
if(iswap==4 || iswap==5){
|
||||
if(me->lo[_x] <= other->lo[_x]) cut->lo[_x] -= cutneigh;
|
||||
if(me->hi[_x] >= other->hi[_x]) cut->hi[_x] += cutneigh;
|
||||
if(me->lo[_y] <= other->lo[_y]) cut->lo[_y] -= cutneigh;
|
||||
if(me->hi[_y] >= other->hi[_y]) cut->hi[_y] += cutneigh;
|
||||
}
|
||||
}
|
||||
|
556
common/comm.c
Normal file
556
common/comm.c
Normal file
@@ -0,0 +1,556 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <comm.h>
|
||||
#include <allocate.h>
|
||||
#include <mpi.h>
|
||||
#include <util.h>
|
||||
|
||||
#define NEIGHMIN 6
|
||||
#define BUFFACTOR 2
|
||||
#define BUFMIN 1000
|
||||
#define BUFEXTRA 100
|
||||
#define world MPI_COMM_WORLD
|
||||
|
||||
MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
|
||||
static inline void allocDynamicBuffers(Comm*);
|
||||
static inline void freeDynamicBuffers(Comm*);
|
||||
static inline void freeBuffers(Comm*);
|
||||
|
||||
void defineReverseList(Comm* comm){
|
||||
int dim = 0;
|
||||
int index = 0;
|
||||
int me = comm->myproc;
|
||||
|
||||
//Set the inverse list
|
||||
for(int iswap = 0; iswap<6; iswap++){
|
||||
int dim = comm->swapdim[iswap];
|
||||
int dir = comm->swapdir[iswap];
|
||||
int invswap = comm->swap[dim][(dir+1)%2];
|
||||
|
||||
for(int ineigh = comm->sendfrom[invswap]; ineigh< comm->sendtill[invswap]; ineigh++)
|
||||
comm->nrecv[index++] = comm->nsend[ineigh];
|
||||
|
||||
comm->recvfrom[iswap] = (iswap == 0) ? 0 : comm->recvtill[iswap-1];
|
||||
comm->recvtill[iswap] = index;
|
||||
}
|
||||
|
||||
//set if myproc is unique in the swap
|
||||
for(int iswap = 0; iswap<6; iswap++){
|
||||
int sizeswap = comm->sendtill[iswap]-comm->sendfrom[iswap];
|
||||
int index = comm->sendfrom[iswap];
|
||||
int myneigh = comm->nsend[index];
|
||||
comm->othersend[iswap] = (sizeswap != 1 || comm->myproc != myneigh) ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void addNeighToExchangeList(Comm* comm, int newneigh){
|
||||
|
||||
int numneigh = comm->numneighexch;
|
||||
|
||||
if(comm->numneighexch>=comm->maxneighexch){
|
||||
size_t oldByteSize = comm->maxneighexch*sizeof(int);
|
||||
comm->maxneighexch *=2;
|
||||
comm->nexch = (int*) reallocate(comm->nexch, ALIGNMENT, comm->maxneighexch * sizeof(int), oldByteSize);
|
||||
}
|
||||
|
||||
// Add the new element to the list
|
||||
comm->nexch[numneigh] = newneigh;
|
||||
comm->numneighexch++;
|
||||
}
|
||||
|
||||
//Exported functions
|
||||
void neighComm(Comm *comm, Parameter* param, Grid *grid)
|
||||
{
|
||||
int me = comm->myproc;
|
||||
int numproc = comm ->numproc;
|
||||
int PAD = 6; //number of elements for processor in the map
|
||||
int ineigh = 0;
|
||||
int sneigh = 0;
|
||||
MD_FLOAT *map = grid->map;
|
||||
MD_FLOAT cutneigh = param->cutneigh;
|
||||
MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
|
||||
Box mybox, other, cut;
|
||||
|
||||
//needed for rebalancing
|
||||
freeDynamicBuffers(comm);
|
||||
|
||||
//Local box
|
||||
mybox.id = me;
|
||||
mybox.lo[_x] = map[me*PAD+0]; mybox.hi[_x] = map[me*PAD+3];
|
||||
mybox.lo[_y] = map[me*PAD+1]; mybox.hi[_y] = map[me*PAD+4];
|
||||
mybox.lo[_z] = map[me*PAD+2]; mybox.hi[_z] = map[me*PAD+5];
|
||||
|
||||
//Check for all possible neighbours only for exchange atoms
|
||||
comm->numneighexch = 0;
|
||||
for(int proc = 0; proc <numproc; proc++){
|
||||
other.id = proc;
|
||||
other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
|
||||
other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
|
||||
other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];
|
||||
|
||||
if(proc != me){
|
||||
int intersection = overlapFullBox(param,grid->cutneigh,&mybox,&other);
|
||||
if(intersection) addNeighToExchangeList(comm,proc);
|
||||
}
|
||||
}
|
||||
|
||||
//MAP is stored as follows: xlo,ylo,zlo,xhi,yhi,zhi
|
||||
for(int iswap = 0; iswap <6; iswap++)
|
||||
{
|
||||
int dir = comm->swapdir[iswap];
|
||||
int dim = comm->swapdim[iswap];
|
||||
|
||||
for(int proc = 0; proc < numproc; proc++)
|
||||
{
|
||||
//Check for neighbours along dimmensions, for forwardComm, backwardComm and ghostComm
|
||||
other.id = proc;
|
||||
other.lo[_x] = map[proc*PAD+0]; other.hi[_x] = map[proc*PAD+3];
|
||||
other.lo[_y] = map[proc*PAD+1]; other.hi[_y] = map[proc*PAD+4];
|
||||
other.lo[_z] = map[proc*PAD+2]; other.hi[_z] = map[proc*PAD+5];
|
||||
|
||||
//return if two boxes intersect: -100 not intersection, 0, 1 and -1 intersection for each different pbc.
|
||||
int pbc = overlapBox(dim,dir,&mybox,&other,&cut,prd[dim],cutneigh);
|
||||
if(pbc == -100) continue;
|
||||
|
||||
expandBox(iswap, &mybox, &other, &cut, cutneigh);
|
||||
|
||||
if(ineigh >= comm->maxneigh) {
|
||||
size_t oldByteSize = comm->maxneigh*sizeof(int);
|
||||
size_t oldBoxSize = comm->maxneigh*sizeof(Box);
|
||||
comm->maxneigh = 2*ineigh;
|
||||
comm->nsend = (int*) reallocate(comm->nsend, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->nrecv = (int*) reallocate(comm->nrecv, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->pbc_x = (int*) reallocate(comm->pbc_x, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->pbc_y = (int*) reallocate(comm->pbc_y, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->pbc_z = (int*) reallocate(comm->pbc_z, ALIGNMENT, comm->maxneigh * sizeof(int), oldByteSize);
|
||||
comm->boxes = (Box*) reallocate(comm->boxes, ALIGNMENT, comm->maxneigh * sizeof(Box), oldBoxSize);
|
||||
}
|
||||
|
||||
comm->boxes[ineigh] = cut;
|
||||
comm->nsend[ineigh] = proc;
|
||||
comm->pbc_x[ineigh] = (dim == _x) ? pbc : 0;
|
||||
comm->pbc_y[ineigh] = (dim == _y) ? pbc : 0;
|
||||
comm->pbc_z[ineigh] = (dim == _z) ? pbc : 0;
|
||||
ineigh++;
|
||||
}
|
||||
|
||||
comm->sendfrom[iswap] = (iswap == 0) ? 0:comm->sendtill[iswap-1];
|
||||
comm->sendtill[iswap] = ineigh;
|
||||
comm->numneigh = ineigh;
|
||||
}
|
||||
|
||||
allocDynamicBuffers(comm);
|
||||
defineReverseList(comm);
|
||||
}
|
||||
|
||||
void initComm(int* argc, char*** argv, Comm* comm)
|
||||
{
|
||||
//MPI Initialize
|
||||
MPI_Init(argc, argv);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &(comm->numproc));
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &(comm->myproc));
|
||||
comm->numneigh = 0;
|
||||
comm->numneighexch = 0;
|
||||
comm->nrecv=NULL;
|
||||
comm->nsend=NULL;
|
||||
comm->nexch=NULL;
|
||||
comm->pbc_x=NULL;
|
||||
comm->pbc_y=NULL;
|
||||
comm->pbc_z=NULL;
|
||||
comm->boxes=NULL;
|
||||
comm->atom_send=NULL;
|
||||
comm->atom_recv=NULL;
|
||||
comm->off_atom_send=NULL;
|
||||
comm->off_atom_recv=NULL;
|
||||
comm->maxsendlist=NULL;
|
||||
comm->sendlist=NULL;
|
||||
comm->buf_send=NULL;
|
||||
comm->buf_recv=NULL;
|
||||
}
|
||||
|
||||
void endComm(Comm* comm)
|
||||
{
|
||||
comm->maxneigh = 0;
|
||||
comm->maxneighexch =0;
|
||||
comm->maxsend = 0;
|
||||
comm->maxrecv = 0;
|
||||
freeBuffers(comm);
|
||||
MPI_Finalize();
|
||||
}
|
||||
|
||||
void setupComm(Comm* comm, Parameter* param, Grid* grid){
|
||||
|
||||
comm->swap[_x][0] = 0; comm->swap[_x][1] =1;
|
||||
comm->swap[_y][0] = 2; comm->swap[_y][1] =3;
|
||||
comm->swap[_z][0] = 4; comm->swap[_z][1] =5;
|
||||
|
||||
comm->swapdim[0] = comm->swapdim[1] = _x;
|
||||
comm->swapdim[2] = comm->swapdim[3] = _y;
|
||||
comm->swapdim[4] = comm->swapdim[5] = _z;
|
||||
|
||||
comm->swapdir[0] = comm->swapdir[2] = comm->swapdir[4] = 0;
|
||||
comm->swapdir[1] = comm->swapdir[3] = comm->swapdir[5] = 1;
|
||||
|
||||
for(int i = 0; i<6; i++){
|
||||
comm->sendfrom[i] = 0;
|
||||
comm->sendtill[i] = 0;
|
||||
comm->recvfrom[i] = 0;
|
||||
comm->recvtill[i] = 0;
|
||||
}
|
||||
|
||||
comm->forwardSize = FORWARD_SIZE; //send coordiantes x,y,z
|
||||
comm->reverseSize = REVERSE_SIZE; //return forces fx, fy, fz
|
||||
comm->ghostSize = GHOST_SIZE; //send x,y,z,type;
|
||||
comm->exchangeSize = EXCHANGE_SIZE; //send x,y,z,vx,vy,vz,type
|
||||
|
||||
//Allocate memory for recv buffer and recv buffer
|
||||
comm->maxsend = BUFMIN;
|
||||
comm->maxrecv = BUFMIN;
|
||||
comm->buf_send = (MD_FLOAT*) allocate(ALIGNMENT,(comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT));
|
||||
comm->buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
|
||||
|
||||
comm->maxneighexch = NEIGHMIN;
|
||||
comm->nexch = (int*) allocate(ALIGNMENT, comm->maxneighexch * sizeof(int));
|
||||
|
||||
comm->maxneigh = NEIGHMIN;
|
||||
comm->nsend = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->nrecv = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->pbc_x = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->pbc_y = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->pbc_z = (int*) allocate(ALIGNMENT, comm->maxneigh * sizeof(int));
|
||||
comm->boxes = (Box*) allocate(ALIGNMENT, comm->maxneigh * sizeof(Box));
|
||||
|
||||
neighComm(comm, param, grid);
|
||||
}
|
||||
|
||||
void forwardComm(Comm* comm, Atom* atom, int iswap)
|
||||
{
|
||||
int nrqst=0, offset=0, nsend=0, nrecv=0;
|
||||
int pbc[3];
|
||||
int size = comm->forwardSize;
|
||||
int maxrqst = comm->numneigh;
|
||||
MD_FLOAT* buf;
|
||||
MPI_Request requests[maxrqst];
|
||||
|
||||
for(int ineigh = comm->sendfrom[iswap]; ineigh < comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh];
|
||||
pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
|
||||
packForward(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &comm->buf_send[offset*size],pbc);
|
||||
}
|
||||
|
||||
//Receives elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh]*size;
|
||||
nrecv = comm->atom_recv[ineigh]*size;
|
||||
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
|
||||
//Send elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh]*size;
|
||||
nsend = comm->atom_send[ineigh]*size;
|
||||
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
|
||||
}
|
||||
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
if(comm->othersend[iswap]) buf = comm->buf_recv;
|
||||
else buf = comm->buf_send;
|
||||
|
||||
/* unpack buffer */
|
||||
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh];
|
||||
unpackForward(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &buf[offset*size]);
|
||||
}
|
||||
}
|
||||
|
||||
void reverseComm(Comm* comm, Atom* atom, int iswap)
|
||||
{
|
||||
int nrqst=0, offset=0, nsend=0, nrecv=0 ;
|
||||
int size = comm->reverseSize;
|
||||
int maxrqst = comm->numneigh;
|
||||
MD_FLOAT* buf;
|
||||
MPI_Request requests[maxrqst];
|
||||
|
||||
for(int ineigh = comm->recvfrom[iswap]; ineigh < comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh];
|
||||
packReverse(atom, comm->atom_recv[ineigh], comm->firstrecv[iswap] + offset, &comm->buf_send[offset*size]);
|
||||
}
|
||||
//Receives elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh]*size;
|
||||
nrecv = comm->atom_send[ineigh]*size;
|
||||
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nsend[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
//Send elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh]*size;
|
||||
nsend = comm->atom_recv[ineigh]*size;
|
||||
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nrecv[ineigh],0,world);
|
||||
}
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
if(comm->othersend[iswap]) buf = comm->buf_recv;
|
||||
else buf = comm->buf_send;
|
||||
|
||||
/* unpack buffer */
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh];
|
||||
unpackReverse(atom, comm->atom_send[ineigh], comm->sendlist[ineigh], &buf[offset*size]);
|
||||
}
|
||||
}
|
||||
|
||||
void ghostComm(Comm* comm, Atom* atom,int iswap){
|
||||
|
||||
MD_FLOAT xlo=0, xhi=0, ylo=0, yhi=0, zlo=0, zhi=0;
|
||||
MD_FLOAT* buf;
|
||||
int nrqst=0, nsend=0, nrecv=0, offset=0, ineigh=0, pbc[3];
|
||||
int all_recv=0, all_send=0, currentSend=0;
|
||||
int size = comm->ghostSize;
|
||||
int maxrqrst = comm->numneigh;
|
||||
MPI_Request requests[maxrqrst];
|
||||
for(int i = 0; i<maxrqrst; i++)
|
||||
requests[maxrqrst]=MPI_REQUEST_NULL;
|
||||
if(iswap%2==0) comm->iterAtom = LOCAL+GHOST;
|
||||
int iter = 0;
|
||||
for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
|
||||
{
|
||||
Box* tile = &comm->boxes[ineigh];
|
||||
|
||||
xlo = tile->lo[_x]; ylo = tile->lo[_y]; zlo = tile->lo[_z];
|
||||
xhi = tile->hi[_x]; yhi = tile->hi[_y]; zhi = tile->hi[_z];
|
||||
pbc[_x]=comm->pbc_x[ineigh]; pbc[_y]=comm->pbc_y[ineigh]; pbc[_z]=comm->pbc_z[ineigh];
|
||||
nsend = 0;
|
||||
|
||||
for(int i = 0; i < comm->iterAtom ; i++)
|
||||
{
|
||||
if(IsinRegionToSend(i)){
|
||||
if(nsend >= comm->maxsendlist[ineigh]) growList(comm,ineigh,nsend);
|
||||
if(currentSend + size >= comm->maxsend) growSend(comm,currentSend);
|
||||
comm->sendlist[ineigh][nsend++] = i;
|
||||
currentSend += packGhost(atom, i, &comm->buf_send[currentSend], pbc);
|
||||
}
|
||||
}
|
||||
comm->atom_send[ineigh] = nsend; //#atoms send per neigh
|
||||
comm->off_atom_send[ineigh] = all_send; //offset atom respect to neighbours in a swap
|
||||
all_send += nsend; //all atoms send
|
||||
}
|
||||
//Receives how many elements to be received.
|
||||
if(comm->othersend[iswap])
|
||||
for(nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++)
|
||||
MPI_Irecv(&comm->atom_recv[ineigh],1,MPI_INT,comm->nrecv[ineigh],0,world,&requests[nrqst++]);
|
||||
|
||||
if(!comm->othersend[iswap]) comm->atom_recv[comm->recvfrom[iswap]] = nsend;
|
||||
|
||||
//Communicate how many elements to be sent.
|
||||
if(comm->othersend[iswap])
|
||||
for(int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++)
|
||||
MPI_Send(&comm->atom_send[ineigh],1,MPI_INT,comm->nsend[ineigh],0,world);
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
//Define offset to store in the recv_buff
|
||||
for(int ineigh = comm->recvfrom[iswap]; ineigh<comm->recvtill[iswap]; ineigh++){
|
||||
comm->off_atom_recv[ineigh] = all_recv;
|
||||
all_recv += comm->atom_recv[ineigh];
|
||||
}
|
||||
|
||||
if(all_recv*size>=comm->maxrecv) growRecv(comm,all_recv*size);
|
||||
|
||||
//Receives elements
|
||||
if(comm->othersend[iswap])
|
||||
for (nrqst=0, ineigh = comm->recvfrom[iswap]; ineigh< comm->recvtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_recv[ineigh]*size;
|
||||
nrecv = comm->atom_recv[ineigh]*size;
|
||||
MPI_Irecv(&comm->buf_recv[offset], nrecv, type, comm->nrecv[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
//Send elements
|
||||
if(comm->othersend[iswap])
|
||||
for (int ineigh = comm->sendfrom[iswap]; ineigh< comm->sendtill[iswap]; ineigh++){
|
||||
offset = comm->off_atom_send[ineigh]*size;
|
||||
nsend = comm->atom_send[ineigh]*size;
|
||||
MPI_Send(&comm->buf_send[offset],nsend,type,comm->nsend[ineigh],0,world);
|
||||
}
|
||||
if(comm->othersend[iswap]) MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
if(comm->othersend[iswap]) buf = comm->buf_recv;
|
||||
else buf = comm->buf_send;
|
||||
//unpack elements
|
||||
comm->firstrecv[iswap] = LOCAL+GHOST;
|
||||
for(int i = 0; i < all_recv; i++)
|
||||
unpackGhost(atom, LOCAL+GHOST, &buf[i*size]);
|
||||
|
||||
//Increases the buffer if needed
|
||||
int max_size = MAX(comm->forwardSize,comm->reverseSize);
|
||||
int max_buf = max_size * MAX(all_recv, all_send);
|
||||
if(max_buf>=comm->maxrecv) growRecv(comm,max_buf);
|
||||
if(max_buf>=comm->maxsend) growSend(comm,max_buf);
|
||||
}
|
||||
|
||||
void exchangeComm(Comm* comm, Atom* atom){
|
||||
|
||||
MD_FLOAT x,y,z;
|
||||
MD_FLOAT *lo = atom->mybox.lo;
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
int size = comm->exchangeSize;
|
||||
int numneigh = comm->numneighexch;
|
||||
int offset_recv[numneigh];
|
||||
int size_recv[numneigh];
|
||||
MPI_Request requests[numneigh];
|
||||
int i =0, nsend = 0, nrecv = 0;
|
||||
int nrqst = 0;
|
||||
int nlocal, offset,m;
|
||||
|
||||
/* enforce PBC */
|
||||
pbc(atom);
|
||||
|
||||
if(comm->numneigh == 0) return;
|
||||
|
||||
nlocal = atom->Nlocal;
|
||||
while(i < nlocal) {
|
||||
if(atom_x(i) < lo[_x] || atom_x(i) >= hi[_x] ||
|
||||
atom_y(i) < lo[_y] || atom_y(i) >= hi[_y] ||
|
||||
atom_z(i) < lo[_z] || atom_z(i) >= hi[_z]) {
|
||||
if(nsend+size >= comm->maxsend) growSend(comm, nsend);
|
||||
nsend += packExchange(atom, i, &comm->buf_send[nsend]);
|
||||
copy(atom, i, nlocal-1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
atom->Nlocal = nlocal;
|
||||
|
||||
/* send/recv number of to share atoms with neighbouring procs*/
|
||||
for(int ineigh = 0; ineigh < numneigh; ineigh++)
|
||||
MPI_Irecv(&size_recv[ineigh],1,MPI_INT,comm->nexch[ineigh],0,world,&requests[nrqst++]);
|
||||
|
||||
for (int ineigh = 0; ineigh < numneigh; ineigh++)
|
||||
MPI_Send(&nsend,1,MPI_INT,comm->nexch[ineigh],0,world);
|
||||
MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
//Define offset to store in the recv_buff
|
||||
for(int ineigh = 0; ineigh<numneigh; ineigh++){
|
||||
offset_recv[ineigh] = nrecv;
|
||||
nrecv += size_recv[ineigh];
|
||||
}
|
||||
|
||||
if(nrecv >= comm->maxrecv) growRecv(comm,nrecv);
|
||||
|
||||
//Receives elements
|
||||
nrqst=0;
|
||||
for (int ineigh = 0; ineigh< numneigh; ineigh++){
|
||||
offset = offset_recv[ineigh];
|
||||
MPI_Irecv(&comm->buf_recv[offset], size_recv[ineigh], type, comm->nexch[ineigh],0,world,&requests[nrqst++]);
|
||||
}
|
||||
//Send elements
|
||||
for (int ineigh = 0; ineigh< numneigh; ineigh++)
|
||||
MPI_Send(comm->buf_send,nsend,type,comm->nexch[ineigh],0,world);
|
||||
MPI_Waitall(nrqst,requests,MPI_STATUS_IGNORE);
|
||||
|
||||
nlocal = atom->Nlocal;
|
||||
m = 0;
|
||||
while(m < nrecv) {
|
||||
x = comm->buf_recv[m + _x];
|
||||
y = comm->buf_recv[m + _y];
|
||||
z = comm->buf_recv[m + _z];
|
||||
|
||||
if(x >= lo[_x] && x < hi[_x] &&
|
||||
y >= lo[_y] && y < hi[_y] &&
|
||||
z >= lo[_z] && z < hi[_z]){
|
||||
m += unpackExchange(atom, nlocal++, &comm->buf_recv[m]);
|
||||
} else {
|
||||
m += size;
|
||||
}
|
||||
}
|
||||
atom->Nlocal = nlocal;
|
||||
|
||||
int all_atoms=0;
|
||||
MPI_Allreduce(&atom->Nlocal, &all_atoms, 1, MPI_INT, MPI_SUM, world);
|
||||
if(atom->Natoms!=all_atoms && comm->myproc ==0){
|
||||
printf("Losing atoms! current atoms:%d expected atoms:%d\n",all_atoms,atom->Natoms);
|
||||
}
|
||||
}
|
||||
|
||||
//Internal functions
|
||||
|
||||
inline void growRecv(Comm* comm, int n)
|
||||
{
|
||||
comm -> maxrecv = BUFFACTOR * n;
|
||||
if(comm->buf_recv) free(comm -> buf_recv);
|
||||
comm -> buf_recv = (MD_FLOAT*) allocate(ALIGNMENT, comm->maxrecv * sizeof(MD_FLOAT));
|
||||
}
|
||||
|
||||
inline void growSend(Comm* comm, int n)
|
||||
{
|
||||
size_t oldByteSize = (comm->maxsend+BUFEXTRA)*sizeof(MD_FLOAT);
|
||||
comm -> maxsend = BUFFACTOR * n;
|
||||
comm -> buf_send = (MD_FLOAT*) reallocate(comm->buf_send, ALIGNMENT, (comm->maxsend + BUFEXTRA) * sizeof(MD_FLOAT), oldByteSize);
|
||||
}
|
||||
|
||||
inline void growList(Comm* comm, int ineigh, int n)
|
||||
{
|
||||
size_t oldByteSize = comm->maxsendlist[ineigh]*sizeof(int);
|
||||
comm->maxsendlist[ineigh] = BUFFACTOR * n;
|
||||
comm->sendlist[ineigh] = (int*) reallocate(comm->sendlist[ineigh],ALIGNMENT, comm->maxsendlist[ineigh] * sizeof(int), oldByteSize);
|
||||
}
|
||||
|
||||
static inline void allocDynamicBuffers(Comm* comm)
|
||||
{
|
||||
//Buffers depending on the # of my neighs
|
||||
int numneigh = comm->numneigh;
|
||||
comm->atom_send = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
|
||||
comm->atom_recv = (int*) allocate(ALIGNMENT, numneigh * sizeof(int));
|
||||
comm->off_atom_send = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
|
||||
comm->off_atom_recv = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
|
||||
comm->maxsendlist = (int*) allocate(ALIGNMENT,numneigh * sizeof(int));
|
||||
|
||||
for(int i = 0; i < numneigh; i++)
|
||||
comm->maxsendlist[i] = BUFMIN;
|
||||
|
||||
comm->sendlist = (int**) allocate(ALIGNMENT, numneigh * sizeof(int*));
|
||||
for(int i = 0; i < numneigh; i++)
|
||||
comm->sendlist[i] = (int*) allocate(ALIGNMENT, comm->maxsendlist[i] * sizeof(int));
|
||||
}
|
||||
|
||||
static inline void freeDynamicBuffers(Comm* comm)
|
||||
{
|
||||
int numneigh =comm->numneigh;
|
||||
|
||||
if(comm->atom_send) free(comm->atom_send);
|
||||
if(comm->atom_recv) free(comm->atom_recv);
|
||||
if(comm->off_atom_send) free(comm->off_atom_send);
|
||||
if(comm->off_atom_recv) free(comm->off_atom_recv);
|
||||
if(comm->maxsendlist) free(comm->maxsendlist);
|
||||
if(comm->sendlist){
|
||||
for(int i = 0; i < numneigh; i++)
|
||||
if(comm->sendlist[i]) free(comm->sendlist[i]);
|
||||
}
|
||||
if(comm->sendlist) free(comm->sendlist);
|
||||
}
|
||||
|
||||
static inline void freeBuffers(Comm* comm)
|
||||
{
|
||||
if(comm->nrecv) free(comm->nrecv);
|
||||
if(comm->nsend) free(comm->nsend);
|
||||
if(comm->nexch) free(comm->nexch);
|
||||
if(comm->pbc_x) free(comm->pbc_x);
|
||||
if(comm->pbc_y) free(comm->pbc_y);
|
||||
if(comm->pbc_z) free(comm->pbc_z);
|
||||
if(comm->boxes) free(comm->boxes);
|
||||
if(comm->atom_send) free(comm->atom_send);
|
||||
if(comm->atom_recv) free(comm->atom_recv);
|
||||
if(comm->off_atom_send) free(comm->off_atom_send);
|
||||
if(comm->off_atom_recv) free(comm->off_atom_recv);
|
||||
if(comm->maxsendlist) free(comm->maxsendlist);
|
||||
|
||||
if(comm->sendlist){
|
||||
for(int i = 0; i < comm->numneigh; i++)
|
||||
if(comm->sendlist[i]) free(comm->sendlist[i]);
|
||||
}
|
||||
if(comm->sendlist) free(comm->sendlist);
|
||||
|
||||
if(comm->buf_send) free(comm->buf_send);
|
||||
if(comm->buf_recv) free(comm->buf_recv);
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
490
common/grid.c
Normal file
490
common/grid.c
Normal file
@@ -0,0 +1,490 @@
|
||||
#include <stdio.h>
|
||||
#include <grid.h>
|
||||
#include <mpi.h>
|
||||
#include <parameter.h>
|
||||
#include <allocate.h>
|
||||
#include <util.h>
|
||||
#include <math.h>
|
||||
|
||||
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
|
||||
|
||||
//Grommacs Balancing
|
||||
MD_FLOAT f_normalization(MD_FLOAT* x,MD_FLOAT* fx, MD_FLOAT minx, int nprocs) {
|
||||
|
||||
MD_FLOAT sum=0;
|
||||
for(int n = 0; n<nprocs; n++){
|
||||
fx[n] = MAX(minx,x[n]);
|
||||
sum+=fx[n];
|
||||
}
|
||||
|
||||
for(int n = 0; n<nprocs; n++)
|
||||
fx[n] /= sum;
|
||||
}
|
||||
|
||||
void fixedPointIteration(MD_FLOAT* x0, int nprocs, MD_FLOAT minx)
|
||||
{
|
||||
MD_FLOAT tolerance = 1e-3;
|
||||
MD_FLOAT alpha = 0.5;
|
||||
MD_FLOAT *fx = (MD_FLOAT*) malloc(nprocs*sizeof(MD_FLOAT));
|
||||
int maxIterations = 100;
|
||||
|
||||
for (int i = 0; i < maxIterations; i++) {
|
||||
|
||||
int converged = 1;
|
||||
f_normalization(x0,fx,minx,nprocs);
|
||||
|
||||
for(int n=0; n<nprocs; n++)
|
||||
fx[n]= (1-alpha) * x0[n] + alpha * fx[n];
|
||||
|
||||
for (int n=0; n<nprocs; n++) {
|
||||
if (fabs(fx[n] - x0[n]) >= tolerance) {
|
||||
converged = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int n=0; n<nprocs; n++)
|
||||
x0[n] = fx[n];
|
||||
|
||||
if(converged){
|
||||
for(int n = 0; n<nprocs; n++)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void staggeredBalance(Grid* grid, Atom* atom, Parameter* param, double newTime)
|
||||
{
|
||||
int me;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int *coord = grid->coord;
|
||||
int *nprocs = grid ->nprocs;
|
||||
//Elapsed time since the last rebalance
|
||||
double time = newTime - grid->Timer;
|
||||
grid->Timer = newTime;
|
||||
//store the older dimm to compare later for exchange
|
||||
MD_FLOAT lo[3], hi[3];
|
||||
for(int dim = 0; dim< 3; dim++){
|
||||
lo[dim] = atom->mybox.lo[dim];
|
||||
hi[dim] = atom->mybox.hi[dim];
|
||||
}
|
||||
|
||||
//Define parameters
|
||||
MPI_Comm subComm[3];
|
||||
int color[3] = {0,0,0};
|
||||
int id[3] = {0,0,0};
|
||||
MD_FLOAT ** load = (MD_FLOAT**) malloc(3*sizeof(MD_FLOAT*));
|
||||
for(int dim = 0; dim<3; dim++)
|
||||
load[dim] = (MD_FLOAT*) malloc(nprocs[dim]*sizeof(MD_FLOAT));
|
||||
|
||||
int maxprocs = MAX(MAX(nprocs[_x],nprocs[_y]),nprocs[_z]);
|
||||
MD_FLOAT* cellSize = (MD_FLOAT*) malloc(maxprocs*sizeof(MD_FLOAT));
|
||||
MD_FLOAT* limits = (MD_FLOAT*) malloc(2*maxprocs*sizeof(MD_FLOAT)); //limits: (x0, x1), (x1, x2)... Repeat values in between to perfom MPI_Scatter later
|
||||
MD_FLOAT t_sum[3] = {0,0,0};
|
||||
MD_FLOAT recv_buf[2] = {0,0}; //Each proc only receives 2 elments per dimension xlo and xhi
|
||||
MD_FLOAT balancedLoad[3] = {0,0,0}; //1/nprocs
|
||||
MD_FLOAT minLoad[3] = {0,0,0}; //beta*(1/nprocs)
|
||||
MD_FLOAT prd[3] = {param->xprd, param->yprd, param->zprd};
|
||||
MD_FLOAT boundaries[6] ={0,0,0,0,0,0}; // xlo,xhi,ylo,yhi,zlo,zhi
|
||||
|
||||
//Create sub-communications along each dimension
|
||||
for(int dim = 0; dim<3; dim++){
|
||||
if(dim == _x){
|
||||
color[_x] = (coord[_y] == 0 && coord[_z] ==0) ? 1:MPI_UNDEFINED;
|
||||
id[_x] = me;
|
||||
} else if(dim == _y) {
|
||||
color[_y] = coord[_z] == 0 ? coord[_x]:MPI_UNDEFINED;
|
||||
id[_y] = (coord[_y] == 0 && coord[_z] == 0) ? 0:me;
|
||||
} else {
|
||||
color[_z]= coord[_y]*nprocs[_x]+coord[_x];
|
||||
id[_z] = coord[_z] == 0 ? 0 : me;
|
||||
}
|
||||
MPI_Comm_split(world, color[dim], id[dim], &subComm[dim]);
|
||||
}
|
||||
|
||||
//Set the minimum load and the balance load
|
||||
for(int dim = 0; dim<3; dim++){
|
||||
balancedLoad[dim] = 1./nprocs[dim];
|
||||
minLoad[dim] = 0.8*balancedLoad[dim];
|
||||
}
|
||||
//set and communicate the workload in reverse order
|
||||
for(int dim = _z; dim>= _x; dim--)
|
||||
{
|
||||
if(subComm[dim] != MPI_COMM_NULL){
|
||||
MPI_Gather(&time,1,type,load[dim],1,type,0,subComm[dim]);
|
||||
|
||||
if(id[dim] == 0)
|
||||
{
|
||||
for(int n=0; n<nprocs[dim]; n++)
|
||||
t_sum[dim] += load[dim][n];
|
||||
|
||||
for(int n=0; n<nprocs[dim]; n++)
|
||||
load[dim][n] /= t_sum[dim];
|
||||
}
|
||||
time =t_sum[dim];
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
|
||||
//Brodacast the new boundaries along dimensions
|
||||
for(int dim=0; dim<3; dim++){
|
||||
|
||||
if(subComm[dim] != MPI_COMM_NULL){
|
||||
|
||||
MPI_Bcast(boundaries,6,type,0,subComm[dim]);
|
||||
if(id[dim] == 0) {
|
||||
fixedPointIteration(load[dim], nprocs[dim], minLoad[dim]);
|
||||
MD_FLOAT inv_sum=0;
|
||||
for(int n=0; n<nprocs[dim];n++)
|
||||
inv_sum +=(1/load[dim][n]);
|
||||
|
||||
for(int n=0; n<nprocs[dim];n++)
|
||||
cellSize[n] = (prd[dim]/load[dim][n])*(1./inv_sum);
|
||||
|
||||
MD_FLOAT sum=0;
|
||||
for(int n=0; n<nprocs[dim]; n++){
|
||||
limits[2*n] = sum;
|
||||
limits[2*n+1] = sum+cellSize[n];
|
||||
sum+= cellSize[n];
|
||||
}
|
||||
limits[2*nprocs[dim]-1] = prd[dim];
|
||||
}
|
||||
MPI_Scatter(limits,2,type,recv_buf,2,type,0,subComm[dim]);
|
||||
boundaries[2*dim] = recv_buf[0];
|
||||
boundaries[2*dim+1] = recv_buf[1];
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
|
||||
atom->mybox.lo[_x]=boundaries[0]; atom->mybox.hi[_x]=boundaries[1];
|
||||
atom->mybox.lo[_y]=boundaries[2]; atom->mybox.hi[_y]=boundaries[3];
|
||||
atom->mybox.lo[_z]=boundaries[4]; atom->mybox.hi[_z]=boundaries[5];
|
||||
|
||||
MD_FLOAT domain[6] = {boundaries[0], boundaries[2], boundaries[4], boundaries[1], boundaries[3], boundaries[5]};
|
||||
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
|
||||
|
||||
//because cells change dynamically, It is required to increase the neighbouring exchange region
|
||||
for(int dim =_x; dim<=_z; dim++){
|
||||
MD_FLOAT dr,dr_max;
|
||||
int n = grid->nprocs[dim];
|
||||
MD_FLOAT maxdelta = 0.2*prd[dim];
|
||||
dr = MAX(fabs(lo[dim] - atom->mybox.lo[dim]),fabs(hi[dim] - atom->mybox.hi[dim]));
|
||||
MPI_Allreduce(&dr, &dr_max, 1, type, MPI_MAX, world);
|
||||
grid->cutneigh[dim] = param->cutneigh+dr_max;
|
||||
}
|
||||
|
||||
for(int dim=0; dim<3; dim++) {
|
||||
if(subComm[dim] != MPI_COMM_NULL){
|
||||
MPI_Comm_free(&subComm[dim]);
|
||||
}
|
||||
free(load[dim]);
|
||||
}
|
||||
free(load);
|
||||
free(limits);
|
||||
}
|
||||
|
||||
//RCB Balancing
|
||||
MD_FLOAT meanTimeBisect(Atom *atom, MPI_Comm subComm, int dim, double time)
|
||||
{
|
||||
MD_FLOAT mean=0, sum=0, total_sum=0, weightAtoms= 0, total_weight=0;
|
||||
|
||||
for(int i=0; i<atom->Nlocal; i++){
|
||||
sum += atom_pos(i);
|
||||
}
|
||||
sum*=time;
|
||||
weightAtoms = atom->Nlocal*time;
|
||||
MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
|
||||
MPI_Allreduce(&weightAtoms, &total_weight, 1, type, MPI_SUM, subComm);
|
||||
|
||||
mean = total_sum/total_weight;
|
||||
return mean;
|
||||
}
|
||||
|
||||
MD_FLOAT meanBisect(Atom* atom, MPI_Comm subComm, int dim, double time)
|
||||
{
|
||||
int Natoms = 0;
|
||||
MD_FLOAT sum=0, mean=0, total_sum=0;
|
||||
|
||||
for(int i=0; i<atom->Nlocal; i++){
|
||||
sum += atom_pos(i);
|
||||
}
|
||||
MPI_Allreduce(&sum, &total_sum, 1, type, MPI_SUM, subComm);
|
||||
MPI_Allreduce(&atom->Nlocal, &Natoms, 1, MPI_INT, MPI_SUM, subComm);
|
||||
mean = total_sum/Natoms;
|
||||
return mean;
|
||||
}
|
||||
|
||||
void nextBisectionLevel(Grid* grid, Atom* atom, RCB_Method method, MPI_Comm subComm, int dim ,int* color, int ilevel, double time)
|
||||
{
|
||||
int rank, size;
|
||||
int branch = 0, i = 0, m = 0;
|
||||
int nsend = 0, nrecv = 0, nrecv2 = 0;
|
||||
int values_per_atom = 7;
|
||||
MD_FLOAT bisection, pos;
|
||||
MPI_Request request[2] = {MPI_REQUEST_NULL,MPI_REQUEST_NULL};
|
||||
MPI_Comm_rank(subComm,&rank);
|
||||
MPI_Comm_size(subComm,&size);
|
||||
|
||||
int odd = size%2;
|
||||
int extraProc = odd ? size-1:size;
|
||||
int half = (int) (0.5*size);
|
||||
int partner = (rank<half) ? rank+half:rank-half;
|
||||
if(odd && rank == extraProc) partner = 0;
|
||||
//Apply the bisection
|
||||
bisection = method(atom,subComm,dim,time);
|
||||
//Define the new boundaries
|
||||
if(rank<half){
|
||||
atom->mybox.hi[dim] = bisection;
|
||||
branch = 0;
|
||||
} else {
|
||||
atom->mybox.lo[dim] = bisection;
|
||||
branch = 1;
|
||||
}
|
||||
//Define new color for the further communicaton
|
||||
*color = (branch << ilevel) | *color;
|
||||
//Grow the send buffer
|
||||
if(atom->Nlocal>=grid->maxsend){
|
||||
if(grid->buf_send) free(grid->buf_send);
|
||||
grid->buf_send = (MD_FLOAT*) malloc(atom->Nlocal*values_per_atom* sizeof(MD_FLOAT));
|
||||
grid->maxsend = atom->Nlocal;
|
||||
}
|
||||
//buffer particles to send
|
||||
while(i < atom->Nlocal) {
|
||||
pos = atom_pos(i);
|
||||
if(pos < atom->mybox.lo[dim] || pos >= atom->mybox.hi[dim]) {
|
||||
nsend += packExchange(atom, i, &grid->buf_send[nsend]);
|
||||
copy(atom, i, atom->Nlocal-1);
|
||||
atom->Nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
|
||||
//Communicate the number of elements to be sent
|
||||
if(rank < extraProc){
|
||||
MPI_Irecv(&nrecv,1,MPI_INT,partner,0,subComm,&request[0]);
|
||||
}
|
||||
if(odd && rank == 0){
|
||||
MPI_Irecv(&nrecv2,1,MPI_INT,extraProc,0,subComm,&request[1]);
|
||||
}
|
||||
MPI_Send(&nsend,1,MPI_INT,partner,0,subComm);
|
||||
MPI_Waitall(2,request,MPI_STATUS_IGNORE);
|
||||
|
||||
//Grow the recv buffer
|
||||
if(nrecv+nrecv2>=grid->maxrecv){
|
||||
if(grid->buf_recv) free(grid->buf_recv);
|
||||
grid->buf_recv = (MD_FLOAT*) malloc((nrecv+nrecv2)*values_per_atom*sizeof(MD_FLOAT));
|
||||
grid->maxrecv = nrecv+nrecv2;
|
||||
}
|
||||
|
||||
//communicate elements in the buffer
|
||||
request[0] = MPI_REQUEST_NULL;
|
||||
request[1] = MPI_REQUEST_NULL;
|
||||
|
||||
if(rank < extraProc){
|
||||
MPI_Irecv(grid->buf_recv,nrecv,type,partner,0,subComm,&request[0]);
|
||||
}
|
||||
if(odd && rank == 0){
|
||||
MPI_Irecv(&grid->buf_recv[nrecv],nrecv2,type,extraProc,0,subComm,&request[1]);
|
||||
}
|
||||
MPI_Send (grid->buf_send,nsend,type,partner,0,subComm);
|
||||
MPI_Waitall(2,request,MPI_STATUS_IGNORE);
|
||||
|
||||
//store atoms in atom list
|
||||
while(m < nrecv+nrecv2){
|
||||
m += unpackExchange(atom, atom->Nlocal++, &grid->buf_recv[m]);
|
||||
}
|
||||
}
|
||||
|
||||
void rcbBalance(Grid* grid, Atom* atom, Parameter* param, RCB_Method method, int ndim, double newTime)
|
||||
{
|
||||
int me, nprocs=0, ilevel=0, nboxes=1;
|
||||
int color = 0, size =0;
|
||||
int index, prd[3];
|
||||
MPI_Comm subComm;
|
||||
MPI_Comm_size(world, &nprocs);
|
||||
MPI_Comm_rank(world, &me);
|
||||
|
||||
//set the elapsed time since the last dynamic balance
|
||||
double time = newTime - grid->Timer;
|
||||
|
||||
prd[_x] = atom->mybox.xprd = param->xprd;
|
||||
prd[_y] = atom->mybox.yprd = param->yprd;
|
||||
prd[_z] = atom->mybox.zprd = param->zprd;
|
||||
|
||||
//Sort by larger dimension
|
||||
int largerDim[3] ={_x, _y, _z};
|
||||
|
||||
for(int i = 0; i< 2; i++){
|
||||
for(int j = i+1; j<3; j++)
|
||||
{
|
||||
if(prd[largerDim[j]]>prd[largerDim[i]]){
|
||||
MD_FLOAT tmp = largerDim[j];
|
||||
largerDim[j] = largerDim[i];
|
||||
largerDim[i] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
//Initial Partition
|
||||
atom->mybox.lo[_x] = 0; atom->mybox.hi[_x] = atom->mybox.xprd;
|
||||
atom->mybox.lo[_y] = 0; atom->mybox.hi[_y] = atom->mybox.yprd;
|
||||
atom->mybox.lo[_z] = 0; atom->mybox.hi[_z] = atom->mybox.zprd;
|
||||
|
||||
//Recursion tree
|
||||
while(nboxes<nprocs)
|
||||
{
|
||||
index = ilevel%ndim;
|
||||
MPI_Comm_split(world, color, me, &subComm);
|
||||
MPI_Comm_size(subComm,&size);
|
||||
if(size > 1){
|
||||
nextBisectionLevel(grid, atom, method, subComm, largerDim[index], &color, ilevel, time);
|
||||
}
|
||||
MPI_Comm_free(&subComm);
|
||||
nboxes = pow(2,++ilevel);
|
||||
}
|
||||
//Set the new timer grid
|
||||
grid->Timer = newTime;
|
||||
|
||||
//Creating the global map
|
||||
MD_FLOAT domain[6] = {atom->mybox.lo[_x], atom->mybox.lo[_y], atom->mybox.lo[_z], atom->mybox.hi[_x], atom->mybox.hi[_y], atom->mybox.hi[_z]};
|
||||
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
|
||||
|
||||
//Define the same cutneighbour in all dimensions for the exchange communication
|
||||
for(int dim =_x; dim<=_z; dim++)
|
||||
grid->cutneigh[dim] = param->cutneigh;
|
||||
}
|
||||
|
||||
//Regular grid
|
||||
void cartisian3d(Grid* grid, Parameter* param, Box* box)
|
||||
{
|
||||
int me, nproc;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
int numdim=3;
|
||||
int reorder=0;
|
||||
int periods[3]={1,1,1};
|
||||
int mycoord[3]={0,0,0};
|
||||
int griddim[3]={0,0,0};
|
||||
MD_FLOAT len[3];
|
||||
MPI_Comm cartesian;
|
||||
|
||||
box->xprd = param->xprd;
|
||||
box->yprd = param->yprd;
|
||||
box->zprd = param->zprd;
|
||||
|
||||
//Creates a cartesian 3d grid
|
||||
MPI_Dims_create(nproc, numdim, griddim);
|
||||
MPI_Cart_create(world,numdim,griddim,periods,reorder,&cartesian);
|
||||
grid->nprocs[_x] = griddim[_x];
|
||||
grid->nprocs[_y] = griddim[_y];
|
||||
grid->nprocs[_z] = griddim[_z];
|
||||
|
||||
//Coordinates position in the grid
|
||||
MPI_Cart_coords(cartesian,me,3,mycoord);
|
||||
grid->coord[_x] = mycoord[_x];
|
||||
grid->coord[_y] = mycoord[_y];
|
||||
grid->coord[_z] = mycoord[_z];
|
||||
|
||||
//boundaries of my local box, with origin in (0,0,0).
|
||||
len[_x] = param->xprd / griddim[_x];
|
||||
len[_y] = param->yprd / griddim[_y];
|
||||
len[_z] = param->zprd / griddim[_z];
|
||||
|
||||
box->lo[_x] = mycoord[_x] * len[_x];
|
||||
box->hi[_x] = (mycoord[_x] + 1) * len[_x];
|
||||
box->lo[_y] = mycoord[_y] * len[_y];
|
||||
box->hi[_y] = (mycoord[_y] + 1) * len[_y];
|
||||
box->lo[_z] = mycoord[_z] * len[_z];
|
||||
box->hi[_z] = (mycoord[_z] + 1) * len[_z];
|
||||
|
||||
MD_FLOAT domain[6] = {box->lo[_x], box->lo[_y], box->lo[_z], box->hi[_x], box->hi[_y], box->hi[_z]};
|
||||
MPI_Allgather(domain, 6, type, grid->map, 6, type, world);
|
||||
MPI_Comm_free(&cartesian);
|
||||
|
||||
//Define the same cutneighbour in all dimensions for the exchange communication
|
||||
for(int dim =_x; dim<=_z; dim++)
|
||||
grid->cutneigh[dim] = param->cutneigh;
|
||||
}
|
||||
|
||||
//Other Functions from the grid
|
||||
void initGrid(Grid* grid)
|
||||
{ //start with regular grid
|
||||
int nprocs;
|
||||
MPI_Comm_size(world, &nprocs);
|
||||
grid->map_size = 6 * nprocs;
|
||||
grid->map = (MD_FLOAT*) allocate(ALIGNMENT, grid->map_size * sizeof(MD_FLOAT));
|
||||
//========rcb=======
|
||||
grid->maxsend = 0;
|
||||
grid->maxrecv = 0;
|
||||
grid->buf_send = NULL;
|
||||
grid->buf_recv = NULL;
|
||||
//====staggered=====
|
||||
grid->Timer = 0.;
|
||||
}
|
||||
|
||||
void setupGrid(Grid* grid, Atom* atom, Parameter* param)
|
||||
{
|
||||
int me;
|
||||
MD_FLOAT xlo, ylo, zlo, xhi, yhi, zhi;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
initGrid(grid);
|
||||
|
||||
//Set the origin at (0,0,0)
|
||||
if(param->input_file){
|
||||
for(int i=0; i<atom->Nlocal; i++){
|
||||
atom_x(i) = atom_x(i) - param->xlo;
|
||||
atom_y(i) = atom_y(i) - param->ylo;
|
||||
atom_z(i) = atom_z(i) - param->zlo;
|
||||
}
|
||||
}
|
||||
|
||||
cartisian3d(grid, param, &atom->mybox);
|
||||
|
||||
xlo = atom->mybox.lo[_x]; xhi = atom->mybox.hi[_x];
|
||||
ylo = atom->mybox.lo[_y]; yhi = atom->mybox.hi[_y];
|
||||
zlo = atom->mybox.lo[_z]; zhi = atom->mybox.hi[_z];
|
||||
|
||||
int i = 0;
|
||||
while(i < atom->Nlocal)
|
||||
{
|
||||
if(atom_x(i) >= xlo && atom_x(i)< xhi &&
|
||||
atom_y(i) >= ylo && atom_y(i)< yhi &&
|
||||
atom_z(i) >= zlo && atom_z(i)< zhi)
|
||||
{
|
||||
i++;
|
||||
} else {
|
||||
copy(atom, i, atom->Nlocal-1);
|
||||
atom->Nlocal--;
|
||||
}
|
||||
}
|
||||
|
||||
//printGrid(grid);
|
||||
if(!param->balance){
|
||||
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
|
||||
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
}
|
||||
|
||||
void printGrid(Grid* grid)
|
||||
{
|
||||
int me, nprocs;
|
||||
MPI_Comm_size(world, &nprocs);
|
||||
MPI_Comm_rank(world, &me);
|
||||
MD_FLOAT* map = grid->map;
|
||||
if(me==0)
|
||||
{
|
||||
|
||||
printf("GRID:\n");
|
||||
printf("===================================================================================================\n");
|
||||
for(int i=0; i<nprocs; i++)
|
||||
printf("Box:%i\txlo:%.4f\txhi:%.4f\tylo:%.4f\tyhi:%.4f\tzlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
|
||||
printf("\n\n");
|
||||
//printf("Box processor:%i\n xlo:%.4f\txhi:%.4f\n ylo:%.4f\tyhi:%.4f\n zlo:%.4f\tzhi:%.4f\n", i,map[6*i],map[6*i+3],map[6*i+1],map[6*i+4],map[6*i+2],map[6*i+5]);
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
22
common/includes/box.h
Normal file
22
common/includes/box.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __BOX_H_
|
||||
#define __BOX_H_
|
||||
|
||||
typedef struct {
|
||||
int id;
|
||||
MD_FLOAT xprd, yprd, zprd; //Domain Dimension
|
||||
MD_FLOAT lo[3]; //smallest coordinate of my subdomain
|
||||
MD_FLOAT hi[3]; //Highest coordinate of my subdomain
|
||||
} Box;
|
||||
|
||||
int overlapBox(int, int , const Box*, const Box* , Box* , MD_FLOAT , MD_FLOAT);
|
||||
int overlapFullBox(Parameter*, MD_FLOAT*, const Box*, const Box*);
|
||||
void expandBox(int , const Box*, const Box* , Box* , MD_FLOAT);
|
||||
#endif
|
104
common/includes/comm.h
Normal file
104
common/includes/comm.h
Normal file
@@ -0,0 +1,104 @@
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <box.h>
|
||||
#include <grid.h>
|
||||
|
||||
#ifndef COMM_H
|
||||
#define COMM_H
|
||||
|
||||
#ifdef GROMACS
|
||||
#define FORWARD_SIZE (3*CLUSTER_N)
|
||||
#define REVERSE_SIZE (3*CLUSTER_N)
|
||||
#define GHOST_SIZE (4*CLUSTER_N+10)
|
||||
#define EXCHANGE_SIZE 7
|
||||
|
||||
#define JFAC MAX(1, CLUSTER_N / CLUSTER_M)
|
||||
#define LOCAL atom->Nclusters_local / JFAC
|
||||
#define GHOST atom->Nclusters_ghost
|
||||
|
||||
#define IsinRegionToSend(cj) \
|
||||
((atom->jclusters[(cj)].bbminx >= xlo || atom->jclusters[(cj)].bbmaxx >= xlo) && \
|
||||
(atom->jclusters[(cj)].bbminx < xhi || atom->jclusters[(cj)].bbmaxx < xhi) && \
|
||||
(atom->jclusters[(cj)].bbminy >= ylo || atom->jclusters[(cj)].bbmaxy >= ylo) && \
|
||||
(atom->jclusters[(cj)].bbminy < yhi || atom->jclusters[(cj)].bbmaxy < yhi) && \
|
||||
(atom->jclusters[(cj)].bbminz >= zlo || atom->jclusters[(cj)].bbmaxz >= zlo) && \
|
||||
(atom->jclusters[(cj)].bbminz < zhi || atom->jclusters[(cj)].bbmaxz < zhi))
|
||||
|
||||
#else
|
||||
|
||||
#define FORWARD_SIZE 3
|
||||
#define REVERSE_SIZE 3
|
||||
#define GHOST_SIZE 4
|
||||
#define EXCHANGE_SIZE 7
|
||||
#define LOCAL atom->Nlocal
|
||||
#define GHOST atom->Nghost
|
||||
|
||||
#define IsinRegionToSend(i) \
|
||||
((atom_x((i)) >= xlo && atom_x((i)) < xhi) && \
|
||||
(atom_y((i)) >= ylo && atom_y((i)) < yhi) && \
|
||||
(atom_z((i)) >= zlo && atom_z((i)) < zhi))
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int myproc; // my proc ID
|
||||
int numproc; // # of processors
|
||||
|
||||
int numneigh; // # of all my neighs along all swaps
|
||||
int maxneigh; // Buffer size for my neighs
|
||||
int sendfrom[6]; //return the lowest neigh index to send in each swap
|
||||
int sendtill[6]; //return the highest neigh index to send in each swao
|
||||
int recvfrom[6]; //return the lowest neigh index to recv in each swap
|
||||
int recvtill[6]; //return the highest neigh index to recv in each swap
|
||||
int* nsend; // neigh whose I want to send
|
||||
int* nrecv; // neigh whose I want to recv
|
||||
|
||||
int* pbc_x; // if pbc in x
|
||||
int* pbc_y; // if pbc in y
|
||||
int* pbc_z; // if pbc in z
|
||||
|
||||
int* atom_send, *atom_recv; // # of atoms to send/recv for each of my neighs
|
||||
int* off_atom_send; // atom offset to send, inside of a swap
|
||||
int* off_atom_recv; // atom offset to recv, inside of a swap
|
||||
|
||||
int* nexch; //procs to exchange
|
||||
int numneighexch; //# of neighbours to exchange
|
||||
int maxneighexch; //max buff size to store neighbours
|
||||
|
||||
int numswap; // # of swaps to perform, it is 6
|
||||
int swapdim[6]; // dimension of the swap (_x, _y or _z)
|
||||
int swapdir[6]; // direction of the swap 0 or 1
|
||||
int swap[3][2]; // given a dim and dir, knows the swap
|
||||
int othersend[6]; // Determine if a proc interact with more procs in a given swap
|
||||
|
||||
int firstrecv[6]; // where to put 1st recv atom in each swap
|
||||
int** sendlist; // list of atoms to send in each swap
|
||||
int* maxsendlist; // max # of atoms send in each list-swap
|
||||
|
||||
int maxsend; // max elements in buff sender
|
||||
int maxrecv; // max elements in buff receiver
|
||||
MD_FLOAT* buf_send; // sender buffer for all comm
|
||||
MD_FLOAT* buf_recv; // receicer buffer for all comm
|
||||
|
||||
int forwardSize; // # of paramaters per atom in forward comm.
|
||||
int reverseSize; // # of parameters per atom in reverse
|
||||
int exchangeSize; // # of parameters per atom in exchange
|
||||
int ghostSize; // # of parameters per atom in ghost list
|
||||
|
||||
int iterAtom; //last atom to iterate in each swap.
|
||||
Box* boxes; // Boundaries to be sent to other procs as ghost.
|
||||
} Comm;
|
||||
|
||||
|
||||
void initComm(int*, char***, Comm*); //Init MPI
|
||||
void endComm(Comm*); //End MPI
|
||||
void setupComm(Comm*,Parameter*,Grid*); //Creates a 3d grid or rcb grid
|
||||
void neighComm(Comm*,Parameter*,Grid*); //Find neighbours within cut-off and defines ghost regions
|
||||
void forwardComm(Comm*,Atom*,int); //Send info in one direction
|
||||
void reverseComm(Comm*,Atom*,int); //Return info after forward communication
|
||||
void exchangeComm(Comm*,Atom*); //Exchange info between procs
|
||||
void ghostComm(Comm*, Atom*,int); //Build the ghost neighbours to send during next forwards
|
||||
void growSend(Comm*,int); //Grows the size of the buffer sender
|
||||
void growRecv(Comm*,int); //Grows the size of the buffer receiver
|
||||
void growList(Comm*, int, int); //Grows the size of the list to send
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
51
common/includes/grid.h
Normal file
51
common/includes/grid.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
|
||||
|
||||
#include <parameter.h>
|
||||
#include <box.h>
|
||||
#include <atom.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#ifndef __MAP_H_
|
||||
#define __MAP_H_
|
||||
|
||||
#define world MPI_COMM_WORLD
|
||||
#define atom_pos(i) ((dim == _x) ? atom_x((i)) : (dim == _y) ? atom_y((i)) : atom_z((i)))
|
||||
|
||||
enum {RCB=1, meanTimeRCB, Staggered};
|
||||
|
||||
typedef struct {
|
||||
int balance_every;
|
||||
int map_size;
|
||||
MD_FLOAT* map;
|
||||
//===Param for Staggerd balance
|
||||
int nprocs[3];
|
||||
int coord[3];
|
||||
MD_FLOAT cutneigh[3];
|
||||
double Timer;
|
||||
//===Param for RCB balance
|
||||
MD_FLOAT* buf_send;
|
||||
MD_FLOAT* buf_recv;
|
||||
int maxsend;
|
||||
int maxrecv;
|
||||
} Grid;
|
||||
|
||||
|
||||
typedef MD_FLOAT(*RCB_Method)(Atom*,MPI_Comm,int,double);
|
||||
|
||||
void setupGrid(Grid*, Atom*, Parameter*);
|
||||
void cartisian3d(Grid*, Parameter*, Box*);
|
||||
void rcbBalance(Grid*, Atom*, Parameter* ,RCB_Method, int, double);
|
||||
void staggeredBalance(Grid*, Atom*, Parameter*, double);
|
||||
void printGrid(Grid*);
|
||||
//rcb methods
|
||||
MD_FLOAT meanBisect(Atom* , MPI_Comm, int, double);
|
||||
MD_FLOAT meanTimeBisect(Atom*, MPI_Comm, int, double);
|
||||
#endif
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -53,6 +53,10 @@ typedef struct {
|
||||
MD_FLOAT k_dn;
|
||||
MD_FLOAT gx, gy, gz;
|
||||
MD_FLOAT reflect_x, reflect_y, reflect_z;
|
||||
//MPI implementation
|
||||
int balance;
|
||||
int method;
|
||||
int balance_every;
|
||||
} Parameter;
|
||||
|
||||
void initParameter(Parameter*);
|
71
common/includes/shell_methods.h
Normal file
71
common/includes/shell_methods.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <comm.h>
|
||||
#include <atom.h>
|
||||
#include <timing.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
|
||||
//static void addDummyCluster(Atom*);
|
||||
|
||||
double forward(Comm* comm, Atom *atom, Parameter* param){
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
if(param->method == halfShell){
|
||||
for(int iswap = 0; iswap < 5; iswap++)
|
||||
forwardComm(comm, atom, iswap);
|
||||
} else if(param->method == eightShell){
|
||||
for(int iswap = 0; iswap < 6; iswap+=2)
|
||||
forwardComm(comm, atom, iswap);
|
||||
} else {
|
||||
for(int iswap = 0; iswap < 6; iswap++)
|
||||
forwardComm(comm, atom, iswap);
|
||||
}
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reverse(Comm* comm, Atom *atom, Parameter* param){
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
if(param->method == halfShell){
|
||||
for(int iswap = 4; iswap >= 0; iswap--)
|
||||
reverseComm(comm, atom, iswap);
|
||||
} else if(param->method == eightShell){
|
||||
for(int iswap = 4; iswap >= 0; iswap-=2)
|
||||
reverseComm(comm, atom, iswap);
|
||||
} else if(param->method == halfStencil){
|
||||
for(int iswap = 5; iswap >= 0; iswap--)
|
||||
reverseComm(comm, atom, iswap);
|
||||
} else { } //Full Shell Reverse does nothing
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void ghostNeighbor(Comm* comm, Atom* atom, Parameter* param)
|
||||
{
|
||||
#ifdef GROMACS
|
||||
atom->Nclusters_ghost = 0;
|
||||
#endif
|
||||
atom->Nghost = 0;
|
||||
if(param->method == halfShell){
|
||||
for(int iswap=0; iswap<5; iswap++)
|
||||
ghostComm(comm,atom,iswap);
|
||||
} else if(param->method == eightShell){
|
||||
for(int iswap = 0; iswap<6; iswap+=2)
|
||||
ghostComm(comm, atom,iswap);
|
||||
} else {
|
||||
for(int iswap=0; iswap<6; iswap++)
|
||||
ghostComm(comm,atom,iswap);
|
||||
}
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -9,9 +9,15 @@
|
||||
|
||||
typedef enum {
|
||||
TOTAL = 0,
|
||||
NEIGH,
|
||||
FORCE,
|
||||
NEIGH,
|
||||
FORWARD,
|
||||
REVERSE,
|
||||
UPDATE,
|
||||
BALANCE,
|
||||
SETUP,
|
||||
REST,
|
||||
NUMTIMER
|
||||
} timertype;
|
||||
} timerComm;
|
||||
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -9,5 +9,6 @@
|
||||
|
||||
extern double getTimeStamp(void);
|
||||
extern double getTimeResolution(void);
|
||||
extern double getTimeStamp_(void);
|
||||
|
||||
#endif
|
55
common/includes/util.h
Normal file
55
common/includes/util.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <math.h>
|
||||
|
||||
#ifndef __UTIL_H_
|
||||
#define __UTIL_H_
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
# define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#endif
|
||||
|
||||
#ifndef ABS
|
||||
# define ABS(a) ((a) >= 0 ? (a) : -(a))
|
||||
#endif
|
||||
|
||||
#define DEBUG_MESSAGE debug_printf
|
||||
|
||||
#ifndef MAXLINE
|
||||
# define MAXLINE 4096
|
||||
#endif
|
||||
|
||||
#define FF_LJ 0
|
||||
#define FF_EAM 1
|
||||
#define FF_DEM 2
|
||||
|
||||
#if PRECISION == 1
|
||||
# define PRECISION_STRING "single"
|
||||
#else
|
||||
# define PRECISION_STRING "double"
|
||||
#endif
|
||||
|
||||
#define BigOrEqual(a,b) (fabs((a)-(b))<1e-9 || (a)>(b))
|
||||
#define Equal(a,b) (fabs((a)-(b))<1e-9)
|
||||
|
||||
enum {_x=0, _y, _z};
|
||||
enum {fullShell=0, halfShell, eightShell, halfStencil};
|
||||
|
||||
|
||||
extern double myrandom(int*);
|
||||
extern void random_reset(int *seed, int ibase, double *coord);
|
||||
extern int str2ff(const char *string);
|
||||
extern const char* ff2str(int ff);
|
||||
extern void readline(char *line, FILE *fp);
|
||||
extern void debug_printf(const char *format, ...);
|
||||
extern int get_cuda_num_threads();
|
||||
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
void initParameter(Parameter *param) {
|
||||
param->input_file = NULL;
|
||||
@@ -54,6 +55,10 @@ void initParameter(Parameter *param) {
|
||||
param->reflect_x = 0.0;
|
||||
param->reflect_y = 0.0;
|
||||
param->reflect_z = 0.0;
|
||||
//MPI
|
||||
param->balance = 0;
|
||||
param->method = 0;
|
||||
param->balance_every =param->reneigh_every;
|
||||
}
|
||||
|
||||
void readParameter(Parameter *param, const char *filename) {
|
||||
@@ -72,8 +77,8 @@ void readParameter(Parameter *param, const char *filename) {
|
||||
for(i = 0; line[i] != '\0' && line[i] != '#'; i++);
|
||||
line[i] = '\0';
|
||||
|
||||
char *tok = strtok(line, " ");
|
||||
char *val = strtok(NULL, " ");
|
||||
char *tok = strtok(line, "\t ");
|
||||
char *val = strtok(NULL, "\t ");
|
||||
|
||||
#define PARSE_PARAM(p,f) if(strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { param->p = f(val); }
|
||||
#define PARSE_STRING(p) PARSE_PARAM(p, strdup)
|
||||
@@ -117,15 +122,20 @@ void readParameter(Parameter *param, const char *filename) {
|
||||
PARSE_INT(x_out_every);
|
||||
PARSE_INT(v_out_every);
|
||||
PARSE_INT(half_neigh);
|
||||
PARSE_INT(method);
|
||||
PARSE_INT(balance);
|
||||
PARSE_INT(balance_every);
|
||||
}
|
||||
}
|
||||
|
||||
// Update dtforce
|
||||
param->dtforce = 0.5 * param->dt;
|
||||
|
||||
// Update sigma6 parameter
|
||||
MD_FLOAT s2 = param->sigma * param->sigma;
|
||||
param->sigma6 = s2 * s2 * s2;
|
||||
|
||||
//Update balance parameter, 10 could be change
|
||||
param->balance_every *=param->reneigh_every;
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
@@ -183,4 +193,19 @@ void printParameter(Parameter *param) {
|
||||
printf("\tSkin: %e\n", param->skin);
|
||||
printf("\tHalf neighbor lists: %d\n", param->half_neigh);
|
||||
printf("\tProcessor frequency (GHz): %.4f\n", param->proc_freq);
|
||||
|
||||
// ================ New MPI features =============
|
||||
char str[20];
|
||||
strcpy(str, (param->method == 1) ? "Half Shell" :
|
||||
(param->method == 2) ? "Eight Shell" :
|
||||
(param->method == 3) ? "Half Stencil":
|
||||
"Full Shell");
|
||||
printf("\tMethod: %s\n", str);
|
||||
strcpy(str, (param->balance == 1) ? "mean RCB" :
|
||||
(param->balance == 2) ? "mean Time RCB" :
|
||||
(param->balance == 3) ? "Staggered" :
|
||||
"cartisian");
|
||||
printf("\tPartition: %s\n", str);
|
||||
if(param->balance)
|
||||
printf("\tRebalancing every (timesteps): %d\n",param->balance_every);
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
#include <thermo.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
static int *steparr;
|
||||
static MD_FLOAT *tmparr;
|
||||
@@ -24,6 +25,7 @@ static MD_FLOAT t_act;
|
||||
static MD_FLOAT p_act;
|
||||
static MD_FLOAT e_act;
|
||||
static int mstat;
|
||||
static MPI_Datatype type = (sizeof(MD_FLOAT) == 4) ? MPI_FLOAT : MPI_DOUBLE;
|
||||
|
||||
/* exported subroutines */
|
||||
void setupThermo(Parameter *param, int natoms)
|
||||
@@ -53,33 +55,42 @@ void setupThermo(Parameter *param, int natoms)
|
||||
|
||||
void computeThermo(int iflag, Parameter *param, Atom *atom)
|
||||
{
|
||||
MD_FLOAT t = 0.0, p;
|
||||
MD_FLOAT t_sum = 0.0, t = 0.0, p;
|
||||
int me;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
|
||||
}
|
||||
|
||||
t = t * t_scale;
|
||||
p = (t * dof_boltz) * p_scale;
|
||||
int istep = iflag;
|
||||
MPI_Reduce(&t, &t_sum, 1, type, MPI_SUM, 0 ,MPI_COMM_WORLD);
|
||||
if(me == 0)
|
||||
{
|
||||
t = t_sum * t_scale;
|
||||
p = (t * dof_boltz) * p_scale;
|
||||
int istep = iflag;
|
||||
|
||||
if(iflag == -1){
|
||||
istep = param->ntimes;
|
||||
}
|
||||
if(iflag == 0){
|
||||
mstat = 0;
|
||||
}
|
||||
if(iflag == -1){
|
||||
istep = param->ntimes;
|
||||
}
|
||||
if(iflag == 0){
|
||||
mstat = 0;
|
||||
}
|
||||
|
||||
steparr[mstat] = istep;
|
||||
tmparr[mstat] = t;
|
||||
prsarr[mstat] = p;
|
||||
mstat++;
|
||||
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
|
||||
steparr[mstat] = istep;
|
||||
tmparr[mstat] = t;
|
||||
prsarr[mstat] = p;
|
||||
mstat++;
|
||||
fprintf(stdout, "%i\t%e\t%e\n", istep, t, p);
|
||||
}
|
||||
}
|
||||
|
||||
void adjustThermo(Parameter *param, Atom *atom)
|
||||
{
|
||||
/* zero center-of-mass motion */
|
||||
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
|
||||
MD_FLOAT v_sum[3], vtot[3];
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
vxtot += atom_vx(i);
|
||||
@@ -87,9 +98,13 @@ void adjustThermo(Parameter *param, Atom *atom)
|
||||
vztot += atom_vz(i);
|
||||
}
|
||||
|
||||
vxtot = vxtot / atom->Natoms;
|
||||
vytot = vytot / atom->Natoms;
|
||||
vztot = vztot / atom->Natoms;
|
||||
vtot[0] = vxtot; vtot[1] = vytot; vtot[2] = vztot;
|
||||
|
||||
MPI_Allreduce(vtot, v_sum, 3, type, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
vxtot = v_sum[0] / atom->Natoms;
|
||||
vytot = v_sum[1] / atom->Natoms;
|
||||
vztot = v_sum[2] / atom->Natoms;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
atom_vx(i) -= vxtot;
|
||||
@@ -97,13 +112,16 @@ void adjustThermo(Parameter *param, Atom *atom)
|
||||
atom_vz(i) -= vztot;
|
||||
}
|
||||
|
||||
t_act = 0;
|
||||
MD_FLOAT t = 0.0;
|
||||
MD_FLOAT t_sum = 0.0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
|
||||
}
|
||||
|
||||
MPI_Allreduce(&t, &t_sum, 1,type, MPI_SUM,MPI_COMM_WORLD);
|
||||
|
||||
t = t_sum;
|
||||
t *= t_scale;
|
||||
MD_FLOAT factor = sqrt(param->temp / t);
|
||||
|
@@ -1,21 +1,27 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
double getTimeStamp(void)
|
||||
double getTimeStamp()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeResolution(void)
|
||||
double getTimeResolution()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_getres(CLOCK_MONOTONIC, &ts);
|
||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||
}
|
||||
|
||||
double getTimeStamp_()
|
||||
{
|
||||
return getTimeStamp();
|
||||
}
|
@@ -1,39 +1,39 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <util.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Park/Miller RNG w/out MASKING, so as to be like f90s version */
|
||||
#define IA 16807
|
||||
#define IM 2147483647
|
||||
#define AM (1.0 / IM)
|
||||
#define IQ 127773
|
||||
#define IR 2836
|
||||
#define IA 16807
|
||||
#define IM 2147483647
|
||||
#define AM (1.0/IM)
|
||||
#define IQ 127773
|
||||
#define IR 2836
|
||||
#define MASK 123459876
|
||||
|
||||
double myrandom(int* seed)
|
||||
{
|
||||
int k = (*seed) / IQ;
|
||||
double myrandom(int* seed) {
|
||||
int k= (*seed) / IQ;
|
||||
double ans;
|
||||
|
||||
*seed = IA * (*seed - k * IQ) - IR * k;
|
||||
if (*seed < 0) *seed += IM;
|
||||
if(*seed < 0) *seed += IM;
|
||||
ans = AM * (*seed);
|
||||
return ans;
|
||||
}
|
||||
|
||||
void random_reset(int* seed, int ibase, double* coord)
|
||||
{
|
||||
void random_reset(int *seed, int ibase, double *coord) {
|
||||
int i;
|
||||
char* str = (char*)&ibase;
|
||||
int n = sizeof(int);
|
||||
char *str = (char *) &ibase;
|
||||
int n = sizeof(int);
|
||||
unsigned int hash = 0;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
@@ -42,8 +42,8 @@ void random_reset(int* seed, int ibase, double* coord)
|
||||
hash ^= (hash >> 6);
|
||||
}
|
||||
|
||||
str = (char*)coord;
|
||||
n = 3 * sizeof(double);
|
||||
str = (char *) coord;
|
||||
n = 3 * sizeof(double);
|
||||
for (i = 0; i < n; i++) {
|
||||
hash += str[i];
|
||||
hash += (hash << 10);
|
||||
@@ -62,59 +62,46 @@ void random_reset(int* seed, int ibase, double* coord)
|
||||
|
||||
// warm up the RNG
|
||||
|
||||
for (i = 0; i < 5; i++)
|
||||
myrandom(seed);
|
||||
// save = 0;
|
||||
for (i = 0; i < 5; i++) myrandom(seed);
|
||||
//save = 0;
|
||||
}
|
||||
|
||||
int str2ff(const char* string)
|
||||
{
|
||||
if (strncmp(string, "lj", 2) == 0) return FF_LJ;
|
||||
if (strncmp(string, "eam", 3) == 0) return FF_EAM;
|
||||
if (strncmp(string, "dem", 3) == 0) return FF_DEM;
|
||||
int str2ff(const char *string) {
|
||||
if(strncmp(string, "lj", 2) == 0) return FF_LJ;
|
||||
if(strncmp(string, "eam", 3) == 0) return FF_EAM;
|
||||
if(strncmp(string, "dem", 3) == 0) return FF_DEM;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char* ff2str(int ff)
|
||||
{
|
||||
if (ff == FF_LJ) {
|
||||
return "lj";
|
||||
}
|
||||
if (ff == FF_EAM) {
|
||||
return "eam";
|
||||
}
|
||||
if (ff == FF_DEM) {
|
||||
return "dem";
|
||||
}
|
||||
const char* ff2str(int ff) {
|
||||
if(ff == FF_LJ) { return "lj"; }
|
||||
if(ff == FF_EAM) { return "eam"; }
|
||||
if(ff == FF_DEM) { return "dem"; }
|
||||
return "invalid";
|
||||
}
|
||||
|
||||
int get_cuda_num_threads(void)
|
||||
{
|
||||
const char* num_threads_env = getenv("NUM_THREADS");
|
||||
int get_cuda_num_threads() {
|
||||
const char *num_threads_env = getenv("NUM_THREADS");
|
||||
return (num_threads_env == NULL) ? 32 : atoi(num_threads_env);
|
||||
}
|
||||
|
||||
void readline(char* line, FILE* fp)
|
||||
{
|
||||
if (fgets(line, MAXLINE, fp) == NULL) {
|
||||
if (errno != 0) {
|
||||
void readline(char *line, FILE *fp) {
|
||||
if(fgets(line, MAXLINE, fp) == NULL) {
|
||||
printf("error %i\n",errno);
|
||||
if(errno != 0) {
|
||||
perror("readline()");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void debug_printf(const char* format, ...)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
void debug_printf(const char *format, ...) {
|
||||
#ifdef DEBUG
|
||||
va_list arg;
|
||||
int ret;
|
||||
|
||||
va_start(arg, format);
|
||||
if ((vfprintf(stdout, format, arg)) < 0) {
|
||||
perror("debug_printf()");
|
||||
}
|
||||
if((vfprintf(stdout, format, arg)) < 0) { perror("debug_printf()"); }
|
||||
va_end(arg);
|
||||
#endif
|
||||
#endif
|
||||
}
|
113
config.mk
113
config.mk
@@ -1,18 +1,17 @@
|
||||
# Compiler tool chain (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
||||
TOOLCHAIN ?= CLANG
|
||||
# Instruction set for instrinsic kernels (NONE/SSE/AVX/AVX_FMA/AVX2/AVX512)
|
||||
ISA ?= ARM
|
||||
SIMD ?= NONE
|
||||
# Optimization scheme (verletlist/clusterpair/clusters_per_bin)
|
||||
OPT_SCHEME ?= verletlist
|
||||
# Compiler tag (GCC/CLANG/ICC/ICX/ONEAPI/NVCC)
|
||||
TAG ?= MPIICC
|
||||
# Instruction set (SSE/AVX/AVX_FMA/AVX2/AVX512)
|
||||
ISA ?= AVX512
|
||||
# Optimization scheme (lammps/gromacs/clusters_per_bin)
|
||||
OPT_SCHEME ?= gromacs
|
||||
# Enable likwid (true or false)
|
||||
ENABLE_LIKWID ?= false
|
||||
# SP or DP
|
||||
DATA_TYPE ?= DP
|
||||
# AOS or SOA
|
||||
DATA_LAYOUT ?= AOS
|
||||
DATA_LAYOUT ?= SOA
|
||||
# Assembly syntax to generate (ATT/INTEL)
|
||||
ASM_SYNTAX ?= INTEL
|
||||
ASM_SYNTAX ?= ATT
|
||||
# Debug
|
||||
DEBUG ?= false
|
||||
|
||||
@@ -25,13 +24,13 @@ MEM_TRACER ?= false
|
||||
# Trace indexes and distances for gather-md (true or false)
|
||||
INDEX_TRACER ?= false
|
||||
# Compute statistics
|
||||
COMPUTE_STATS ?= true
|
||||
COMPUTE_STATS ?= false
|
||||
|
||||
# Configurations for lammps optimization scheme
|
||||
# Use omp simd pragma when running with half neighbor-lists
|
||||
ENABLE_OMP_SIMD ?= false
|
||||
# Use kernel with explicit SIMD intrinsics
|
||||
USE_SIMD_KERNEL ?= false
|
||||
USE_SIMD_KERNEL ?= true
|
||||
|
||||
# Configurations for gromacs optimization scheme
|
||||
# Use reference version
|
||||
@@ -39,7 +38,7 @@ USE_REFERENCE_VERSION ?= false
|
||||
# Enable XTC output
|
||||
XTC_OUTPUT ?= false
|
||||
# Check if cj is local when decreasing reaction force
|
||||
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= true
|
||||
HALF_NEIGHBOR_LISTS_CHECK_CJ ?= false
|
||||
|
||||
# Configurations for CUDA
|
||||
# Use CUDA host memory to optimize transfers
|
||||
@@ -48,93 +47,3 @@ USE_CUDA_HOST_MEMORY ?= false
|
||||
#Feature options
|
||||
OPTIONS = -DALIGNMENT=64
|
||||
#OPTIONS += More options
|
||||
|
||||
#DO NOT EDIT BELOW
|
||||
ifeq ($(strip $(DATA_LAYOUT)),AOS)
|
||||
DEFINES += -DAOS
|
||||
endif
|
||||
ifeq ($(strip $(DATA_TYPE)),SP)
|
||||
DEFINES += -DPRECISION=1
|
||||
else
|
||||
DEFINES += -DPRECISION=2
|
||||
endif
|
||||
|
||||
ifneq ($(ASM_SYNTAX), ATT)
|
||||
ASFLAGS += -masm=intel
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(SORT_ATOMS)),true)
|
||||
DEFINES += -DSORT_ATOMS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(EXPLICIT_TYPES)),true)
|
||||
DEFINES += -DEXPLICIT_TYPES
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(MEM_TRACER)),true)
|
||||
DEFINES += -DMEM_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(INDEX_TRACER)),true)
|
||||
DEFINES += -DINDEX_TRACER
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(COMPUTE_STATS)),true)
|
||||
DEFINES += -DCOMPUTE_STATS
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||
DEFINES += -DXTC_OUTPUT
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
|
||||
DEFINES += -DUSE_REFERENCE_VERSION
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(HALF_NEIGHBOR_LISTS_CHECK_CJ)),true)
|
||||
DEFINES += -DHALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(DEBUG)),true)
|
||||
DEFINES += -DDEBUG
|
||||
endif
|
||||
|
||||
ifneq ($(VECTOR_WIDTH),)
|
||||
DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SIMD_KERNEL__)),true)
|
||||
DEFINES += -D__SIMD_KERNEL__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__SSE__)),true)
|
||||
DEFINES += -D__ISA_SSE__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX__)),true)
|
||||
DEFINES += -D__ISA_AVX__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
|
||||
DEFINES += -D__ISA_AVX_FMA__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX2__)),true)
|
||||
DEFINES += -D__ISA_AVX2__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(__ISA_AVX512__)),true)
|
||||
DEFINES += -D__ISA_AVX512__
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
|
||||
DEFINES += -DENABLE_OMP_SIMD
|
||||
endif
|
||||
|
||||
ifeq ($(strip $(OPT_SCHEME)),verletlist)
|
||||
OPT_TAG = VL
|
||||
endif
|
||||
|
||||
ifneq ($(strip $(SIMD)),NONE)
|
||||
TOOLCHAIN = $(TOOLCHAIN)-$(ISA)-$(SIMD)
|
||||
endif
|
||||
|
BIN
figures/features-v3.png
Normal file
BIN
figures/features-v3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 273 KiB |
BIN
figures/gather_bench.png
Normal file
BIN
figures/gather_bench.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 98 KiB |
523
figures/gather_bench.svg
Normal file
523
figures/gather_bench.svg
Normal file
@@ -0,0 +1,523 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
width="297mm"
|
||||
height="210mm"
|
||||
viewBox="0 0 297 210"
|
||||
version="1.1"
|
||||
id="svg5"
|
||||
inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
|
||||
sodipodi:docname="gather_bench.svg"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg">
|
||||
<sodipodi:namedview
|
||||
id="namedview7"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pagecheckerboard="0"
|
||||
inkscape:document-units="mm"
|
||||
showgrid="false"
|
||||
inkscape:zoom="0.73508842"
|
||||
inkscape:cx="551.63432"
|
||||
inkscape:cy="348.25743"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1011"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="165"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="layer1" />
|
||||
<defs
|
||||
id="defs2">
|
||||
<rect
|
||||
x="144.01516"
|
||||
y="304.36604"
|
||||
width="248.99777"
|
||||
height="100.91557"
|
||||
id="rect79475" />
|
||||
<rect
|
||||
x="309.01869"
|
||||
y="43.698615"
|
||||
width="552.19421"
|
||||
height="71.390348"
|
||||
id="rect65238" />
|
||||
<rect
|
||||
x="762.55856"
|
||||
y="341.3838"
|
||||
width="277.62756"
|
||||
height="105.0235"
|
||||
id="rect47632" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient40704">
|
||||
<stop
|
||||
style="stop-color:#ccffaa;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop40700" />
|
||||
<stop
|
||||
style="stop-color:#ccffaa;stop-opacity:0;"
|
||||
offset="1"
|
||||
id="stop40702" />
|
||||
</linearGradient>
|
||||
<marker
|
||||
style="overflow:visible;"
|
||||
id="Arrow2Mend"
|
||||
refX="0.0"
|
||||
refY="0.0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(0.6) rotate(180) translate(0,0)"
|
||||
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
|
||||
style="stroke:context-stroke;fill-rule:evenodd;fill:context-stroke;stroke-width:0.62500000;stroke-linejoin:round;"
|
||||
id="path39486" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible;"
|
||||
id="Arrow1Mend"
|
||||
refX="0.0"
|
||||
refY="0.0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow1Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(0.4) rotate(180) translate(10,0)"
|
||||
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
|
||||
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
|
||||
id="path39468" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible;"
|
||||
id="Arrow1Lend"
|
||||
refX="0.0"
|
||||
refY="0.0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow1Lend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(0.8) rotate(180) translate(12.5,0)"
|
||||
style="fill-rule:evenodd;fill:context-stroke;stroke:context-stroke;stroke-width:1.0pt;"
|
||||
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
|
||||
id="path39462" />
|
||||
</marker>
|
||||
<rect
|
||||
x="707.09731"
|
||||
y="616.36746"
|
||||
width="407.71288"
|
||||
height="417.08306"
|
||||
id="rect24254" />
|
||||
<rect
|
||||
x="47.404365"
|
||||
y="100.3268"
|
||||
width="398.49855"
|
||||
height="110.16514"
|
||||
id="rect5050" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5-6" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5-6-1" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0-6" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0-6-2" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-0-6-2-8" />
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow2Mend-2"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(-0.6)"
|
||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
||||
id="path39486-3" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow2Mend-2-5"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(-0.6)"
|
||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
||||
id="path39486-3-9" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow2Mend-2-5-2"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto"
|
||||
inkscape:stockid="Arrow2Mend"
|
||||
inkscape:isstock="true">
|
||||
<path
|
||||
transform="scale(-0.6)"
|
||||
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:0.625;stroke-linejoin:round"
|
||||
id="path39486-3-9-8" />
|
||||
</marker>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient40704"
|
||||
id="linearGradient40706"
|
||||
x1="324.58157"
|
||||
y1="127.35331"
|
||||
x2="363.61096"
|
||||
y2="98.957848"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
<rect
|
||||
x="47.404366"
|
||||
y="100.3268"
|
||||
width="398.49854"
|
||||
height="110.16514"
|
||||
id="rect5050-3-5-6-1-7" />
|
||||
<rect
|
||||
x="309.01868"
|
||||
y="43.698616"
|
||||
width="552.19421"
|
||||
height="71.39035"
|
||||
id="rect65238-1" />
|
||||
</defs>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1">
|
||||
<rect
|
||||
style="fill:#d5d5ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
|
||||
id="rect55834"
|
||||
width="250.31726"
|
||||
height="74.676537"
|
||||
x="25.257824"
|
||||
y="97.277718" />
|
||||
<rect
|
||||
style="fill:#d5f6ff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0"
|
||||
id="rect55832"
|
||||
width="250.35208"
|
||||
height="64.461151"
|
||||
x="25.256891"
|
||||
y="32.817505" />
|
||||
<rect
|
||||
style="fill:#ccffaa;stroke:#091600;stroke-width:1.31891"
|
||||
id="rect6462"
|
||||
width="82.385742"
|
||||
height="20.525751"
|
||||
x="28.355024"
|
||||
y="48.740646" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,17.244577,26.206534)"
|
||||
id="text5048"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82948"><tspan
|
||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
||||
id="tspan82946">gather-bench</tspan></tspan></text>
|
||||
<rect
|
||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9"
|
||||
width="18.764017"
|
||||
height="20.965076"
|
||||
x="39.518955"
|
||||
y="140.726" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.33667319,0,0,0.33667319,25.589293,109.42998)"
|
||||
id="text5048-3"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82950">L1</tspan></text>
|
||||
<rect
|
||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9-0"
|
||||
width="21.653919"
|
||||
height="24.193966"
|
||||
x="97.687294"
|
||||
y="138.51564" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.3885252,0,0,0.3885252,81.212654,102.39964)"
|
||||
id="text5048-3-6"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82952">L2</tspan></text>
|
||||
<rect
|
||||
style="fill:#de87aa;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9-0-6"
|
||||
width="27.217058"
|
||||
height="30.409672"
|
||||
x="149.19933"
|
||||
y="134.83977" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.48834178,0,0,0.48834178,128.49215,89.445174)"
|
||||
id="text5048-3-6-1"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82954">L3</tspan></text>
|
||||
<rect
|
||||
style="fill:#eeaaff;stroke:#091600;stroke-width:1.5;stroke-miterlimit:4;stroke-dasharray:none"
|
||||
id="rect6462-9-0-6-7"
|
||||
width="61.032539"
|
||||
height="29.96501"
|
||||
x="204.01265"
|
||||
y="135.61238" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.48834178,0,0,0.48834178,182.37007,89.995434)"
|
||||
id="text5048-3-6-1-9"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-0-6-2-8);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82956">DRAM</tspan></text>
|
||||
<rect
|
||||
style="fill:#ffccaa;stroke:#091600;stroke-width:1.10636"
|
||||
id="rect6462-6"
|
||||
width="74.980759"
|
||||
height="15.869514"
|
||||
x="126.09525"
|
||||
y="38.773243" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,115.65481,14.295323)"
|
||||
id="text5048-7"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82958">Single gather</tspan></text>
|
||||
<rect
|
||||
style="fill:#ffccaa;stroke:#091600;stroke-width:1.03971"
|
||||
id="rect6462-6-3"
|
||||
width="66.071701"
|
||||
height="15.904838"
|
||||
x="126.90776"
|
||||
y="63.642746" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,116.63325,39.114393)"
|
||||
id="text5048-7-5"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82960">MD gathers</tspan></text>
|
||||
<rect
|
||||
style="fill:#afe9dd;stroke:#091600;stroke-width:1.02848"
|
||||
id="rect6462-6-3-2"
|
||||
width="64.479698"
|
||||
height="15.947394"
|
||||
x="206.65364"
|
||||
y="52.98967" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,196.01512,28.482594)"
|
||||
id="text5048-7-5-9"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82962">Contiguous</tspan></text>
|
||||
<rect
|
||||
style="fill:#afe9dd;stroke:#091600;stroke-width:0.987323"
|
||||
id="rect6462-6-3-2-2"
|
||||
width="59.269382"
|
||||
height="15.988551"
|
||||
x="208.16559"
|
||||
y="76.856781" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,197.58604,52.220445)"
|
||||
id="text5048-7-5-9-7"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect5050-3-5-6-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="47.404297"
|
||||
y="135.7168"
|
||||
id="tspan82964">"Random"</tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="scale(0.26458333)"
|
||||
id="text24252"
|
||||
style="fill:black;fill-opacity:1;stroke:none;font-family:sans-serif;font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect24254)" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="M 193.10512,71.273276 206.30683,61.033513"
|
||||
id="path39049"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="M 193.08841,71.196939 207.86207,84.43804"
|
||||
id="path39053"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.39816;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 58.548229,151.24436 38.298093,0.25023"
|
||||
id="path39219"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.24847;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 119.19252,150.09399 29.28333,0.26095"
|
||||
id="path39219-2"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 177.02022,150.44367 26.36623,0.26095"
|
||||
id="path39219-2-0"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
|
||||
d="m 48.145458,92.71788 -0.644819,47.57709"
|
||||
id="path39377"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2)"
|
||||
d="M 48.121208,92.873762 106.60807,137.41946"
|
||||
id="path39377-7"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5)"
|
||||
d="M 48.073928,92.825143 158.88023,133.04546"
|
||||
id="path39377-7-2"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3, 1;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#Arrow2Mend-2-5-2)"
|
||||
d="M 48.051946,92.813593 233.0959,134.16596"
|
||||
id="path39377-7-2-9"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<rect
|
||||
style="fill:#e9afaf;stroke:#091600;stroke-width:1.34518"
|
||||
id="rect6462-6-3-2-2-3"
|
||||
width="65.880661"
|
||||
height="26.700579"
|
||||
x="38.104012"
|
||||
y="80.530182" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 77.365612,69.678744 h 2e-6"
|
||||
id="path39808"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 111.64767,59.183009 6.84466,0.03069"
|
||||
id="path41004"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 119.03378,47.056357 -0.58704,25.198541"
|
||||
id="path41006"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1.02423;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 118.07503,72.254897 7.94998,-0.05784"
|
||||
id="path41008"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.882836;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 118.26666,47.054814 7.69322,0.173925"
|
||||
id="path41112"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
d="M 68.213642,69.068864 67.910274,80.302728"
|
||||
id="path55728"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,-1.3782637,4.0412367)"
|
||||
id="text65236"
|
||||
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="309.01953"
|
||||
y="90.886691"
|
||||
id="tspan82968"><tspan
|
||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
||||
id="tspan82966">Application Level</tspan></tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,2.7015103,160.71919)"
|
||||
id="text65236-2"
|
||||
style="font-style:normal;font-weight:normal;font-size:53.3333px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect65238-1);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="309.01953"
|
||||
y="90.886691"
|
||||
id="tspan82972"><tspan
|
||||
style="font-weight:bold;-inkscape-font-specification:'sans-serif Bold'"
|
||||
id="tspan82970">Hardware Level</tspan></tspan></text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
transform="matrix(0.26458333,0,0,0.26458333,2.3490396,0.57331532)"
|
||||
id="text79473"
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;white-space:pre;shape-inside:url(#rect79475);fill:#000000;fill-opacity:1;stroke:none"><tspan
|
||||
x="144.01562"
|
||||
y="339.75586"
|
||||
id="tspan82974">vgather </tspan><tspan
|
||||
x="144.01562"
|
||||
y="389.75586"
|
||||
id="tspan82976">instructions</tspan></text>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 21 KiB |
BIN
figures/gromacs_mxn_v2.pdf
Normal file
BIN
figures/gromacs_mxn_v2.pdf
Normal file
Binary file not shown.
BIN
figures/gromacs_mxn_v2.png
Normal file
BIN
figures/gromacs_mxn_v2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 128 KiB |
BIN
figures/stub_new_v3.pdf
Normal file
BIN
figures/stub_new_v3.pdf
Normal file
Binary file not shown.
BIN
figures/stub_new_v3.png
Normal file
BIN
figures/stub_new_v3.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 52 KiB |
BIN
figures/verlet_v2.pdf
Normal file
BIN
figures/verlet_v2.pdf
Normal file
Binary file not shown.
BIN
figures/verlet_v2.png
Normal file
BIN
figures/verlet_v2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 62 KiB |
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
void initAtom(Atom *atom) {
|
||||
atom->x = NULL; atom->y = NULL; atom->z = NULL;
|
||||
@@ -27,6 +28,7 @@ void initAtom(Atom *atom) {
|
||||
atom->Nclusters = 0;
|
||||
atom->Nclusters_local = 0;
|
||||
atom->Nclusters_ghost = 0;
|
||||
atom->NmaxGhost = 0; //Temporal
|
||||
atom->Nclusters_max = 0;
|
||||
atom->type = NULL;
|
||||
atom->ntypes = 0;
|
||||
@@ -37,10 +39,19 @@ void initAtom(Atom *atom) {
|
||||
atom->iclusters = NULL;
|
||||
atom->jclusters = NULL;
|
||||
atom->icluster_bin = NULL;
|
||||
atom->PBCx = NULL;
|
||||
atom->PBCy = NULL;
|
||||
atom->PBCz = NULL;
|
||||
initMasks(atom);
|
||||
//MPI
|
||||
Box *mybox = &(atom->mybox);
|
||||
mybox->xprd = mybox->yprd = mybox->zprd = 0;
|
||||
mybox->lo[0] = mybox->lo[1] = mybox->lo[2] = 0;
|
||||
mybox->hi[0] = mybox->hi[1] = mybox->hi[2] = 0;
|
||||
}
|
||||
|
||||
void createAtom(Atom *atom, Parameter *param) {
|
||||
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||
@@ -128,22 +139,26 @@ int type_str2int(const char *type) {
|
||||
}
|
||||
|
||||
int readAtom(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int len = strlen(param->input_file);
|
||||
if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
|
||||
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int read_atoms = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -153,11 +168,11 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
char *item = strtok(line, " ");
|
||||
if(strncmp(item, "CRYST1", 6) == 0) {
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
@@ -166,23 +181,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
char *label;
|
||||
int atom_id, comp_id;
|
||||
MD_FLOAT occupancy, charge;
|
||||
atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
|
||||
label = strtok(NULL, " ");
|
||||
comp_id = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
|
||||
label = strtok(NULL, "\t ");
|
||||
comp_id = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->vx[atom_id] = 0.0;
|
||||
atom->vy[atom_id] = 0.0;
|
||||
atom->vz[atom_id] = 0.0;
|
||||
occupancy = atof(strtok(NULL, " "));
|
||||
charge = atof(strtok(NULL, " "));
|
||||
occupancy = atof(strtok(NULL, "\t "));
|
||||
charge = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@@ -194,14 +209,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
strncmp(item, "ENDMDL", 6) == 0) {
|
||||
// Do nothing
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(!read_atoms) {
|
||||
fprintf(stderr, "Input error: No atoms read!\n");
|
||||
if(me==0) fprintf(stderr, "Input error: No atoms read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -217,12 +232,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
char desc[MAXLINE];
|
||||
@@ -231,7 +249,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int i = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -241,25 +259,25 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
desc[i] = '\0';
|
||||
readline(line, fp);
|
||||
atoms_to_read = atoi(strtok(line, " "));
|
||||
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
if(me==0) fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
|
||||
while(!feof(fp) && read_atoms < atoms_to_read) {
|
||||
readline(line, fp);
|
||||
char *label = strtok(line, " ");
|
||||
int type = type_str2int(strtok(NULL, " "));
|
||||
int atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
char *label = strtok(line, "\t ");
|
||||
int type = type_str2int(strtok(NULL, "\t "));
|
||||
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
atom_id = read_atoms;
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type;
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@@ -269,18 +287,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
if(!feof(fp)) {
|
||||
readline(line, fp);
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(line, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
}
|
||||
|
||||
if(read_atoms != atoms_to_read) {
|
||||
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
if(me==0) fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -296,12 +314,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int natoms = 0;
|
||||
@@ -310,7 +330,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int ts = -1;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -333,47 +353,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
}
|
||||
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
|
||||
readline(line, fp);
|
||||
param->xlo = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xlo = atof(strtok(line, "\t "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
|
||||
readline(line, fp);
|
||||
param->ylo = atof(strtok(line, " "));
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->ylo = atof(strtok(line, "\t "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
|
||||
readline(line, fp);
|
||||
param->zlo = atof(strtok(line, " "));
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zlo = atof(strtok(line, "\t "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
|
||||
for(int i = 0; i < natoms; i++) {
|
||||
readline(line, fp);
|
||||
atom_id = atoi(strtok(line, " ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, " "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, " "));
|
||||
atom_id = atoi(strtok(line, "\t ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->vx[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vy[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->vz[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
|
||||
read_atoms++;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0) fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
if(me==0) fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(ts < 0 || !natoms || !read_atoms) {
|
||||
fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
if(me==0) fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -389,7 +409,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
if(me==0) fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
fclose(fp);
|
||||
return natoms;
|
||||
}
|
||||
@@ -530,3 +550,249 @@ void growClusters(Atom *atom) {
|
||||
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
|
||||
atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
|
||||
}
|
||||
|
||||
/* MPI added*/
|
||||
void growPbc(Atom* atom) {
|
||||
int nold = atom->NmaxGhost;
|
||||
atom->NmaxGhost += DELTA;
|
||||
|
||||
if (atom->PBCx || atom->PBCy || atom->PBCz){
|
||||
atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, atom->NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
} else {
|
||||
atom->PBCx = (int*) malloc(atom->NmaxGhost * sizeof(int));
|
||||
atom->PBCy = (int*) malloc(atom->NmaxGhost * sizeof(int));
|
||||
atom->PBCz = (int*) malloc(atom->NmaxGhost * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
void packForward(Atom* atom, int nc, int* list, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = list[i];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
|
||||
buf[3*(displ+cjj)+1] = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
|
||||
buf[3*(displ+cjj)+2] = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = -1; //x
|
||||
buf[3*(displ+cjj)+1] = -1; //y
|
||||
buf[3*(displ+cjj)+2] = -1; //z
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unpackForward(Atom* atom, int nc, int c0, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = c0+i;
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
if(cj_x[CL_X_OFFSET + cjj]<INFINITY) cj_x[CL_X_OFFSET + cjj] = buf[3*(displ+cjj)+0];
|
||||
if(cj_x[CL_Y_OFFSET + cjj]<INFINITY) cj_x[CL_Y_OFFSET + cjj] = buf[3*(displ+cjj)+1];
|
||||
if(cj_x[CL_Z_OFFSET + cjj]<INFINITY) cj_x[CL_Z_OFFSET + cjj] = buf[3*(displ+cjj)+2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int packGhost(Atom* atom, int cj, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
//#of elements per cluster natoms,x0,y0,z0,type_0, . . ,xn,yn,zn,type_n,bbminx,bbmaxxy,bbminy,bbmaxy,bbminz,bbmaxz
|
||||
//count = 4*N_CLUSTER+7, if N_CLUSTER =4 => count = 23 value/cluster + trackpbc[x] + trackpbc[y] + trackpbc[z]
|
||||
int m = 0;
|
||||
if(atom->jclusters[cj].natoms > 0) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
|
||||
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
|
||||
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
|
||||
|
||||
buf[m++] = atom->jclusters[cj].natoms;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
|
||||
MD_FLOAT xtmp = cj_x[CL_X_OFFSET + cjj] + pbc[_x] * atom->mybox.xprd;
|
||||
MD_FLOAT ytmp = cj_x[CL_Y_OFFSET + cjj] + pbc[_y] * atom->mybox.yprd;
|
||||
MD_FLOAT ztmp = cj_x[CL_Z_OFFSET + cjj] + pbc[_z] * atom->mybox.zprd;
|
||||
|
||||
buf[m++] = xtmp;
|
||||
buf[m++] = ytmp;
|
||||
buf[m++] = ztmp;
|
||||
buf[m++]= atom->cl_type[cj_sca_base + cjj];
|
||||
|
||||
if(bbminx > xtmp) { bbminx = xtmp; }
|
||||
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
|
||||
if(bbminy > ytmp) { bbminy = ytmp; }
|
||||
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
|
||||
if(bbminz > ztmp) { bbminz = ztmp; }
|
||||
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
buf[m++] = -1; //x
|
||||
buf[m++] = -1; //y
|
||||
buf[m++] = -1; //z
|
||||
buf[m++] = -1; //type
|
||||
}
|
||||
|
||||
buf[m++] = bbminx;
|
||||
buf[m++] = bbmaxx;
|
||||
buf[m++] = bbminy;
|
||||
buf[m++] = bbmaxy;
|
||||
buf[m++] = bbminz;
|
||||
buf[m++] = bbmaxz;
|
||||
//TODO: check atom->ncj
|
||||
int ghostId = cj-atom->ncj;
|
||||
//check for ghost particles
|
||||
buf[m++] = (cj-atom->ncj>=0) ? pbc[_x]+atom->PBCx[ghostId]:pbc[_x];
|
||||
buf[m++] = (cj-atom->ncj>=0) ? pbc[_y]+atom->PBCy[ghostId]:pbc[_y];
|
||||
buf[m++] = (cj-atom->ncj>=0) ? pbc[_z]+atom->PBCz[ghostId]:pbc[_z];
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackGhost(Atom* atom, int cj, MD_FLOAT* buf)
|
||||
{
|
||||
int m = 0;
|
||||
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
|
||||
if(cj*jfac>=atom->Nclusters_max) growClusters(atom);
|
||||
if(atom->Nclusters_ghost>=atom->NmaxGhost) growPbc(atom);
|
||||
|
||||
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj);
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
|
||||
atom->jclusters[cj].natoms = buf[m++];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
|
||||
cj_x[CL_X_OFFSET + cjj] = buf[m++];
|
||||
cj_x[CL_Y_OFFSET + cjj] = buf[m++];
|
||||
cj_x[CL_Z_OFFSET + cjj] = buf[m++];
|
||||
atom->cl_type[cj_sca_base + cjj] = buf[m++];
|
||||
atom->Nghost++;
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
cj_x[CL_X_OFFSET + cjj] = INFINITY;
|
||||
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
|
||||
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
|
||||
atom->cl_type[cj_sca_base + cjj] = -1;
|
||||
m+=4;
|
||||
}
|
||||
|
||||
atom->jclusters[cj].bbminx = buf[m++];
|
||||
atom->jclusters[cj].bbmaxx = buf[m++];
|
||||
atom->jclusters[cj].bbminy = buf[m++];
|
||||
atom->jclusters[cj].bbmaxy = buf[m++];
|
||||
atom->jclusters[cj].bbminz = buf[m++];
|
||||
atom->jclusters[cj].bbmaxz = buf[m++];
|
||||
atom->PBCx[atom->Nclusters_ghost] = buf[m++];
|
||||
atom->PBCy[atom->Nclusters_ghost] = buf[m++];
|
||||
atom->PBCz[atom->Nclusters_ghost] = buf[m++];
|
||||
atom->Nclusters_ghost++;
|
||||
|
||||
}
|
||||
|
||||
void packReverse(Atom* atom, int nc, int c0, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = c0+i;
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = cj_f[CL_X_OFFSET + cjj];
|
||||
buf[3*(displ+cjj)+1] = cj_f[CL_Y_OFFSET + cjj];
|
||||
buf[3*(displ+cjj)+2] = cj_f[CL_Z_OFFSET + cjj];
|
||||
}
|
||||
|
||||
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
|
||||
buf[3*(displ+cjj)+0] = -1; //x
|
||||
buf[3*(displ+cjj)+1] = -1; //y
|
||||
buf[3*(displ+cjj)+2] = -1; //z
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unpackReverse(Atom* atom, int nc, int* list, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < nc; i++) {
|
||||
int cj = list[i];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
int displ = i*CLUSTER_N;
|
||||
|
||||
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] += buf[3*(displ+cjj)+0];
|
||||
cj_f[CL_Y_OFFSET + cjj] += buf[3*(displ+cjj)+1];
|
||||
cj_f[CL_Z_OFFSET + cjj] += buf[3*(displ+cjj)+2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
int m = 0;
|
||||
buf[m++] = atom_x(i);
|
||||
buf[m++] = atom_y(i);
|
||||
buf[m++] = atom_z(i);
|
||||
buf[m++] = atom_vx(i);
|
||||
buf[m++] = atom_vy(i);
|
||||
buf[m++] = atom_vz(i);
|
||||
buf[m++] = atom->type[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
while(i >= atom->Nmax) growAtom(atom);
|
||||
int m = 0;
|
||||
atom_x(i) = buf[m++];
|
||||
atom_y(i) = buf[m++];
|
||||
atom_z(i) = buf[m++];
|
||||
atom_vx(i) = buf[m++];
|
||||
atom_vy(i) = buf[m++];
|
||||
atom_vz(i) = buf[m++];
|
||||
atom->type[i] = buf[m++];
|
||||
return m;
|
||||
}
|
||||
|
||||
void pbc(Atom* atom)
|
||||
{
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
|
||||
MD_FLOAT xprd = atom->mybox.xprd;
|
||||
MD_FLOAT yprd = atom->mybox.yprd;
|
||||
MD_FLOAT zprd = atom->mybox.zprd;
|
||||
|
||||
if(atom_x(i) < 0.0) atom_x(i) += xprd;
|
||||
if(atom_y(i) < 0.0) atom_y(i) += yprd;
|
||||
if(atom_z(i) < 0.0) atom_z(i) +=zprd;
|
||||
if(atom_x(i) >= xprd) atom_x(i) -=xprd;
|
||||
if(atom_y(i) >= yprd) atom_y(i) -=yprd;
|
||||
if(atom_z(i) >= zprd) atom_z(i) -=zprd;
|
||||
}
|
||||
}
|
||||
|
||||
void copy(Atom* atom, int i, int j)
|
||||
{
|
||||
atom_x(i) = atom_x(j);
|
||||
atom_y(i) = atom_y(j);
|
||||
atom_z(i) = atom_z(j);
|
||||
atom_vx(i) = atom_vx(j);
|
||||
atom_vy(i) = atom_vy(j);
|
||||
atom_vz(i) = atom_vz(j);
|
||||
atom->type[i] = atom->type[j];
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -14,8 +14,9 @@
|
||||
#include <stats.h>
|
||||
#include <util.h>
|
||||
#include <simd.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
|
||||
/*
|
||||
static inline void gmx_load_simd_2xnn_interactions(
|
||||
int excl,
|
||||
@@ -49,7 +50,6 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
@@ -60,13 +60,23 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Y_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Z_OFFSET + cjj] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||
@@ -103,6 +113,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
cond = neighbor->half_neigh ? (ci_cj0 != cj || cii < cjj) && (ci_cj1 != cj || cii < cjj + CLUSTER_N) :
|
||||
(ci_cj0 != cj || cii != cjj) && (ci_cj1 != cj || cii != cjj + CLUSTER_N);
|
||||
#endif
|
||||
|
||||
if(cond) {
|
||||
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj];
|
||||
@@ -113,12 +124,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
|
||||
if(neighbor->half_neigh) {
|
||||
if(neighbor->half_neigh || param->method) {
|
||||
cj_f[CL_X_OFFSET + cjj] -= delx * force;
|
||||
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
|
||||
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
|
||||
}
|
||||
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
@@ -129,13 +139,11 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(any != 0) {
|
||||
addStat(stats->clusters_within_cutoff, 1);
|
||||
} else {
|
||||
addStat(stats->clusters_outside_cutoff, 1);
|
||||
}
|
||||
|
||||
ci_f[CL_X_OFFSET + cii] += fix;
|
||||
ci_f[CL_Y_OFFSET + cii] += fiy;
|
||||
ci_f[CL_Z_OFFSET + cii] += fiz;
|
||||
@@ -146,7 +154,7 @@ double computeForceLJ_ref(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
addStat(stats->num_neighs, numneighs);
|
||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
@@ -168,7 +176,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
for(int ci = 0; ci < atom->Nclusters_local+atom->Nclusters_ghost; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
@@ -178,6 +186,16 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Y_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Z_OFFSET + cjj] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
@@ -213,7 +231,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
#endif
|
||||
*/
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@@ -322,7 +340,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
fiz2 += tz2;
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)|| param->method) {
|
||||
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
|
||||
}
|
||||
#else
|
||||
@@ -373,7 +391,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
fiz2 += tz2;
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
|
||||
simd_h_decr3(cj_f, tx0 + tx2, ty0 + ty2, tz0 + tz2);
|
||||
}
|
||||
#else
|
||||
@@ -389,7 +407,7 @@ double computeForceLJ_2xnn_half(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
addStat(stats->num_neighs, numneighs);
|
||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
@@ -427,7 +445,7 @@ double computeForceLJ_2xnn_full(Parameter *param, Atom *atom, Neighbor *neighbor
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@@ -562,7 +580,6 @@ double computeForceLJ_2xnn(Parameter *param, Atom *atom, Neighbor *neighbor, Sta
|
||||
if(neighbor->half_neigh) {
|
||||
return computeForceLJ_2xnn_half(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
return computeForceLJ_2xnn_full(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
@@ -589,13 +606,23 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = atom->ncj; cg < atom->ncj+atom->Nclusters_ghost; cg++) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
cj_f[CL_X_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Y_OFFSET + cjj] = 0.0;
|
||||
cj_f[CL_Z_OFFSET + cjj] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@@ -726,7 +753,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
fiz3 = simd_add(fiz3, tz3);
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
|
||||
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
|
||||
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
|
||||
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
|
||||
@@ -811,7 +838,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
fiz3 = simd_add(fiz3, tz3);
|
||||
|
||||
#ifdef HALF_NEIGHBOR_LISTS_CHECK_CJ
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal)) {
|
||||
if(cj < CJ1_FROM_CI(atom->Nlocal) || param->method) {
|
||||
simd_store(&cj_f[CL_X_OFFSET], simd_load(&cj_f[CL_X_OFFSET]) - (tx0 + tx1 + tx2 + tx3));
|
||||
simd_store(&cj_f[CL_Y_OFFSET], simd_load(&cj_f[CL_Y_OFFSET]) - (ty0 + ty1 + ty2 + ty3));
|
||||
simd_store(&cj_f[CL_Z_OFFSET], simd_load(&cj_f[CL_Z_OFFSET]) - (tz0 + tz1 + tz2 + tz3));
|
||||
@@ -831,7 +858,7 @@ double computeForceLJ_4xn_half(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
addStat(stats->num_neighs, numneighs);
|
||||
addStat(stats->force_iters, (long long int)((double)numneighs * CLUSTER_M / CLUSTER_N));
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
|
||||
@@ -869,7 +896,7 @@ double computeForceLJ_4xn_full(Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for schedule(runtime)
|
||||
#pragma omp for
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj0 = CJ0_FROM_CI(ci);
|
||||
#if CLUSTER_M > CLUSTER_N
|
||||
@@ -1070,3 +1097,120 @@ double computeForceLJ_4xn(Parameter *param, Atom *atom, Neighbor *neighbor, Stat
|
||||
|
||||
return computeForceLJ_4xn_full(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
//Routine for eight shell method
|
||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
DEBUG_MESSAGE("computeForceLJ begin\n");
|
||||
|
||||
int Nshell = neighbor->Nshell;
|
||||
int *neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int ci = 0; ci < Nshell; ci++) {
|
||||
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numNeighShell[ci];
|
||||
int cs = neighbor->listshell[ci];
|
||||
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
|
||||
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
|
||||
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int cj = neighs[k];
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
|
||||
for(int css = 0; css < CLUSTER_N; css++) {
|
||||
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
|
||||
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
|
||||
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
|
||||
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
|
||||
|
||||
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
|
||||
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
|
||||
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
|
||||
cj_f[CL_X_OFFSET + cjj] -= delx * force;
|
||||
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
|
||||
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
|
||||
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
}
|
||||
}
|
||||
|
||||
cs_f[CL_X_OFFSET + css] += fix;
|
||||
cs_f[CL_Y_OFFSET + css] += fiy;
|
||||
cs_f[CL_Z_OFFSET + css] += fiz;
|
||||
}
|
||||
}
|
||||
// addStat(stats->calculated_forces, 1);
|
||||
// addStat(stats->num_neighs, numneighs);
|
||||
// addStat(stats->force_iters, (long long int)((double)numneighs));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
|
||||
int Nshell = neighbor->Nshell;
|
||||
Pair* neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int ci = 0; ci < Nshell; ci++) {
|
||||
neighs = &neighbor->neighshell[ci * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numNeighShell[ci];
|
||||
int cs = neighbor->listshell[ci].cluster;
|
||||
int css = neighbor->listshell[ci].atom;
|
||||
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
|
||||
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
|
||||
MD_FLOAT *cs_f = &atom->cl_f[cs_vec_base];
|
||||
MD_FLOAT x = cs_x[CL_X_OFFSET + css];
|
||||
MD_FLOAT y = cs_x[CL_Y_OFFSET + css];
|
||||
MD_FLOAT z = cs_x[CL_Z_OFFSET + css];
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int cj = neighs[k].cluster;
|
||||
int cjj = neighs[k].atom;
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
MD_FLOAT *cj_f = &atom->cl_f[cj_vec_base];
|
||||
|
||||
MD_FLOAT delx = x - cj_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = y - cj_x[CL_Y_OFFSET + cjj];
|
||||
MD_FLOAT delz = z - cj_x[CL_Z_OFFSET + cjj];
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = 1.0 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
|
||||
cj_f[CL_X_OFFSET + cjj] -= delx * force;
|
||||
cj_f[CL_Y_OFFSET + cjj] -= dely * force;
|
||||
cj_f[CL_Z_OFFSET + cjj] -= delz * force;
|
||||
cs_f[CL_X_OFFSET + css] += delx * force;
|
||||
cs_f[CL_Y_OFFSET + css] += delx * force;
|
||||
cs_f[CL_Z_OFFSET + css] += delx * force;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
@@ -1,10 +1,11 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
#include <box.h>
|
||||
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
@@ -102,7 +103,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
|
||||
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max, NmaxGhost,ncj;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
int *border_map;
|
||||
@@ -112,6 +113,7 @@ typedef struct {
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
//track the movement of a particle along boundaries
|
||||
int *PBCx, *PBCy, *PBCz;
|
||||
// Data in cluster format
|
||||
MD_FLOAT *cl_x;
|
||||
@@ -128,6 +130,9 @@ typedef struct {
|
||||
unsigned int masks_2xnn_fn[8];
|
||||
unsigned int masks_4xn_hn[16];
|
||||
unsigned int masks_4xn_fn[16];
|
||||
|
||||
//Info Subdomain
|
||||
Box mybox;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
@@ -140,6 +145,18 @@ extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
extern void growClusters(Atom*);
|
||||
|
||||
int packGhost(Atom*, int, MD_FLOAT* , int*);
|
||||
int unpackGhost(Atom*, int, MD_FLOAT*);
|
||||
int packExchange(Atom*, int, MD_FLOAT*);
|
||||
int unpackExchange(Atom*, int, MD_FLOAT*);
|
||||
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
|
||||
void unpackForward(Atom*, int, int, MD_FLOAT*);
|
||||
void packReverse(Atom* , int , int , MD_FLOAT*);
|
||||
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
|
||||
void pbc(Atom*);
|
||||
void copy(Atom*, int, int);
|
||||
|
||||
|
||||
#ifdef AOS
|
||||
# define POS_DATA_LAYOUT "AoS"
|
||||
# define atom_x(i) atom->x[(i) * 3 + 0]
|
112
gromacs/includes/integrate.h
Normal file
112
gromacs/includes/integrate.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
//---
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
#include <timers.h>
|
||||
#include <timing.h>
|
||||
#include <simd.h>
|
||||
/*
|
||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
|
||||
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
|
||||
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||
ci_x[CL_X_OFFSET + cii] += param->dt * ci_v[CL_X_OFFSET + cii];
|
||||
ci_x[CL_Y_OFFSET + cii] += param->dt * ci_v[CL_Y_OFFSET + cii];
|
||||
ci_x[CL_Z_OFFSET + cii] += param->dt * ci_v[CL_Z_OFFSET + cii];
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
|
||||
}
|
||||
|
||||
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
|
||||
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
|
||||
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||
}
|
||||
}
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
|
||||
}
|
||||
*/
|
||||
|
||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
|
||||
MD_SIMD_FLOAT dt = simd_broadcast(param->dt);
|
||||
|
||||
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
|
||||
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
|
||||
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
|
||||
MD_SIMD_FLOAT x_vector = simd_fma(vx_vector, dt, simd_load(&ci_x[CL_X_OFFSET]));
|
||||
MD_SIMD_FLOAT y_vector = simd_fma(vy_vector, dt, simd_load(&ci_x[CL_Y_OFFSET]));
|
||||
MD_SIMD_FLOAT z_vector = simd_fma(vz_vector, dt, simd_load(&ci_x[CL_Z_OFFSET]));
|
||||
|
||||
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
|
||||
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
|
||||
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
|
||||
simd_store(&ci_x[CL_X_OFFSET], x_vector);
|
||||
simd_store(&ci_x[CL_Y_OFFSET], y_vector);
|
||||
simd_store(&ci_x[CL_Z_OFFSET], z_vector);
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
|
||||
}
|
||||
|
||||
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci+=2) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
MD_SIMD_FLOAT dtforce = simd_broadcast(param->dtforce);
|
||||
MD_SIMD_FLOAT vx_vector = simd_fma(simd_load(&ci_f[CL_X_OFFSET]), dtforce, simd_load(&ci_v[CL_X_OFFSET]));
|
||||
MD_SIMD_FLOAT vy_vector = simd_fma(simd_load(&ci_f[CL_Y_OFFSET]), dtforce, simd_load(&ci_v[CL_Y_OFFSET]));
|
||||
MD_SIMD_FLOAT vz_vector = simd_fma(simd_load(&ci_f[CL_Z_OFFSET]), dtforce, simd_load(&ci_v[CL_Z_OFFSET]));
|
||||
simd_store(&ci_v[CL_X_OFFSET], vx_vector);
|
||||
simd_store(&ci_v[CL_Y_OFFSET], vy_vector);
|
||||
simd_store(&ci_v[CL_Z_OFFSET], vz_vector);
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
|
||||
}
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
void cudaInitialIntegrate(Parameter*, Atom*);
|
||||
void cudaFinalIntegrate(Parameter*, Atom*);
|
||||
#endif
|
||||
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -25,6 +25,11 @@
|
||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
|
||||
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
|
||||
|
||||
typedef struct {
|
||||
int cluster;
|
||||
int atom;
|
||||
} Pair;
|
||||
|
||||
typedef struct {
|
||||
int every;
|
||||
int ncalls;
|
||||
@@ -34,8 +39,20 @@ typedef struct {
|
||||
int half_neigh;
|
||||
int* neighbors;
|
||||
unsigned int* neighbors_imask;
|
||||
//MPI
|
||||
/*
|
||||
int Nshell; //# of atoms in listShell(Cluster here cover all possible ghost interactions)
|
||||
int *numNeighShell; //# of neighs for each atom in listShell
|
||||
Pair *neighshell; //list of neighs for each atom in listShell
|
||||
Pair *listshell; //Atoms to compute the force
|
||||
*/
|
||||
int Nshell; //# of cluster in listShell(Cluster here cover all possible ghost interactions)
|
||||
int *numNeighShell; //# of neighs for each atom in listShell
|
||||
int *neighshell; //list of neighs for each atom in listShell
|
||||
int *listshell; //Atoms to compute the force
|
||||
} Neighbor;
|
||||
|
||||
|
||||
extern void initNeighbor(Neighbor*, Parameter*);
|
||||
extern void setupNeighbor(Parameter*, Atom*);
|
||||
extern void binatoms(Atom*);
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,10 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <comm.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __VTK_H_
|
||||
#define __VTK_H_
|
||||
@@ -13,4 +15,5 @@ extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int t
|
||||
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,13 +1,11 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <omp.h>
|
||||
//--
|
||||
#include <likwid-marker.h>
|
||||
//--
|
||||
@@ -26,6 +24,10 @@
|
||||
#include <util.h>
|
||||
#include <vtk.h>
|
||||
#include <xtc.h>
|
||||
#include <comm.h>
|
||||
#include <grid.h>
|
||||
#include <shell_methods.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
|
||||
@@ -42,17 +44,55 @@ extern void copyDataFromCUDADevice(Atom *atom);
|
||||
extern void cudaDeviceFree();
|
||||
#endif
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time)
|
||||
{
|
||||
double S, E;
|
||||
int dims = 3; //TODO: Adjust to do in 3d and 2d
|
||||
S = getTimeStamp();
|
||||
if(param->balance == RCB) {
|
||||
rcbBalance(grid, atom, param, meanBisect,dims,0);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == meanTimeRCB){
|
||||
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == Staggered) {
|
||||
staggeredBalance(grid, atom, param, time);
|
||||
neighComm(comm, param, grid);
|
||||
exchangeComm(comm,atom);
|
||||
}else { } //Do nothing
|
||||
//printGrid(grid);
|
||||
E = getTimeStamp();
|
||||
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
|
||||
{
|
||||
double E,S,time;
|
||||
int me;
|
||||
MPI_Comm_rank(world,&me);
|
||||
S = getTimeStamp();
|
||||
if(param->balance == meanTimeRCB || param->balance == RCB){
|
||||
rcbBalance(grid, atom, param, meanBisect,3,0);
|
||||
neighComm(comm, param, grid);
|
||||
}
|
||||
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
|
||||
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
|
||||
MPI_Barrier(world);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
|
||||
S = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initPbc(atom);
|
||||
//initPbc(atom);
|
||||
initStats(stats);
|
||||
initNeighbor(neighbor, param);
|
||||
if(param->input_file == NULL) {
|
||||
@@ -60,13 +100,18 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
} else {
|
||||
readAtom(atom, param);
|
||||
}
|
||||
|
||||
setupGrid(grid,atom,param);
|
||||
setupNeighbor(param, atom);
|
||||
setupComm(comm, param, grid);
|
||||
if(param->balance){
|
||||
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
|
||||
}
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
buildClusters(atom);
|
||||
defineJClusters(atom);
|
||||
setupPbc(atom, param);
|
||||
//setupPbc(atom, param);
|
||||
ghostNeighbor(comm, atom, param); //change
|
||||
binClusters(atom);
|
||||
buildNeighbor(atom, neighbor);
|
||||
initDevice(atom, neighbor);
|
||||
@@ -74,15 +119,15 @@ double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
updateSingleAtoms(atom);
|
||||
updateAtomsPbc(atom, param);
|
||||
//updateAtomsPbc(atom, param);
|
||||
buildClusters(atom);
|
||||
defineJClusters(atom);
|
||||
setupPbc(atom, param);
|
||||
//setupPbc(atom, param);
|
||||
ghostNeighbor(comm, atom, param);
|
||||
binClusters(atom);
|
||||
buildNeighbor(atom, neighbor);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
@@ -90,15 +135,13 @@ double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void printAtomState(Atom *atom) {
|
||||
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
|
||||
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
|
||||
|
||||
/* int nall = atom->Nlocal + atom->Nghost; */
|
||||
|
||||
/* for (int i=0; i<nall; i++) { */
|
||||
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
|
||||
/* } */
|
||||
double updateAtoms(Comm* comm, Atom* atom){
|
||||
double S,E;
|
||||
S = getTimeStamp();
|
||||
updateSingleAtoms(atom);
|
||||
exchangeComm(comm, atom);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
@@ -108,7 +151,8 @@ int main(int argc, char** argv) {
|
||||
Neighbor neighbor;
|
||||
Stats stats;
|
||||
Parameter param;
|
||||
|
||||
Comm comm;
|
||||
Grid grid;
|
||||
LIKWID_MARKER_INIT;
|
||||
#pragma omp parallel
|
||||
{
|
||||
@@ -116,7 +160,7 @@ int main(int argc, char** argv) {
|
||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||
//LIKWID_MARKER_REGISTER("pbc");
|
||||
}
|
||||
|
||||
initComm(&argc, &argv, &comm); //change
|
||||
initParameter(¶m);
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
|
||||
@@ -158,6 +202,24 @@ int main(int argc, char** argv) {
|
||||
param.half_neigh = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-method") == 0)) {
|
||||
param.method = atoi(argv[++i]);
|
||||
if (param.method>2 || param.method< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-bal") == 0)) {
|
||||
param.balance = atoi(argv[++i]);
|
||||
if (param.balance>3 || param.balance< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Load balance does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
|
||||
param.mass = atof(argv[++i]);
|
||||
continue;
|
||||
@@ -188,6 +250,7 @@ int main(int argc, char** argv) {
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||
//TODO: add the shell and ac print options
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
printf(HLINE);
|
||||
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
|
||||
@@ -206,83 +269,86 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
setup(¶m, &eam, &atom, &neighbor, &stats);
|
||||
printParameter(¶m);
|
||||
printf(HLINE);
|
||||
if(param.balance>0 && param.method == 1){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
printf("step\ttemp\t\tpressure\n");
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
|
||||
if(comm.myproc == 0) printParameter(¶m);
|
||||
if(comm.myproc == 0) printf(HLINE);
|
||||
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
|
||||
computeThermo(0, ¶m, &atom);
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataToCUDADevice(&atom);
|
||||
#endif
|
||||
|
||||
if(param.force_field == FF_EAM) {
|
||||
timer[FORCE] = computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
} else {
|
||||
timer[FORCE] = computeForceLJ(¶m, &atom, &neighbor, &stats);
|
||||
}
|
||||
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[FORWARD] = 0.0;
|
||||
timer[UPDATE] = 0.0;
|
||||
timer[BALANCE] = 0.0;
|
||||
timer[REVERSE] = reverse(&comm, &atom, ¶m);
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
write_data_to_vtk_file(param.vtk_file, &atom, 0);
|
||||
//write_data_to_vtk_file(param.vtk_file, &comm ,&atom, 0);
|
||||
printvtk(param.vtk_file, &comm, &atom, ¶m, 0);
|
||||
}
|
||||
|
||||
//TODO: modify xct
|
||||
if(param.xtc_file != NULL) {
|
||||
xtc_init(param.xtc_file, &atom, 0);
|
||||
}
|
||||
|
||||
double forceTime=0.0;
|
||||
double commTime=0.0;
|
||||
for(int n = 0; n < param.ntimes; n++) {
|
||||
initialIntegrate(¶m, &atom);
|
||||
|
||||
if((n + 1) % param.reneigh_every) {
|
||||
if(!((n + 1) % param.prune_every)) {
|
||||
timer[FORWARD]+=forward(&comm, &atom, ¶m);
|
||||
if(!((n + 1) % param.prune_every)){
|
||||
pruneNeighbor(¶m, &atom, &neighbor);
|
||||
}
|
||||
|
||||
updatePbc(&atom, ¶m, 0);
|
||||
} else {
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataFromCUDADevice(&atom);
|
||||
#endif
|
||||
|
||||
timer[NEIGH] += reneighbour(¶m, &atom, &neighbor);
|
||||
|
||||
timer[UPDATE] +=updateAtoms(&comm,&atom);
|
||||
if(param.balance && !((n+1)%param.balance_every))
|
||||
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);
|
||||
timer[NEIGH] += reneighbour(&comm, ¶m, &atom, &neighbor);
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataToCUDADevice(&atom);
|
||||
isReneighboured = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
|
||||
if(param.force_field == FF_EAM) {
|
||||
timer[FORCE] += computeForceEam(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
} else {
|
||||
timer[FORCE] += computeForceLJ(¶m, &atom, &neighbor, &stats);
|
||||
}
|
||||
|
||||
timer[REVERSE] += reverse(&comm, &atom, ¶m);
|
||||
finalIntegrate(¶m, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
computeThermo(n + 1, ¶m, &atom);
|
||||
}
|
||||
|
||||
int write_pos = !((n + 1) % param.x_out_every);
|
||||
int write_vel = !((n + 1) % param.v_out_every);
|
||||
if(write_pos || write_vel) {
|
||||
if(param.vtk_file != NULL) {
|
||||
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
|
||||
printvtk(param.vtk_file, &comm, &atom, ¶m, n+1);
|
||||
}
|
||||
|
||||
//TODO: xtc file
|
||||
if(param.xtc_file != NULL) {
|
||||
xtc_write(&atom, n + 1, write_pos, write_vel);
|
||||
}
|
||||
@@ -292,11 +358,11 @@ int main(int argc, char** argv) {
|
||||
#ifdef CUDA_TARGET
|
||||
copyDataFromCUDADevice(&atom);
|
||||
#endif
|
||||
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
||||
updateSingleAtoms(&atom);
|
||||
updateAtoms(&comm,&atom);
|
||||
computeThermo(-1, ¶m, &atom);
|
||||
|
||||
//TODO:
|
||||
if(param.xtc_file != NULL) {
|
||||
xtc_end();
|
||||
}
|
||||
@@ -304,41 +370,35 @@ int main(int argc, char** argv) {
|
||||
#ifdef CUDA_TARGET
|
||||
cudaDeviceFree();
|
||||
#endif
|
||||
double mint[NUMTIMER];
|
||||
double maxt[NUMTIMER];
|
||||
double sumt[NUMTIMER];
|
||||
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
|
||||
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
|
||||
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
|
||||
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
|
||||
int Nghost;
|
||||
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
|
||||
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
|
||||
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
|
||||
printf(HLINE);
|
||||
if(comm.myproc == 0){
|
||||
int n = comm.numproc;
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
|
||||
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
|
||||
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
|
||||
printf(HLINE);
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
|
||||
int nthreads = 0;
|
||||
int chunkSize = 0;
|
||||
omp_sched_t schedKind;
|
||||
char schedType[10];
|
||||
#pragma omp parallel
|
||||
#pragma omp master
|
||||
{
|
||||
omp_get_schedule(&schedKind, &chunkSize);
|
||||
|
||||
switch (schedKind)
|
||||
{
|
||||
case omp_sched_static: strcpy(schedType, "static"); break;
|
||||
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
|
||||
case omp_sched_guided: strcpy(schedType, "guided"); break;
|
||||
case omp_sched_auto: strcpy(schedType, "auto"); break;
|
||||
}
|
||||
|
||||
nthreads = omp_get_max_threads();
|
||||
}
|
||||
|
||||
printf("Num threads: %d\n", nthreads);
|
||||
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
|
||||
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
#ifdef COMPUTE_STATS
|
||||
#ifdef COMPUTE_STATS
|
||||
displayStatistics(&atom, ¶m, &stats, timer);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
endComm(&comm);
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -7,15 +7,15 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
|
||||
#define eps 1.0e-9
|
||||
static MD_FLOAT xprd, yprd, zprd;
|
||||
static MD_FLOAT bininvx, bininvy;
|
||||
static int mbinxlo, mbinylo;
|
||||
@@ -34,9 +34,16 @@ static int nmax;
|
||||
static int nstencil; // # of bins in stencil
|
||||
static int* stencil; // stencil list of bin offsets
|
||||
static MD_FLOAT binsizex, binsizey;
|
||||
int me; //rank
|
||||
int method; // method
|
||||
int shellMethod; //If shell method exist
|
||||
|
||||
static int coord2bin(MD_FLOAT, MD_FLOAT);
|
||||
static MD_FLOAT bindist(int, int);
|
||||
//static int ghostZone(Atom*, int);
|
||||
static int halfZoneCluster(Atom*,int);
|
||||
static int ghostClusterinRange(Atom*, int, int, MD_FLOAT);
|
||||
static void neighborGhost(Atom*, Neighbor*);
|
||||
|
||||
/* exported subroutines */
|
||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
@@ -53,12 +60,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
bincount = NULL;
|
||||
bin_clusters = NULL;
|
||||
bin_nclusters = NULL;
|
||||
neighbor->half_neigh = param->half_neigh;
|
||||
neighbor->maxneighs = 100;
|
||||
neighbor->maxneighs = 200;
|
||||
neighbor->numneigh = NULL;
|
||||
neighbor->numneigh_masked = NULL;
|
||||
neighbor->neighbors = NULL;
|
||||
neighbor->neighbors_imask = NULL;
|
||||
//MPI
|
||||
shellMethod = 0;
|
||||
method = param->method;
|
||||
if(method == halfShell || method == eightShell){
|
||||
param->half_neigh = 1;
|
||||
shellMethod = 1;
|
||||
}
|
||||
me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
neighbor->half_neigh = param->half_neigh;
|
||||
neighbor->Nshell = 0;
|
||||
neighbor->numNeighShell = NULL;
|
||||
neighbor->neighshell = NULL;
|
||||
neighbor->listshell = NULL;
|
||||
}
|
||||
|
||||
void setupNeighbor(Parameter *param, Atom *atom) {
|
||||
@@ -77,7 +97,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
|
||||
|
||||
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
||||
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Natoms)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
|
||||
MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
|
||||
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
|
||||
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
|
||||
@@ -146,6 +166,7 @@ void setupNeighbor(Parameter *param, Atom *atom) {
|
||||
}
|
||||
|
||||
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
||||
|
||||
MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
|
||||
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
|
||||
MD_FLOAT dm = MAX(dl, dh);
|
||||
@@ -163,6 +184,7 @@ MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
|
||||
dm = MAX(dl, dh);
|
||||
dm0 = MAX(dm, 0.0);
|
||||
d2 += dm0 * dm0;
|
||||
|
||||
return d2;
|
||||
}
|
||||
|
||||
@@ -225,7 +247,6 @@ static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
|
||||
|
||||
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
DEBUG_MESSAGE("buildNeighbor start\n");
|
||||
|
||||
/* extend atom arrays if necessary */
|
||||
if(atom->Nclusters_local > nmax) {
|
||||
nmax = atom->Nclusters_local;
|
||||
@@ -249,7 +270,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
while(resize) {
|
||||
int new_maxneighs = neighbor->maxneighs;
|
||||
resize = 0;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_cj1 = CJ1_FROM_CI(ci);
|
||||
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
|
||||
@@ -262,14 +282,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
|
||||
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
|
||||
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
|
||||
int cj, m = -1;
|
||||
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
|
||||
const int c = bin_nclusters[jbin];
|
||||
|
||||
if(c > 0) {
|
||||
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
|
||||
|
||||
@@ -279,6 +297,7 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
if(neighbor->half_neigh && ci_cj1 > cj) {
|
||||
continue;
|
||||
}
|
||||
|
||||
jbb_zmin = atom->jclusters[cj].bbminz;
|
||||
jbb_zmax = atom->jclusters[cj].bbmaxz;
|
||||
dl = ibb_zmin - jbb_zmax;
|
||||
@@ -287,7 +306,6 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
dm0 = MAX(dm, 0.0);
|
||||
d_bb_sq = dm0 * dm0;
|
||||
} while(m + 1 < c && d_bb_sq > cutneighsq);
|
||||
|
||||
jbb_xmin = atom->jclusters[cj].bbminx;
|
||||
jbb_xmax = atom->jclusters[cj].bbmaxx;
|
||||
jbb_ymin = atom->jclusters[cj].bbminy;
|
||||
@@ -383,13 +401,14 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor) {
|
||||
|
||||
if(resize) {
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
|
||||
fprintf(stdout, "RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
|
||||
free(neighbor->neighbors);
|
||||
free(neighbor->neighbors_imask);
|
||||
neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
|
||||
}
|
||||
}
|
||||
if(method == eightShell) neighborGhost(atom, neighbor);
|
||||
|
||||
/*
|
||||
DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
|
||||
@@ -511,19 +530,19 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
|
||||
int ix, iy;
|
||||
|
||||
if(xin >= xprd) {
|
||||
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * bininvx) - mbinxlo;
|
||||
ix = (int)((xin+eps) * bininvx) - mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
iy = (int)(((yin+eps) - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * bininvy) - mbinylo;
|
||||
iy = (int)((yin+eps) * bininvy) - mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
|
||||
}
|
||||
|
||||
return (iy * mbinx + ix + 1);
|
||||
@@ -531,26 +550,24 @@ int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
|
||||
|
||||
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
|
||||
if(xin >= xprd) {
|
||||
*ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
*ix = (int)((xin + eps - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
*ix = (int)(xin * bininvx) - mbinxlo;
|
||||
*ix = (int)((xin+eps) * bininvx) - mbinxlo;
|
||||
} else {
|
||||
*ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
*ix = (int)((xin+eps) * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
*iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
*iy = (int)((yin + eps - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
*iy = (int)(yin * bininvy) - mbinylo;
|
||||
*iy = (int)((yin+eps) * bininvy) - mbinylo;
|
||||
} else {
|
||||
*iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
*iy = (int)((yin+eps) * bininvy) - mbinylo - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void binAtoms(Atom *atom) {
|
||||
DEBUG_MESSAGE("binAtoms start\n");
|
||||
int resize = 1;
|
||||
|
||||
while(resize > 0) {
|
||||
resize = 0;
|
||||
|
||||
@@ -616,7 +633,6 @@ void buildClusters(Atom *atom) {
|
||||
/* bin local atoms */
|
||||
binAtoms(atom);
|
||||
sortAtomsByZCoord(atom);
|
||||
|
||||
for(int bin = 0; bin < mbins; bin++) {
|
||||
int c = bincount[bin];
|
||||
int ac = 0;
|
||||
@@ -688,6 +704,9 @@ void buildClusters(Atom *atom) {
|
||||
void defineJClusters(Atom *atom) {
|
||||
DEBUG_MESSAGE("defineJClusters start\n");
|
||||
|
||||
const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
|
||||
atom->ncj = atom->Nclusters_local / jfac;
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int cj0 = CJ0_FROM_CI(ci);
|
||||
|
||||
@@ -830,12 +849,11 @@ void binClusters(Atom *atom) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
|
||||
const int cj = ncj + cg;
|
||||
int ix = -1, iy = -1;
|
||||
MD_FLOAT xtmp, ytmp;
|
||||
|
||||
if(shellMethod == halfShell && !halfZoneCluster(atom, cj)) continue;
|
||||
if(atom->jclusters[cj].natoms > 0) {
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
|
||||
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
|
||||
@@ -846,6 +864,7 @@ void binClusters(Atom *atom) {
|
||||
coord2bin2D(xtmp, ytmp, &ix, &iy);
|
||||
ix = MAX(MIN(ix, mbinx - 1), 0);
|
||||
iy = MAX(MIN(iy, mbiny - 1), 0);
|
||||
|
||||
for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
|
||||
int nix, niy;
|
||||
xtmp = cj_x[CL_X_OFFSET + cjj];
|
||||
@@ -861,7 +880,6 @@ void binClusters(Atom *atom) {
|
||||
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
|
||||
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
|
||||
}
|
||||
|
||||
int bin = iy * mbinx + ix + 1;
|
||||
int c = bin_nclusters[bin];
|
||||
if(c < clusters_per_bin) {
|
||||
@@ -883,25 +901,21 @@ void binClusters(Atom *atom) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!inserted) {
|
||||
bin_clusters[bin * clusters_per_bin + c] = cj;
|
||||
}
|
||||
|
||||
bin_nclusters[bin]++;
|
||||
} else {
|
||||
resize = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(bin_clusters);
|
||||
clusters_per_bin *= 2;
|
||||
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
DEBUG_MESSAGE("bin_nclusters\n");
|
||||
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
|
||||
@@ -919,7 +933,6 @@ void updateSingleAtoms(Atom *atom) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
|
||||
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
|
||||
@@ -930,10 +943,172 @@ void updateSingleAtoms(Atom *atom) {
|
||||
Natom++;
|
||||
}
|
||||
}
|
||||
|
||||
if(Natom != atom->Nlocal) {
|
||||
fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("updateSingleAtoms stop\n");
|
||||
}
|
||||
|
||||
//MPI Shell Methods
|
||||
|
||||
static int eightZoneCluster(Atom* atom, int cj)
|
||||
{
|
||||
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
|
||||
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
|
||||
int zone = 0;
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
|
||||
if (atom->jclusters[cj].bbminx +eps >=hi[_x]){
|
||||
zone += 1;
|
||||
}
|
||||
if (atom->jclusters[cj].bbminy +eps >=hi[_y]){
|
||||
zone += 2;
|
||||
}
|
||||
if (atom->jclusters[cj].bbminz +eps >=hi[_z]){
|
||||
zone += 4;
|
||||
}
|
||||
return zoneMapping[zone];
|
||||
}
|
||||
|
||||
static int halfZoneCluster(Atom* atom, int cj)
|
||||
{
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
MD_FLOAT *lo = atom->mybox.lo;
|
||||
|
||||
if(atom->jclusters[cj].bbmaxx < lo[_x] && atom->jclusters[cj].bbmaxy < hi[_y] &&
|
||||
atom->jclusters[cj].bbmaxz < hi[_z]){
|
||||
return 0;
|
||||
} else if(atom->jclusters[cj].bbmaxy < lo[_y] && atom->jclusters[cj].bbmaxz < hi[_z]){
|
||||
return 0;
|
||||
} else if(atom->jclusters[cj].bbmaxz < lo[_z]){
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int BoxGhostDistance(Atom *atom, int ci, int cj) {
|
||||
|
||||
MD_FLOAT dl = atom->jclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
|
||||
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->jclusters[ci].bbmaxx;
|
||||
MD_FLOAT dm = MAX(dl, dh);
|
||||
MD_FLOAT dm0 = MAX(dm, 0.0);
|
||||
MD_FLOAT dx2 = dm0 * dm0;
|
||||
|
||||
dl = atom->jclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
|
||||
dh = atom->jclusters[cj].bbminy - atom->jclusters[ci].bbmaxy;
|
||||
dm = MAX(dl, dh);
|
||||
dm0 = MAX(dm, 0.0);
|
||||
MD_FLOAT dy2 = dm0 * dm0;
|
||||
|
||||
dl = atom->jclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
|
||||
dh = atom->jclusters[cj].bbminz - atom->jclusters[ci].bbmaxz;
|
||||
dm = MAX(dl, dh);
|
||||
dm0 = MAX(dm, 0.0);
|
||||
MD_FLOAT dz2 = dm0 * dm0;
|
||||
|
||||
return dx2 > cutneighsq ? 0 : dy2 > cutneighsq ? 0 : dz2 > cutneighsq ? 0 : 1;
|
||||
}
|
||||
|
||||
static int ghostClusterinRange(Atom *atom, int cs, int cg, MD_FLOAT rsq) {
|
||||
int cs_vec_base = CJ_VECTOR_BASE_INDEX(cs);
|
||||
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cg);
|
||||
MD_FLOAT *cs_x = &atom->cl_x[cs_vec_base];
|
||||
MD_FLOAT *cg_x = &atom->cl_x[cj_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->jclusters[cs].natoms; cii++) {
|
||||
for(int cjj = 0; cjj < atom->jclusters[cg].natoms; cjj++) {
|
||||
MD_FLOAT delx = cs_x[CL_X_OFFSET + cii] - cg_x[CL_X_OFFSET + cjj];
|
||||
MD_FLOAT dely = cs_x[CL_Y_OFFSET + cii] - cg_x[CL_Y_OFFSET + cjj];
|
||||
MD_FLOAT delz = cs_x[CL_Z_OFFSET + cii] - cg_x[CL_Z_OFFSET + cjj];
|
||||
if(delx * delx + dely * dely + delz * delz < rsq) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
|
||||
int Nshell=0;
|
||||
int Ncluster_local = atom->Nclusters_local;
|
||||
int Nclusterghost = atom->Nclusters_ghost;
|
||||
if(neighbor->listshell) free(neighbor->listshell);
|
||||
neighbor->listshell = (int*) malloc(Nclusterghost * sizeof(int));
|
||||
int* listzone = (int*) malloc(8 * Nclusterghost * sizeof(int));
|
||||
int countCluster[8] = {0,0,0,0,0,0,0,0};
|
||||
|
||||
//Selecting ghost atoms for interaction and putting them into regions
|
||||
for(int cg = atom->ncj; cg < atom->ncj+Nclusterghost; cg++) {
|
||||
int czone = eightZoneCluster(atom,cg);
|
||||
int *list = &listzone[Nclusterghost*czone];
|
||||
int n = countCluster[czone];
|
||||
list[n] = cg;
|
||||
countCluster[czone]++;
|
||||
//It is only necessary to find neighbour particles for 3 regions
|
||||
//if(czone == 1 || czone == 2 || czone == 3)
|
||||
//neighbor->listshell[Nshell++] = cg;
|
||||
}
|
||||
|
||||
for(int zone = 1; zone<=3; zone++){
|
||||
int *list = &listzone[Nclusterghost*zone];
|
||||
for(int n=0; n<countCluster[zone]; n++)
|
||||
neighbor->listshell[Nshell++] = list[n];
|
||||
}
|
||||
|
||||
neighbor->Nshell = Nshell;
|
||||
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
|
||||
if(neighbor->neighshell) free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
|
||||
|
||||
int resize = 1;
|
||||
|
||||
while(resize)
|
||||
{
|
||||
resize = 0;
|
||||
for(int ic = 0; ic < Nshell; ic++) {
|
||||
int *neighshell = &(neighbor->neighshell[ic*neighbor->maxneighs]);
|
||||
int n = 0;
|
||||
int icluster = neighbor->listshell[ic];
|
||||
int iczone = eightZoneCluster(atom, icluster);
|
||||
|
||||
for(int jczone=0; jczone<8; jczone++){
|
||||
|
||||
if(jczone <=iczone) continue;
|
||||
if(iczone == 1 && (jczone==5||jczone==6||jczone==7)) continue;
|
||||
if(iczone == 2 && (jczone==4||jczone==6||jczone==7)) continue;
|
||||
if(iczone == 3 && (jczone==4||jczone==5||jczone==7)) continue;
|
||||
|
||||
int Ncluster = countCluster[jczone];
|
||||
int* loc_zone = &listzone[jczone * Nclusterghost];
|
||||
|
||||
for(int k = 0; k < Ncluster ; k++) {
|
||||
int jcluster = loc_zone[k];
|
||||
|
||||
if(BoxGhostDistance(atom, icluster, jcluster))
|
||||
{
|
||||
if(ghostClusterinRange(atom, icluster, jcluster, cutneighsq))
|
||||
neighshell[n++] = jcluster;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
neighbor->numNeighShell[ic] = n;
|
||||
|
||||
if(n >= neighbor->maxneighs){
|
||||
resize = 1;
|
||||
neighbor->maxneighs = n * 1.2;
|
||||
fprintf(stdout, "RESIZE EIGHT SHELL %d, PROC %d\n", neighbor->maxneighs,me);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
free(listzone);
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -9,6 +9,11 @@
|
||||
|
||||
#include <atom.h>
|
||||
#include <vtk.h>
|
||||
#include <mpi.h>
|
||||
#include <string.h>
|
||||
|
||||
static MPI_File _fh;
|
||||
static inline void flushBuffer(char*);
|
||||
|
||||
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
|
||||
write_local_atoms_to_vtk_file(filename, atom, timestep);
|
||||
@@ -188,3 +193,128 @@ int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
|
||||
{
|
||||
char msg[256];
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
|
||||
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
|
||||
if(_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (comm->myproc==0){
|
||||
sprintf(msg, "# vtk DataFile Version 2.0\n");
|
||||
sprintf(msg, "%sParticle data\n",msg);
|
||||
sprintf(msg, "%sASCII\n",msg);
|
||||
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
|
||||
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
|
||||
{
|
||||
if (_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
|
||||
int extrabuff = 100;
|
||||
int sizebuff = sizeline*atom->Nlocal+extrabuff;
|
||||
int mysize = 0;
|
||||
char* msg = (char*) malloc(sizebuff);
|
||||
sprintf(msg, "");
|
||||
for(int i = 0; i < atom->Nlocal; i++){
|
||||
if(mysize+extrabuff >= sizebuff){
|
||||
sizebuff*= 1.5;
|
||||
msg = (char*) realloc(msg, sizebuff);
|
||||
}
|
||||
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
|
||||
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
|
||||
mysize = strlen(msg);
|
||||
}
|
||||
int gatherSize[comm->numproc];
|
||||
|
||||
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
int offset=0;
|
||||
int globalSize = 0;
|
||||
|
||||
for(int i = 0; i < comm->myproc; i++)
|
||||
offset+= gatherSize[i];
|
||||
|
||||
for(int i = 0; i < comm->numproc; i++)
|
||||
globalSize+= gatherSize[i];
|
||||
|
||||
MPI_Offset displ;
|
||||
MPI_Datatype FileType;
|
||||
int GlobalSize[] = {globalSize};
|
||||
int LocalSize[] = {mysize};
|
||||
int Start[] = {offset};
|
||||
|
||||
if(LocalSize[0]>0){
|
||||
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
|
||||
} else {
|
||||
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
|
||||
}
|
||||
MPI_Type_commit(&FileType);
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
|
||||
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
|
||||
|
||||
if (comm->myproc==0){
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
|
||||
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1 %d\n", msg, i);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1\n",msg);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
|
||||
sprintf(msg, "%sSCALARS mass double\n",msg);
|
||||
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1.0\n",msg);
|
||||
sprintf(msg, "%s\n\n",msg);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
void vtkClose()
|
||||
{
|
||||
MPI_File_close(&_fh);
|
||||
_fh=MPI_FILE_NULL;
|
||||
}
|
||||
|
||||
//TODO: print ghost and cluster using MPI
|
||||
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
|
||||
{
|
||||
if(comm->numproc == 1)
|
||||
{
|
||||
write_data_to_vtk_file(filename, atom, timestep);
|
||||
return;
|
||||
}
|
||||
|
||||
vtkOpen(filename, comm, atom, timestep);
|
||||
vtkVector(comm, atom, param);
|
||||
vtkClose();
|
||||
}
|
||||
|
||||
static inline void flushBuffer(char* msg){
|
||||
MPI_Offset displ;
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,18 +1,17 @@
|
||||
CC = /opt/homebrew/Cellar/llvm/18.1.5/bin/clang
|
||||
CC = clang
|
||||
LINKER = $(CC)
|
||||
|
||||
ANSI_CFLAGS = -ansi
|
||||
ANSI_CFLAGS += -std=c99
|
||||
ANSI_CFLAGS += -pedantic
|
||||
# ANSI_CFLAGS += -Wextra
|
||||
ANSI_CFLAGS += -Wextra
|
||||
|
||||
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) -Xpreprocessor -fopenmp #-g
|
||||
CFLAGS = -Ofast -march=native $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||
#CFLAGS = -Ofast -march=core-avx2 $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||
#CFLAGS = -O3 -march=cascadelake $(ANSI_CFLAGS) #-Xpreprocessor -fopenmp -g
|
||||
#CFLAGS = -Ofast $(ANSI_CFLAGS) -g #-Xpreprocessor -fopenmp -g
|
||||
ASFLAGS = #-masm=intel
|
||||
ASFLAGS = -masm=intel
|
||||
LFLAGS =
|
||||
DEFINES = -D_GNU_SOURCE
|
||||
# MacOSX with Apple Silicon and homebrew
|
||||
INCLUDES = -I/opt/homebrew/Cellar/libomp/18.1.5/include/
|
||||
LIBS = -lm -L/opt/homebrew/Cellar/libomp/18.1.5/lib/ -lomp
|
||||
INCLUDES =
|
||||
LIBS = -lm #-lomp
|
11
include_GROMACS.mk
Normal file
11
include_GROMACS.mk
Normal file
@@ -0,0 +1,11 @@
|
||||
GROMACS_PATH=/apps/Gromacs/2018.1-mkl
|
||||
GROMACS_INC ?= -I${GROMACS_PATH}/include
|
||||
GROMACS_DEFINES ?=
|
||||
GROMACS_LIB ?= -L${GROMACS_PATH}/lib64
|
||||
|
||||
ifeq ($(strip $(XTC_OUTPUT)),true)
|
||||
INCLUDES += ${GROMACS_INC}
|
||||
DEFINES += ${GROMACS_DEFINES}
|
||||
LIBS += -lgromacs
|
||||
LFLAGS += ${GROMACS_LIB}
|
||||
endif
|
32
include_MPIICC.mk
Normal file
32
include_MPIICC.mk
Normal file
@@ -0,0 +1,32 @@
|
||||
CC = mpiicc
|
||||
LINKER = $(CC)
|
||||
|
||||
OPENMP = #-qopenmp
|
||||
PROFILE = #-profile-functions -g -pg
|
||||
|
||||
ifeq ($(ISA),AVX512)
|
||||
OPTS = -Ofast -xCORE-AVX512 -qopt-zmm-usage=high $(PROFILE) #-g -debug
|
||||
endif
|
||||
|
||||
ifeq ($(ISA),AVX2)
|
||||
OPTS = -Ofast -xCORE-AVX2 $(PROFILE)
|
||||
#OPTS = -Ofast -xAVX2 $(PROFILE)
|
||||
#OPTS = -Ofast -march=core-avx2 $(PROFILE)
|
||||
endif
|
||||
|
||||
ifeq ($(ISA),AVX)
|
||||
OPTS = -Ofast -xAVX $(PROFILE)
|
||||
endif
|
||||
|
||||
ifeq ($(ISA),SSE)
|
||||
OPTS = -Ofast -xSSE4.2 $(PROFILE)
|
||||
endif
|
||||
|
||||
#OPTS = -Ofast -no-vec $(PROFILE)
|
||||
#OPTS = -Ofast -xHost $(PROFILE)
|
||||
CFLAGS = $(PROFILE) -restrict $(OPENMP) $(OPTS)
|
||||
ASFLAGS = #-masm=intel
|
||||
LFLAGS = $(PROFILE) $(OPTS) $(OPENMP)
|
||||
DEFINES = -std=c11 -pedantic-errors -D_GNU_SOURCE -DNO_ZMM_INTRIN
|
||||
INCLUDES =
|
||||
LIBS = -lm
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -9,10 +9,12 @@
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
#include <device.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
@@ -21,10 +23,10 @@
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
void initAtom(Atom *atom) {
|
||||
void initAtom(Atom *atom){
|
||||
atom->x = NULL; atom->y = NULL; atom->z = NULL;
|
||||
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
|
||||
atom->fx = NULL; atom->fy = NULL; atom->fz = NULL;
|
||||
@@ -41,6 +43,7 @@ void initAtom(Atom *atom) {
|
||||
atom->radius = NULL;
|
||||
atom->av = NULL;
|
||||
atom->r = NULL;
|
||||
atom->border_map = NULL;
|
||||
|
||||
DeviceAtom *d_atom = &(atom->d_atom);
|
||||
d_atom->x = NULL; d_atom->y = NULL; d_atom->z = NULL;
|
||||
@@ -52,12 +55,19 @@ void initAtom(Atom *atom) {
|
||||
d_atom->sigma6 = NULL;
|
||||
d_atom->cutforcesq = NULL;
|
||||
d_atom->cutneighsq = NULL;
|
||||
//MPI
|
||||
Box *mybox = &(atom->mybox);
|
||||
mybox->xprd = mybox->yprd = mybox->zprd = 0;
|
||||
mybox->lo[_x] = mybox->lo[_y] = mybox->lo[_z] = 0;
|
||||
mybox->hi[_x] = mybox->hi[_y] = mybox->hi[_z] = 0;
|
||||
}
|
||||
|
||||
void createAtom(Atom *atom, Parameter *param) {
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
|
||||
|
||||
MD_FLOAT xlo = 0; MD_FLOAT xhi = param->xprd;
|
||||
MD_FLOAT ylo = 0; MD_FLOAT yhi = param->yprd;
|
||||
MD_FLOAT zlo = 0; MD_FLOAT zhi = param->zprd;
|
||||
|
||||
atom->Natoms = 4 * param->nx * param->ny * param->nz;
|
||||
atom->Nlocal = 0;
|
||||
atom->ntypes = param->ntypes;
|
||||
@@ -131,7 +141,7 @@ void createAtom(Atom *atom, Parameter *param) {
|
||||
}
|
||||
vztmp = myrandom(&n);
|
||||
|
||||
if(atom->Nlocal == atom->Nmax) {
|
||||
while(atom->Nlocal >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
@@ -163,38 +173,42 @@ int type_str2int(const char *type) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int readAtom(Atom* atom, Parameter* param) {
|
||||
int readAtom(Atom *atom, Parameter *param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int len = strlen(param->input_file);
|
||||
if(strncmp(¶m->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
|
||||
if(strncmp(¶m->input_file[len - 3], ".in", 3) == 0) { return readAtom_in(atom, param); }
|
||||
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp, in\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int read_atoms = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while(!feof(fp)) {
|
||||
readline(line, fp);
|
||||
char *item = strtok(line, " ");
|
||||
char *item = strtok(line, "\t ");
|
||||
if(strncmp(item, "CRYST1", 6) == 0) {
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
@@ -203,23 +217,23 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
char *label;
|
||||
int atom_id, comp_id;
|
||||
MD_FLOAT occupancy, charge;
|
||||
atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
|
||||
label = strtok(NULL, " ");
|
||||
comp_id = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->type[atom_id] = type_str2int(strtok(NULL, "\t "));
|
||||
label = strtok(NULL, "\t ");
|
||||
comp_id = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = 0.0;
|
||||
atom_vy(atom_id) = 0.0;
|
||||
atom_vz(atom_id) = 0.0;
|
||||
occupancy = atof(strtok(NULL, " "));
|
||||
charge = atof(strtok(NULL, " "));
|
||||
occupancy = atof(strtok(NULL, "\t "));
|
||||
charge = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@@ -231,14 +245,14 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
strncmp(item, "ENDMDL", 6) == 0) {
|
||||
// Do nothing
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(!read_atoms) {
|
||||
fprintf(stderr, "Input error: No atoms read!\n");
|
||||
if(me==0)fprintf(stderr, "Input error: No atoms read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -254,12 +268,15 @@ int readAtom_pdb(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
char desc[MAXLINE];
|
||||
@@ -268,7 +285,7 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
int i = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -277,26 +294,26 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
for(i = 0; desc[i] != '\n'; i++);
|
||||
desc[i] = '\0';
|
||||
readline(line, fp);
|
||||
atoms_to_read = atoi(strtok(line, " "));
|
||||
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
atoms_to_read = atoi(strtok(line, "\t "));
|
||||
if(me==0)fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
|
||||
|
||||
while(!feof(fp) && read_atoms < atoms_to_read) {
|
||||
readline(line, fp);
|
||||
char *label = strtok(line, " ");
|
||||
int type = type_str2int(strtok(NULL, " "));
|
||||
int atom_id = atoi(strtok(NULL, " ")) - 1;
|
||||
char *label = strtok(line, "\t ");
|
||||
int type = type_str2int(strtok(NULL, "\t "));
|
||||
int atom_id = atoi(strtok(NULL, "\t ")) - 1;
|
||||
atom_id = read_atoms;
|
||||
while(atom_id + 1 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
atom->type[atom_id] = type;
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
|
||||
atom->Natoms++;
|
||||
atom->Nlocal++;
|
||||
@@ -306,18 +323,18 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
if(!feof(fp)) {
|
||||
readline(line, fp);
|
||||
param->xlo = 0.0;
|
||||
param->xhi = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(line, "\t "));
|
||||
param->ylo = 0.0;
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = 0.0;
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
}
|
||||
|
||||
if(read_atoms != atoms_to_read) {
|
||||
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
if(me==0)fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -333,12 +350,14 @@ int readAtom_gro(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
|
||||
fclose(fp);
|
||||
return read_atoms;
|
||||
}
|
||||
|
||||
int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int natoms = 0;
|
||||
@@ -347,7 +366,7 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
int ts = -1;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0)fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -370,47 +389,47 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
}
|
||||
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
|
||||
readline(line, fp);
|
||||
param->xlo = atof(strtok(line, " "));
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->xlo = atof(strtok(line, "\t "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
|
||||
readline(line, fp);
|
||||
param->ylo = atof(strtok(line, " "));
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->ylo = atof(strtok(line, "\t "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
|
||||
readline(line, fp);
|
||||
param->zlo = atof(strtok(line, " "));
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
param->zlo = atof(strtok(line, "\t "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
|
||||
for(int i = 0; i < natoms; i++) {
|
||||
readline(line, fp);
|
||||
atom_id = atoi(strtok(line, " ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_id = atoi(strtok(line, "\t ")) - 1;
|
||||
atom->type[atom_id] = atoi(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
|
||||
read_atoms++;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid item: %s\n", item);
|
||||
if(me==0)fprintf(stderr, "Invalid item: %s\n", item);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
if(me==0)fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(ts < 0 || !natoms || !read_atoms) {
|
||||
fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -426,30 +445,34 @@ int readAtom_dmp(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
return natoms;
|
||||
}
|
||||
|
||||
int readAtom_in(Atom* atom, Parameter* param) {
|
||||
int me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
FILE *fp = fopen(param->input_file, "r");
|
||||
char line[MAXLINE];
|
||||
int natoms = 0;
|
||||
int atom_id = 0;
|
||||
|
||||
if(!fp) {
|
||||
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
if(me==0) fprintf(stderr, "Could not open input file: %s\n", param->input_file);
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
|
||||
readline(line, fp);
|
||||
natoms = atoi(strtok(line, " "));
|
||||
param->xlo = atof(strtok(NULL, " "));
|
||||
param->xhi = atof(strtok(NULL, " "));
|
||||
param->ylo = atof(strtok(NULL, " "));
|
||||
param->yhi = atof(strtok(NULL, " "));
|
||||
param->zlo = atof(strtok(NULL, " "));
|
||||
param->zhi = atof(strtok(NULL, " "));
|
||||
natoms = atoi(strtok(line, "\t "));
|
||||
param->xlo = atof(strtok(NULL, "\t "));
|
||||
param->xhi = atof(strtok(NULL, "\t "));
|
||||
param->ylo = atof(strtok(NULL, "\t "));
|
||||
param->yhi = atof(strtok(NULL, "\t "));
|
||||
param->zlo = atof(strtok(NULL, "\t "));
|
||||
param->zhi = atof(strtok(NULL, "\t "));
|
||||
param->xprd = param->xhi - param->xlo;
|
||||
param->yprd = param->yhi - param->ylo;
|
||||
param->zprd = param->zhi - param->zlo;
|
||||
atom->Natoms = natoms;
|
||||
atom->Nlocal = natoms;
|
||||
atom->ntypes = 1;
|
||||
@@ -462,27 +485,26 @@ int readAtom_in(Atom* atom, Parameter* param) {
|
||||
readline(line, fp);
|
||||
|
||||
// TODO: store mass per atom
|
||||
char *s_mass = strtok(line, " ");
|
||||
char *s_mass = strtok(line, "\t ");
|
||||
if(strncmp(s_mass, "inf", 3) == 0) {
|
||||
// Set atom's mass to INFINITY
|
||||
} else {
|
||||
param->mass = atof(s_mass);
|
||||
}
|
||||
|
||||
atom->radius[atom_id] = atof(strtok(NULL, " "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, " "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, " "));
|
||||
atom->radius[atom_id] = atof(strtok(NULL, "\t "));
|
||||
atom_x(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_y(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_z(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vx(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vy(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom_vz(atom_id) = atof(strtok(NULL, "\t "));
|
||||
atom->type[atom_id] = 0;
|
||||
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
|
||||
atom_id++;
|
||||
}
|
||||
|
||||
if(!natoms) {
|
||||
fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
if(me==0)fprintf(stderr, "Input error: atom data was not read!\n");
|
||||
exit(-1);
|
||||
return -1;
|
||||
}
|
||||
@@ -498,25 +520,10 @@ int readAtom_in(Atom* atom, Parameter* param) {
|
||||
atom->cutforcesq[i] = param->cutforce * param->cutforce;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
if(me==0)fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
|
||||
return natoms;
|
||||
}
|
||||
|
||||
void writeAtom(Atom *atom, Parameter *param) {
|
||||
FILE *fp = fopen(param->write_atom_file, "w");
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fp, "%d,%f,%f,%f,%f,%f,%f,%f,0\n",
|
||||
atom->type[i], 1.0,
|
||||
atom_x(i), atom_y(i), atom_z(i),
|
||||
atom_vx(i), atom_vy(i), atom_vz(i));
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
fprintf(stdout, "Wrote input data to %s, grid size: %f, %f, %f\n",
|
||||
param->write_atom_file, param->xprd, param->yprd, param->zprd);
|
||||
}
|
||||
|
||||
void growAtom(Atom *atom) {
|
||||
DeviceAtom *d_atom = &(atom->d_atom);
|
||||
int nold = atom->Nmax;
|
||||
@@ -545,7 +552,125 @@ void growAtom(Atom *atom) {
|
||||
REALLOC(type, int, atom->Nmax * sizeof(int), nold * sizeof(int));
|
||||
|
||||
// DEM
|
||||
atom->radius = (MD_FLOAT *) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->radius = (MD_FLOAT*) reallocate(atom->radius, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||
atom->av = (MD_FLOAT*) reallocate(atom->av, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||
atom->r = (MD_FLOAT*) reallocate(atom->r, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 4, nold * sizeof(MD_FLOAT) * 4);
|
||||
}
|
||||
|
||||
/* MPI added*/
|
||||
void packForward(Atom* atom, int n ,int* list, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
buf_x(i) = atom_x(j) + pbc[0] * atom->mybox.xprd;
|
||||
buf_y(i) = atom_y(j) + pbc[1] * atom->mybox.yprd;
|
||||
buf_z(i) = atom_z(j) + pbc[2] * atom->mybox.zprd;
|
||||
}
|
||||
}
|
||||
|
||||
void unpackForward(Atom* atom, int n, int first, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < n; i++) {
|
||||
atom_x((first + i)) = buf_x(i);
|
||||
atom_y((first + i)) = buf_y(i);
|
||||
atom_z((first + i)) = buf_z(i);
|
||||
}
|
||||
}
|
||||
|
||||
int packGhost(Atom* atom, int i, MD_FLOAT* buf, int* pbc)
|
||||
{
|
||||
int m = 0;
|
||||
buf[m++] = atom_x(i) + pbc[_x] * atom->mybox.xprd;
|
||||
buf[m++] = atom_y(i) + pbc[_y] * atom->mybox.yprd;
|
||||
buf[m++] = atom_z(i) + pbc[_z] * atom->mybox.zprd;
|
||||
buf[m++] = atom->type[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackGhost(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
while (i>=atom->Nmax) growAtom(atom);
|
||||
int m = 0;
|
||||
atom_x(i) = buf[m++];
|
||||
atom_y(i) = buf[m++];
|
||||
atom_z(i) = buf[m++];
|
||||
atom->type[i] = buf[m++];
|
||||
atom->Nghost++;
|
||||
return m;
|
||||
}
|
||||
|
||||
void packReverse(Atom* atom, int n, int first, MD_FLOAT* buf)
|
||||
{
|
||||
for(int i = 0; i < n; i++) {
|
||||
buf_x(i) = atom_fx(first + i);
|
||||
buf_y(i) = atom_fy(first + i);
|
||||
buf_z(i) = atom_fz(first + i);
|
||||
}
|
||||
}
|
||||
|
||||
void unpackReverse(Atom* atom, int n, int* list, MD_FLOAT* buf)
|
||||
{
|
||||
int i, j;
|
||||
for(i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
atom_fx(j) += buf_x(i);
|
||||
atom_fy(j) += buf_y(i);
|
||||
atom_fz(j) += buf_z(i);
|
||||
}
|
||||
}
|
||||
|
||||
int packExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
int m = 0;
|
||||
buf[m++] = atom_x(i);
|
||||
buf[m++] = atom_y(i);
|
||||
buf[m++] = atom_z(i);
|
||||
buf[m++] = atom_vx(i);
|
||||
buf[m++] = atom_vy(i);
|
||||
buf[m++] = atom_vz(i);
|
||||
buf[m++] = atom->type[i];
|
||||
return m;
|
||||
}
|
||||
|
||||
int unpackExchange(Atom* atom, int i, MD_FLOAT* buf)
|
||||
{
|
||||
while(i >= atom->Nmax) growAtom(atom);
|
||||
int m = 0;
|
||||
atom_x(i) = buf[m++];
|
||||
atom_y(i) = buf[m++];
|
||||
atom_z(i) = buf[m++];
|
||||
atom_vx(i) = buf[m++];
|
||||
atom_vy(i) = buf[m++];
|
||||
atom_vz(i) = buf[m++];
|
||||
atom->type[i] = buf[m++];
|
||||
return m;
|
||||
}
|
||||
|
||||
void pbc(Atom* atom)
|
||||
{
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
|
||||
MD_FLOAT xprd = atom->mybox.xprd;
|
||||
MD_FLOAT yprd = atom->mybox.yprd;
|
||||
MD_FLOAT zprd = atom->mybox.zprd;
|
||||
|
||||
if(atom_x(i) < 0.0) atom_x(i) += xprd;
|
||||
if(atom_y(i) < 0.0) atom_y(i) += yprd;
|
||||
if(atom_z(i) < 0.0) atom_z(i)+= zprd;
|
||||
if(atom_x(i) >= xprd) atom_x(i) -= xprd;
|
||||
if(atom_y(i) >= yprd) atom_y(i) -= yprd;
|
||||
if(atom_z(i) >= zprd) atom_z(i) -= zprd;
|
||||
}
|
||||
}
|
||||
|
||||
void copy(Atom* atom, int i, int j)
|
||||
{
|
||||
atom_x(i) = atom_x(j);
|
||||
atom_y(i) = atom_y(j);
|
||||
atom_z(i) = atom_z(j);
|
||||
atom_vx(i) = atom_vx(j);
|
||||
atom_vy(i) = atom_vy(j);
|
||||
atom_vz(i) = atom_vz(j);
|
||||
atom->type[i] = atom->type[j];
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
334
lammps/force_lj.c
Normal file
334
lammps/force_lj.c
Normal file
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
//---
|
||||
#include <atom.h>
|
||||
#include <likwid-marker.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timing.h>
|
||||
#include <mpi.h>
|
||||
#include <util.h>
|
||||
#ifdef __SIMD_KERNEL__
|
||||
#include <simd.h>
|
||||
#endif
|
||||
|
||||
void computeForceGhostShell(Parameter*, Atom*, Neighbor*);
|
||||
|
||||
double computeForceLJFullNeigh_plain_c(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
int Nlocal = atom->Nlocal;
|
||||
int Nghost = atom->Nghost;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
const MD_FLOAT num1 = 1.0;
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0.0;
|
||||
MD_FLOAT fiy = 0.0;
|
||||
MD_FLOAT fiz = 0.0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
#endif
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
#ifdef USE_REFERENCE_VERSION
|
||||
addStat(stats->atoms_within_cutoff, 1);
|
||||
} else {
|
||||
addStat(stats->atoms_outside_cutoff, 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
atom_fx(i) += fix;
|
||||
atom_fy(i) += fiy;
|
||||
atom_fz(i) += fiz;
|
||||
|
||||
#ifdef USE_REFERENCE_VERSION
|
||||
if(numneighs % VECTOR_WIDTH > 0) {
|
||||
addStat(stats->atoms_outside_cutoff, VECTOR_WIDTH - (numneighs % VECTOR_WIDTH));
|
||||
}
|
||||
#endif
|
||||
|
||||
addStat(stats->total_force_neighs, numneighs);
|
||||
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
double E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double computeForceLJHalfNeigh(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
int Nlocal = atom->Nlocal;
|
||||
int Nghost = atom->Nghost;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
const MD_FLOAT num1 = 1.0;
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for(int i = 0; i < Nlocal+Nghost; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
double S = getTimeStamp();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("forceLJ-halfneigh");
|
||||
|
||||
#pragma omp for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_FLOAT xtmp = atom_x(i);
|
||||
MD_FLOAT ytmp = atom_y(i);
|
||||
MD_FLOAT ztmp = atom_z(i);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
// Pragma required to vectorize the inner loop
|
||||
#ifdef ENABLE_OMP_SIMD
|
||||
#pragma omp simd reduction(+: fix,fiy,fiz)
|
||||
#endif
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
int j = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_j = atom->type[j];
|
||||
const int type_ij = type_i * atom->ntypes + type_j;
|
||||
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
|
||||
const MD_FLOAT sigma6 = atom->sigma6[type_ij];
|
||||
const MD_FLOAT epsilon = atom->epsilon[type_ij];
|
||||
#endif
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
// We need to update forces for ghost atoms if shell_method or half stencil is requiered
|
||||
if((param->half_neigh && j<Nlocal) || param->method){
|
||||
atom_fx(j) -= delx * force;
|
||||
atom_fy(j) -= dely * force;
|
||||
atom_fz(j) -= delz * force;
|
||||
}
|
||||
}
|
||||
}
|
||||
atom_fx(i) += fix;
|
||||
atom_fy(i) += fiy;
|
||||
atom_fz(i) += fiz;
|
||||
|
||||
addStat(stats->total_force_neighs, numneighs);
|
||||
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
|
||||
}
|
||||
|
||||
if(param->method == eightShell) computeForceGhostShell(param, atom, neighbor);
|
||||
LIKWID_MARKER_STOP("forceLJ-halfneigh");
|
||||
}
|
||||
|
||||
double E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double computeForceLJFullNeigh_simd(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
int Nlocal = atom->Nlocal;
|
||||
int* neighs;
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
atom_fx(i) = 0.0;
|
||||
atom_fy(i) = 0.0;
|
||||
atom_fz(i) = 0.0;
|
||||
}
|
||||
|
||||
double S = getTimeStamp();
|
||||
|
||||
#ifndef __SIMD_KERNEL__
|
||||
fprintf(stderr, "Error: SIMD kernel not implemented for specified instruction set!");
|
||||
exit(-1);
|
||||
#else
|
||||
MD_SIMD_FLOAT cutforcesq_vec = simd_broadcast(cutforcesq);
|
||||
MD_SIMD_FLOAT sigma6_vec = simd_broadcast(sigma6);
|
||||
MD_SIMD_FLOAT eps_vec = simd_broadcast(epsilon);
|
||||
MD_SIMD_FLOAT c48_vec = simd_broadcast(48.0);
|
||||
MD_SIMD_FLOAT c05_vec = simd_broadcast(0.5);
|
||||
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_START("force");
|
||||
|
||||
#pragma omp for
|
||||
for(int i = 0; i < Nlocal; i++) {
|
||||
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
|
||||
int numneighs = neighbor->numneigh[i];
|
||||
MD_SIMD_INT numneighs_vec = simd_int_broadcast(numneighs);
|
||||
MD_SIMD_FLOAT xtmp = simd_broadcast(atom_x(i));
|
||||
MD_SIMD_FLOAT ytmp = simd_broadcast(atom_y(i));
|
||||
MD_SIMD_FLOAT ztmp = simd_broadcast(atom_z(i));
|
||||
MD_SIMD_FLOAT fix = simd_zero();
|
||||
MD_SIMD_FLOAT fiy = simd_zero();
|
||||
MD_SIMD_FLOAT fiz = simd_zero();
|
||||
|
||||
for(int k = 0; k < numneighs; k += VECTOR_WIDTH) {
|
||||
// If the last iteration of this loop is separated from the rest, this mask can be set only there
|
||||
MD_SIMD_MASK mask_numneighs = simd_mask_int_cond_lt(simd_int_add(simd_int_broadcast(k), simd_int_seq()), numneighs_vec);
|
||||
MD_SIMD_INT j = simd_int_mask_load(&neighs[k], mask_numneighs);
|
||||
#ifdef AOS
|
||||
MD_SIMD_INT j3 = simd_int_add(simd_int_add(j, j), j); // j * 3
|
||||
MD_SIMD_FLOAT delx = xtmp - simd_gather(j3, &(atom->x[0]), sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT dely = ytmp - simd_gather(j3, &(atom->x[1]), sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT delz = ztmp - simd_gather(j3, &(atom->x[2]), sizeof(MD_FLOAT));
|
||||
#else
|
||||
MD_SIMD_FLOAT delx = xtmp - simd_gather(j, atom->x, sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT dely = ytmp - simd_gather(j, atom->y, sizeof(MD_FLOAT));
|
||||
MD_SIMD_FLOAT delz = ztmp - simd_gather(j, atom->z, sizeof(MD_FLOAT));
|
||||
#endif
|
||||
MD_SIMD_FLOAT rsq = simd_fma(delx, delx, simd_fma(dely, dely, simd_mul(delz, delz)));
|
||||
MD_SIMD_MASK cutoff_mask = simd_mask_and(mask_numneighs, simd_mask_cond_lt(rsq, cutforcesq_vec));
|
||||
MD_SIMD_FLOAT sr2 = simd_reciprocal(rsq);
|
||||
MD_SIMD_FLOAT sr6 = simd_mul(sr2, simd_mul(sr2, simd_mul(sr2, sigma6_vec)));
|
||||
MD_SIMD_FLOAT force = simd_mul(c48_vec, simd_mul(sr6, simd_mul(simd_sub(sr6, c05_vec), simd_mul(sr2, eps_vec))));
|
||||
|
||||
fix = simd_masked_add(fix, simd_mul(delx, force), cutoff_mask);
|
||||
fiy = simd_masked_add(fiy, simd_mul(dely, force), cutoff_mask);
|
||||
fiz = simd_masked_add(fiz, simd_mul(delz, force), cutoff_mask);
|
||||
}
|
||||
|
||||
atom_fx(i) += simd_h_reduce_sum(fix);
|
||||
atom_fy(i) += simd_h_reduce_sum(fiy);
|
||||
atom_fz(i) += simd_h_reduce_sum(fiz);
|
||||
}
|
||||
|
||||
LIKWID_MARKER_STOP("force");
|
||||
}
|
||||
#endif
|
||||
|
||||
double E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void computeForceGhostShell(Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
int Nshell = neighbor->Nshell;
|
||||
int* neighs;
|
||||
#ifndef EXPLICIT_TYPES
|
||||
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
|
||||
MD_FLOAT sigma6 = param->sigma6;
|
||||
MD_FLOAT epsilon = param->epsilon;
|
||||
#endif
|
||||
const MD_FLOAT num1 = 1.0;
|
||||
const MD_FLOAT num48 = 48.0;
|
||||
const MD_FLOAT num05 = 0.5;
|
||||
|
||||
for(int i = 0; i < Nshell; i++) {
|
||||
neighs = &(neighbor->neighshell[i * neighbor->maxneighs]);
|
||||
int numneigh = neighbor->numNeighShell[i];
|
||||
int iatom = neighbor->listshell[i];
|
||||
MD_FLOAT xtmp = atom_x(iatom);
|
||||
MD_FLOAT ytmp = atom_y(iatom);
|
||||
MD_FLOAT ztmp = atom_z(iatom);
|
||||
MD_FLOAT fix = 0;
|
||||
MD_FLOAT fiy = 0;
|
||||
MD_FLOAT fiz = 0;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
const int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < numneigh; k++) {
|
||||
int jatom = neighs[k];
|
||||
MD_FLOAT delx = xtmp - atom_x(jatom);
|
||||
MD_FLOAT dely = ytmp - atom_y(jatom);
|
||||
MD_FLOAT delz = ztmp - atom_z(jatom);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq < cutforcesq) {
|
||||
MD_FLOAT sr2 = num1 / rsq;
|
||||
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
MD_FLOAT force = num48 * sr6 * (sr6 - num05) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
atom_fx(jatom) -= delx * force;
|
||||
atom_fy(jatom) -= dely * force;
|
||||
atom_fz(jatom) -= delz * force;
|
||||
}
|
||||
}
|
||||
atom_fx(iatom) += fix;
|
||||
atom_fy(iatom) += fiy;
|
||||
atom_fz(iatom) += fiz;
|
||||
|
||||
}
|
||||
}
|
||||
|
123
lammps/includes/atom.h
Normal file
123
lammps/includes/atom.h
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include <box.h>
|
||||
#include <parameter.h>
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
# define KERNEL_NAME "CUDA"
|
||||
# define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
|
||||
# define initialIntegrate initialIntegrate_cuda
|
||||
# define finalIntegrate finalIntegrate_cuda
|
||||
# define buildNeighbor buildNeighbor_cuda
|
||||
# define updatePbc updatePbc_cuda
|
||||
# define updateAtomsPbc updateAtomsPbc_cuda
|
||||
#else
|
||||
# ifdef USE_SIMD_KERNEL
|
||||
# define KERNEL_NAME "SIMD"
|
||||
# define computeForceLJFullNeigh computeForceLJFullNeigh_simd
|
||||
# else
|
||||
# define KERNEL_NAME "plain-C"
|
||||
# define computeForceLJFullNeigh computeForceLJFullNeigh_plain_c
|
||||
# endif
|
||||
# define initialIntegrate initialIntegrate_cpu
|
||||
# define finalIntegrate finalIntegrate_cpu
|
||||
# define buildNeighbor buildNeighbor_cpu
|
||||
# define updatePbc updatePbc_cpu
|
||||
# define updateAtomsPbc updateAtomsPbc_cpu
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int *border_map;
|
||||
int *type;
|
||||
MD_FLOAT *epsilon;
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
} DeviceAtom;
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int *border_map;
|
||||
int *type;
|
||||
int ntypes;
|
||||
MD_FLOAT *epsilon;
|
||||
MD_FLOAT *sigma6;
|
||||
MD_FLOAT *cutforcesq;
|
||||
MD_FLOAT *cutneighsq;
|
||||
//TODO: insert the id number
|
||||
//MD_FLOAT *Atom_id;
|
||||
|
||||
// DEM
|
||||
MD_FLOAT *radius;
|
||||
MD_FLOAT *av;
|
||||
MD_FLOAT *r;
|
||||
|
||||
// Device data
|
||||
DeviceAtom d_atom;
|
||||
|
||||
//Info Subdomain
|
||||
Box mybox;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
extern void createAtom(Atom*, Parameter*);
|
||||
extern int readAtom(Atom*, Parameter*);
|
||||
extern int readAtom_pdb(Atom*, Parameter*);
|
||||
extern int readAtom_gro(Atom*, Parameter*);
|
||||
extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern int readAtom_in(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
|
||||
int packGhost(Atom*, int, MD_FLOAT*, int*);
|
||||
int unpackGhost(Atom*, int, MD_FLOAT*);
|
||||
int packExchange(Atom*, int, MD_FLOAT*);
|
||||
int unpackExchange(Atom*, int, MD_FLOAT*);
|
||||
void packForward(Atom*, int, int*, MD_FLOAT*, int*);
|
||||
void unpackForward(Atom*, int, int, MD_FLOAT*);
|
||||
void packReverse(Atom* , int , int , MD_FLOAT*);
|
||||
void unpackReverse(Atom*, int, int*, MD_FLOAT*);
|
||||
void pbc(Atom*);
|
||||
void copy(Atom*, int, int);
|
||||
|
||||
#ifdef AOS
|
||||
# define POS_DATA_LAYOUT "AoS"
|
||||
# define atom_x(i) atom->x[(i) * 3 + 0]
|
||||
# define atom_y(i) atom->x[(i) * 3 + 1]
|
||||
# define atom_z(i) atom->x[(i) * 3 + 2]
|
||||
# define atom_vx(i) atom->vx[(i) * 3 + 0]
|
||||
# define atom_vy(i) atom->vx[(i) * 3 + 1]
|
||||
# define atom_vz(i) atom->vx[(i) * 3 + 2]
|
||||
# define atom_fx(i) atom->fx[(i) * 3 + 0]
|
||||
# define atom_fy(i) atom->fx[(i) * 3 + 1]
|
||||
# define atom_fz(i) atom->fx[(i) * 3 + 2]
|
||||
#else
|
||||
# define POS_DATA_LAYOUT "SoA"
|
||||
# define atom_x(i) atom->x[i]
|
||||
# define atom_y(i) atom->y[i]
|
||||
# define atom_z(i) atom->z[i]
|
||||
# define atom_vx(i) atom->vx[i]
|
||||
# define atom_vy(i) atom->vy[i]
|
||||
# define atom_vz(i) atom->vz[i]
|
||||
# define atom_fx(i) atom->fx[i]
|
||||
# define atom_fy(i) atom->fy[i]
|
||||
# define atom_fz(i) atom->fz[i]
|
||||
#endif
|
||||
|
||||
# define buf_x(i) buf[3*(i)]
|
||||
# define buf_y(i) buf[3*(i)+1]
|
||||
# define buf_z(i) buf[3*(i)+2]
|
||||
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -20,9 +20,14 @@ typedef struct {
|
||||
int ncalls;
|
||||
int maxneighs;
|
||||
int half_neigh;
|
||||
int half_stencil;
|
||||
int *neighbors;
|
||||
int *numneigh;
|
||||
|
||||
//MPI
|
||||
int Nshell; //# of atoms in listShell
|
||||
int *numNeighShell; //# of neighs for each atom in listShell
|
||||
int *neighshell; //list of neighs for each atom in listShell
|
||||
int *listshell; //Atoms to compute the force
|
||||
// Device data
|
||||
DeviceNeighbor d_neighbor;
|
||||
} Neighbor;
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#ifndef __PBC_H_
|
||||
#define __PBC_H_
|
||||
extern void initPbc(Atom*);
|
||||
extern void initPbc();
|
||||
extern void updatePbc_cpu(Atom*, Parameter*, bool);
|
||||
extern void updateAtomsPbc_cpu(Atom*, Parameter*);
|
||||
extern void setupPbc(Atom*, Parameter*);
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,12 +1,15 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <atom.h>
|
||||
#include <comm.h>
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __VTK_H_
|
||||
#define __VTK_H_
|
||||
extern int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
|
||||
extern void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep);
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
377
lammps/main.c
Normal file
377
lammps/main.c
Normal file
@@ -0,0 +1,377 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <likwid-marker.h>
|
||||
#include <allocate.h>
|
||||
#include <atom.h>
|
||||
#include <device.h>
|
||||
#include <eam.h>
|
||||
#include <integrate.h>
|
||||
#include <thermo.h>
|
||||
#include <timing.h>
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <stats.h>
|
||||
#include <timers.h>
|
||||
#include <util.h>
|
||||
#include <vtk.h>
|
||||
#include <comm.h>
|
||||
#include <grid.h>
|
||||
#include <shell_methods.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define HLINE "----------------------------------------------------------------------------\n"
|
||||
#ifdef CUDA_TARGET
|
||||
extern double computeForceLJFullNeigh_cuda(Parameter*, Atom*, Neighbor*);
|
||||
#endif
|
||||
|
||||
extern double computeForceLJFullNeigh_plain_c(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJFullNeigh_simd(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceLJHalfNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
|
||||
extern double computeForceDemFullNeigh(Parameter*, Atom*, Neighbor*, Stats*);
|
||||
|
||||
double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
|
||||
if(param->force_field == FF_EAM) {
|
||||
return computeForceEam(eam, param, atom, neighbor, stats);
|
||||
} else if(param->force_field == FF_DEM) {
|
||||
if(param->half_neigh) {
|
||||
fprintf(stderr, "Error: DEM cannot use half neighbor-lists!\n");
|
||||
return 0.0;
|
||||
} else {
|
||||
return computeForceDemFullNeigh(param, atom, neighbor, stats);
|
||||
}
|
||||
}
|
||||
|
||||
if(param->half_neigh || param->method) {
|
||||
return computeForceLJHalfNeigh(param, atom, neighbor, stats);
|
||||
}
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
return computeForceLJFullNeigh(param, atom, neighbor);
|
||||
#else
|
||||
return computeForceLJFullNeigh(param, atom, neighbor, stats);
|
||||
#endif
|
||||
}
|
||||
|
||||
double dynamicBalance(Comm* comm, Grid* grid, Atom* atom, Parameter* param, double time){
|
||||
double S, E;
|
||||
int dims = 3; //TODO: Adjust to do in 3d and 2d
|
||||
S = getTimeStamp();
|
||||
if(param->balance == RCB) {
|
||||
rcbBalance(grid, atom, param, meanBisect,dims,0);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == meanTimeRCB){
|
||||
rcbBalance(grid, atom, param, meanTimeBisect,dims,time);
|
||||
neighComm(comm, param, grid);
|
||||
}else if(param->balance == Staggered) {
|
||||
staggeredBalance(grid, atom, param, time);
|
||||
neighComm(comm, param, grid);
|
||||
exchangeComm(comm,atom);
|
||||
}else { } //Do nothing
|
||||
//printGrid(grid);
|
||||
E = getTimeStamp();
|
||||
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double initialBalance(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid)
|
||||
{
|
||||
double E,S,time;
|
||||
int me;
|
||||
MPI_Comm_rank(world,&me);
|
||||
S = getTimeStamp();
|
||||
if(param->balance == meanTimeRCB || param->balance == RCB){
|
||||
rcbBalance(grid, atom, param, meanBisect,3,0);
|
||||
neighComm(comm, param, grid);
|
||||
}
|
||||
MPI_Allreduce(&atom->Nlocal, &atom->Natoms, 1, MPI_INT, MPI_SUM, world);
|
||||
printf("Processor:%i, Local atoms:%i, Total atoms:%i\n",me, atom->Nlocal,atom->Natoms);
|
||||
MPI_Barrier(world);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats, Comm *comm, Grid *grid) {
|
||||
if(param->force_field == FF_EAM) { initEam(eam, param); }
|
||||
double S, E;
|
||||
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
|
||||
param->xprd = param->nx * param->lattice;
|
||||
param->yprd = param->ny * param->lattice;
|
||||
param->zprd = param->nz * param->lattice;
|
||||
S = getTimeStamp();
|
||||
initAtom(atom);
|
||||
initStats(stats);
|
||||
initNeighbor(neighbor, param);
|
||||
if(param->input_file == NULL) {
|
||||
createAtom(atom, param);
|
||||
} else {
|
||||
readAtom(atom, param);
|
||||
}
|
||||
setupGrid(grid,atom,param);
|
||||
setupNeighbor(param);
|
||||
setupComm(comm, param, grid);
|
||||
if(param->balance){
|
||||
initialBalance(param, eam, atom, neighbor, stats, comm, grid);
|
||||
}
|
||||
setupThermo(param, atom->Natoms);
|
||||
if(param->input_file == NULL) { adjustThermo(param, atom); }
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
initDevice(atom, neighbor);
|
||||
ghostNeighbor(comm, atom, param);
|
||||
buildNeighbor(atom, neighbor);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double reneighbour(Comm* comm, Parameter *param, Atom *atom, Neighbor *neighbor) {
|
||||
double S, E;
|
||||
S = getTimeStamp();
|
||||
LIKWID_MARKER_START("reneighbour");
|
||||
#ifdef SORT_ATOMS
|
||||
atom->Nghost = 0;
|
||||
sortAtom(atom);
|
||||
#endif
|
||||
ghostNeighbor(comm, atom, param);
|
||||
buildNeighbor(atom, neighbor);
|
||||
LIKWID_MARKER_STOP("reneighbour");
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
double updateAtoms(Comm* comm, Atom* atom){
|
||||
double S,E;
|
||||
S = getTimeStamp();
|
||||
exchangeComm(comm, atom);
|
||||
E = getTimeStamp();
|
||||
return E-S;
|
||||
}
|
||||
|
||||
void writeInput(Parameter *param, Atom *atom) {
|
||||
FILE *fpin = fopen("input.in", "w");
|
||||
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i));
|
||||
}
|
||||
|
||||
fclose(fpin);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
double timer[NUMTIMER];
|
||||
Eam eam;
|
||||
Atom atom;
|
||||
Neighbor neighbor;
|
||||
Stats stats;
|
||||
Parameter param;
|
||||
Comm comm;
|
||||
Grid grid;
|
||||
LIKWID_MARKER_INIT;
|
||||
#pragma omp parallel
|
||||
{
|
||||
LIKWID_MARKER_REGISTER("force");
|
||||
//LIKWID_MARKER_REGISTER("reneighbour");
|
||||
//LIKWID_MARKER_REGISTER("pbc");
|
||||
}
|
||||
initComm(&argc, &argv, &comm);
|
||||
initParameter(¶m);
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if((strcmp(argv[i], "-p") == 0)) {
|
||||
readParameter(¶m, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-f") == 0)) {
|
||||
if((param.force_field = str2ff(argv[++i])) < 0) {
|
||||
fprintf(stderr, "Invalid force field!\n");
|
||||
exit(-1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-i") == 0)) {
|
||||
param.input_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-e") == 0)) {
|
||||
param.eam_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
|
||||
param.ntimes = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nx") == 0)) {
|
||||
param.nx = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-ny") == 0)) {
|
||||
param.ny = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-nz") == 0)) {
|
||||
param.nz = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-half") == 0)) {
|
||||
param.half_neigh = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-method") == 0)) {
|
||||
param.method = atoi(argv[++i]);
|
||||
if (param.method>3 || param.method< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Method does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-bal") == 0)) {
|
||||
param.balance = atoi(argv[++i]);
|
||||
if (param.balance>3 || param.balance< 0){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Load Balance does not exist!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
|
||||
param.cutforce = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
|
||||
param.skin = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "--freq") == 0)) {
|
||||
param.proc_freq = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "--vtk") == 0)) {
|
||||
param.vtk_file = strdup(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
|
||||
if(comm.myproc ==0 ){
|
||||
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
|
||||
printf(HLINE);
|
||||
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
|
||||
printf("-f <string>: force field (lj, eam or dem), default lj\n");
|
||||
printf("-i <string>: input file with atom positions (dump)\n");
|
||||
printf("-e <string>: input file for EAM\n");
|
||||
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
|
||||
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
|
||||
printf("-r / --radius <real>: set cutoff radius\n");
|
||||
printf("-s / --skin <real>: set skin (verlet buffer)\n");
|
||||
printf("--freq <real>: processor frequency (GHz)\n");
|
||||
printf("--vtk <string>: VTK file for visualization\n");
|
||||
printf(HLINE);
|
||||
}
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
if(param.balance>0 && param.method == 1){
|
||||
if(comm.myproc == 0) fprintf(stderr, "Half Shell is not supported by load balance!\n");
|
||||
endComm(&comm);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
param.cutneigh = param.cutforce + param.skin;
|
||||
timer[SETUP]=setup(¶m, &eam, &atom, &neighbor, &stats, &comm, &grid);
|
||||
if(comm.myproc == 0)printParameter(¶m);
|
||||
if(comm.myproc == 0)printf(HLINE);
|
||||
if(comm.myproc == 0) printf("step\ttemp\t\tpressure\n");
|
||||
computeThermo(0, ¶m, &atom);
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);// TODO: trace adress
|
||||
#endif
|
||||
//writeInput(¶m, &atom);
|
||||
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[FORWARD] = 0.0;
|
||||
timer[UPDATE] = 0.0;
|
||||
timer[BALANCE] = 0.0;
|
||||
timer[REVERSE] = reverse(&comm, &atom, ¶m);
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
if(param.vtk_file != NULL) {
|
||||
printvtk(param.vtk_file, &comm, &atom, ¶m, 0);
|
||||
}
|
||||
for(int n = 0; n < param.ntimes; n++) {
|
||||
bool reneigh = (n + 1) % param.reneigh_every == 0;
|
||||
initialIntegrate(reneigh, ¶m, &atom);
|
||||
if(reneigh) {
|
||||
timer[UPDATE] +=updateAtoms(&comm,&atom);
|
||||
if(param.balance && !((n+1)%param.balance_every))
|
||||
timer[BALANCE] +=dynamicBalance(&comm, &grid, &atom , ¶m, timer[FORCE]);
|
||||
timer[NEIGH] += reneighbour(&comm, ¶m, &atom, &neighbor);
|
||||
} else {
|
||||
timer[FORWARD] += forward(&comm, &atom, ¶m);
|
||||
}
|
||||
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
timer[FORCE] += computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
timer[REVERSE] += reverse(&comm, &atom, ¶m);
|
||||
finalIntegrate(reneigh, ¶m, &atom);
|
||||
|
||||
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
|
||||
#ifdef CUDA_TARGET
|
||||
memcpyFromGPU(atom.x, atom.d_atom.x, atom.Nmax * sizeof(MD_FLOAT) * 3);
|
||||
#endif
|
||||
computeThermo(n + 1, ¶m, &atom);
|
||||
}
|
||||
|
||||
if(param.vtk_file != NULL) {
|
||||
printvtk(param.vtk_file, &comm, &atom ,¶m, n+1);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(world);
|
||||
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
|
||||
computeThermo(-1, ¶m, &atom);
|
||||
|
||||
double mint[NUMTIMER];
|
||||
double maxt[NUMTIMER];
|
||||
double sumt[NUMTIMER];
|
||||
timer[REST] = timer[TOTAL]-timer[FORCE]-timer[NEIGH]-timer[BALANCE]-timer[FORWARD]-timer[REVERSE];
|
||||
MPI_Reduce(timer,mint,NUMTIMER,MPI_DOUBLE,MPI_MIN,0,world);
|
||||
MPI_Reduce(timer,maxt,NUMTIMER,MPI_DOUBLE,MPI_MAX,0,world);
|
||||
MPI_Reduce(timer,sumt,NUMTIMER,MPI_DOUBLE,MPI_SUM,0,world);
|
||||
int Nghost;
|
||||
MPI_Reduce(&atom.Nghost,&Nghost,1,MPI_INT,MPI_SUM,0,world);
|
||||
|
||||
if(comm.myproc == 0){
|
||||
int n = comm.numproc;
|
||||
printf(HLINE);
|
||||
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, Nghost, param.ntimes);
|
||||
printf("TOTAL %.2fs\n\n",timer[TOTAL]);
|
||||
printf("%4s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|%7s|\n","","FORCE ", "NEIGH ", "BALANCE", "FORWARD", "REVERSE","UPDATE","REST ","SETUP");
|
||||
printf("----|-------|-------|-------|-------|-------|-------|-------|-------|\n");
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "AVG", sumt[FORCE]/n,sumt[NEIGH]/n,sumt[BALANCE]/n,sumt[FORWARD]/n,sumt[REVERSE]/n,sumt[UPDATE]/n,sumt[REST]/n,sumt[SETUP]/n);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MIN", mint[FORCE],mint[NEIGH],mint[BALANCE],mint[FORWARD],mint[REVERSE],mint[UPDATE],mint[REST],mint[SETUP]);
|
||||
printf("%4s|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|%7.2f|\n", "MAX", maxt[FORCE],maxt[NEIGH],maxt[BALANCE],maxt[FORWARD],maxt[REVERSE],maxt[UPDATE],maxt[REST],maxt[SETUP]);
|
||||
printf(HLINE);
|
||||
printf("Performance: %.2f million atom updates per second\n",
|
||||
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
|
||||
|
||||
#ifdef COMPUTE_STATS
|
||||
displayStatistics(&atom, ¶m, &stats, timer);
|
||||
#endif
|
||||
}
|
||||
endComm(&comm);
|
||||
LIKWID_MARKER_CLOSE;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
@@ -11,27 +11,39 @@
|
||||
#include <neighbor.h>
|
||||
#include <parameter.h>
|
||||
#include <atom.h>
|
||||
#include <util.h>
|
||||
#include <mpi.h>
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
|
||||
MD_FLOAT xprd, yprd, zprd;
|
||||
MD_FLOAT bininvx, bininvy, bininvz;
|
||||
int mbinxlo, mbinylo, mbinzlo;
|
||||
int pad_x, pad_y, pad_z;
|
||||
int nbinx, nbiny, nbinz;
|
||||
int mbinx, mbiny, mbinz; // n bins in x, y, z
|
||||
int mbinx, mbiny, mbinz; // m bins in x, y, z
|
||||
int *bincount;
|
||||
int *bins;
|
||||
int mbins; //total number of bins
|
||||
int atoms_per_bin; // max atoms per bin
|
||||
int mbins; //total number of bins
|
||||
int atoms_per_bin; // max atoms per bin
|
||||
MD_FLOAT cutneigh;
|
||||
MD_FLOAT cutneighsq; // neighbor cutoff squared
|
||||
MD_FLOAT cutneighsq; // neighbor cutoff squared
|
||||
int nmax;
|
||||
int nstencil; // # of bins in stencil
|
||||
int* stencil; // stencil list of bin offsets
|
||||
int nstencil; // # of bins in stencil
|
||||
int* stencil; // stencil list of bin offsets
|
||||
MD_FLOAT binsizex, binsizey, binsizez;
|
||||
int me; //rank
|
||||
int method; // method
|
||||
int half_stencil; //If half stencil exist
|
||||
int shellMethod; //If shell method exist
|
||||
|
||||
static int coord2bin(MD_FLOAT, MD_FLOAT , MD_FLOAT);
|
||||
static MD_FLOAT bindist(int, int, int);
|
||||
static int ghostZone(Atom*, int);
|
||||
static int eightZone(Atom*, int);
|
||||
static int halfZone(Atom*, int);
|
||||
static void neighborGhost(Atom*, Neighbor*);
|
||||
static inline int interaction(Atom* atom, int i, int j);
|
||||
|
||||
/* exported subroutines */
|
||||
void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
@@ -51,7 +63,25 @@ void initNeighbor(Neighbor *neighbor, Parameter *param) {
|
||||
neighbor->maxneighs = 100;
|
||||
neighbor->numneigh = NULL;
|
||||
neighbor->neighbors = NULL;
|
||||
//========== MPI =============
|
||||
shellMethod = 0;
|
||||
half_stencil = 0;
|
||||
method = param->method;
|
||||
if(method == halfShell || method == eightShell){
|
||||
param->half_neigh = 1;
|
||||
shellMethod = 1;
|
||||
}
|
||||
if(method == halfStencil){
|
||||
param->half_neigh = 0;
|
||||
half_stencil = 1;
|
||||
}
|
||||
me = 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
||||
neighbor->half_neigh = param->half_neigh;
|
||||
neighbor->Nshell = 0;
|
||||
neighbor->numNeighShell = NULL;
|
||||
neighbor->neighshell = NULL;
|
||||
neighbor->listshell = NULL;
|
||||
}
|
||||
|
||||
void setupNeighbor(Parameter* param) {
|
||||
@@ -64,7 +94,6 @@ void setupNeighbor(Parameter* param) {
|
||||
yprd = param->yprd;
|
||||
zprd = param->zprd;
|
||||
}
|
||||
|
||||
// TODO: update lo and hi for standard case and use them here instead
|
||||
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
|
||||
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
|
||||
@@ -93,54 +122,48 @@ void setupNeighbor(Parameter* param) {
|
||||
bininvy = 1.0 / binsizey;
|
||||
bininvz = 1.0 / binsizez;
|
||||
}
|
||||
|
||||
coord = xlo - cutneigh - SMALL * xprd;
|
||||
mbinxlo = (int) (coord * bininvx);
|
||||
if (coord < 0.0) { mbinxlo = mbinxlo - 1; }
|
||||
coord = xhi + cutneigh + SMALL * xprd;
|
||||
mbinxhi = (int) (coord * bininvx);
|
||||
|
||||
coord = ylo - cutneigh - SMALL * yprd;
|
||||
mbinylo = (int) (coord * bininvy);
|
||||
if (coord < 0.0) { mbinylo = mbinylo - 1; }
|
||||
coord = yhi + cutneigh + SMALL * yprd;
|
||||
mbinyhi = (int) (coord * bininvy);
|
||||
|
||||
coord = zlo - cutneigh - SMALL * zprd;
|
||||
mbinzlo = (int) (coord * bininvz);
|
||||
if (coord < 0.0) { mbinzlo = mbinzlo - 1; }
|
||||
coord = zhi + cutneigh + SMALL * zprd;
|
||||
mbinzhi = (int) (coord * bininvz);
|
||||
|
||||
mbinxlo = mbinxlo - 1;
|
||||
mbinxhi = mbinxhi + 1;
|
||||
mbinx = mbinxhi - mbinxlo + 1;
|
||||
|
||||
mbinylo = mbinylo - 1;
|
||||
mbinyhi = mbinyhi + 1;
|
||||
mbiny = mbinyhi - mbinylo + 1;
|
||||
|
||||
mbinzlo = mbinzlo - 1;
|
||||
mbinzhi = mbinzhi + 1;
|
||||
mbinz = mbinzhi - mbinzlo + 1;
|
||||
pad_x = (int)(cutneigh*bininvx);
|
||||
while(pad_x * binsizex < FACTOR * cutneigh) pad_x++;
|
||||
pad_y = (int)(cutneigh*bininvy);
|
||||
while(pad_y * binsizey < FACTOR * cutneigh) pad_y++;
|
||||
pad_z = (int)(cutneigh*bininvz);
|
||||
while(pad_z * binsizez < FACTOR * cutneigh) pad_z++;
|
||||
|
||||
nextx = (int) (cutneigh * bininvx);
|
||||
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
|
||||
if(nextx * binsizex < FACTOR * cutneigh){
|
||||
nextx++;
|
||||
pad_x++;
|
||||
}
|
||||
nexty = (int) (cutneigh * bininvy);
|
||||
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
|
||||
if(nexty * binsizey < FACTOR * cutneigh){
|
||||
nexty++;
|
||||
pad_y++;
|
||||
}
|
||||
nextz = (int) (cutneigh * bininvz);
|
||||
if(nextz * binsizez < FACTOR * cutneigh) nextz++;
|
||||
if(nextz * binsizez < FACTOR * cutneigh){
|
||||
nextz++;
|
||||
pad_z++;
|
||||
}
|
||||
|
||||
mbinx = nbinx+4*pad_x;
|
||||
mbiny = nbiny+4*pad_y;
|
||||
mbinz = nbinz+4*pad_z;
|
||||
|
||||
if (stencil) { free(stencil); }
|
||||
stencil = (int*) malloc((2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
|
||||
nstencil = 0;
|
||||
int kstart = -nextz;
|
||||
|
||||
int kstart = -nextz;
|
||||
int jstart = -nexty;
|
||||
int istart = -nextx;
|
||||
int ibin = 0;
|
||||
for(int k = kstart; k <= nextz; k++) {
|
||||
for(int j = -nexty; j <= nexty; j++) {
|
||||
for(int i = -nextx; i <= nextx; i++) {
|
||||
for(int j = jstart; j <= nexty; j++) {
|
||||
for(int i = istart; i <= nextx; i++) {
|
||||
if(bindist(i, j, k) < cutneighsq) {
|
||||
stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
|
||||
int jbin = k * mbiny * mbinx + j * mbinx + i;
|
||||
if(ibin>jbin && half_stencil) continue;
|
||||
stencil[nstencil++] = jbin;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -155,7 +178,6 @@ void setupNeighbor(Parameter* param) {
|
||||
|
||||
void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
int nall = atom->Nlocal + atom->Nghost;
|
||||
|
||||
/* extend atom arrays if necessary */
|
||||
if(nall > nmax) {
|
||||
nmax = nall;
|
||||
@@ -164,16 +186,13 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
|
||||
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
|
||||
}
|
||||
|
||||
/* bin local & ghost atoms */
|
||||
binatoms(atom);
|
||||
int resize = 1;
|
||||
|
||||
/* loop over each atom, storing neighbors */
|
||||
while(resize) {
|
||||
int new_maxneighs = neighbor->maxneighs;
|
||||
resize = 0;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
int* neighptr = &(neighbor->neighbors[i * neighbor->maxneighs]);
|
||||
int n = 0;
|
||||
@@ -184,21 +203,22 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[i];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int j = loc_bin[m];
|
||||
if((j == i) || (neighbor->half_neigh && (j < i))) {
|
||||
|
||||
if((j==i) || (neighbor->half_neigh && (j<i)))
|
||||
continue;
|
||||
if(half_stencil && ibin==jbin && !interaction(atom,i,j))
|
||||
continue;
|
||||
}
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(j);
|
||||
MD_FLOAT dely = ytmp - atom_y(j);
|
||||
MD_FLOAT delz = ztmp - atom_z(j);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[j];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
@@ -210,8 +230,8 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numneigh[i] = n;
|
||||
|
||||
if(n >= neighbor->maxneighs) {
|
||||
resize = 1;
|
||||
|
||||
@@ -220,14 +240,15 @@ void buildNeighbor_cpu(Atom *atom, Neighbor *neighbor) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
printf("RESIZE %d\n", neighbor->maxneighs);
|
||||
printf("RESIZE %d, PROC %d\n", neighbor->maxneighs,me);
|
||||
neighbor->maxneighs = new_maxneighs * 1.2;
|
||||
free(neighbor->neighbors);
|
||||
neighbor->neighbors = (int*) malloc(atom->Nmax * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
if(method == eightShell) neighborGhost(atom, neighbor);
|
||||
}
|
||||
|
||||
/* internal subroutines */
|
||||
@@ -257,38 +278,22 @@ MD_FLOAT bindist(int i, int j, int k) {
|
||||
} else {
|
||||
delz = (k + 1) * binsizez;
|
||||
}
|
||||
|
||||
return (delx * delx + dely * dely + delz * delz);
|
||||
}
|
||||
|
||||
int coord2bin(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin) {
|
||||
int ix, iy, iz;
|
||||
int ix, iy, iz;
|
||||
MD_FLOAT eps = 1e-9;
|
||||
MD_FLOAT xlo=0.0; MD_FLOAT ylo=0.0; MD_FLOAT zlo=0.0;
|
||||
xlo = fabs(xlo - pad_x*binsizex)+eps;
|
||||
ylo = fabs(ylo - pad_y*binsizey)+eps;
|
||||
zlo = fabs(zlo - pad_z*binsizez)+eps;
|
||||
ix = (int) ((xin + xlo)*bininvx);
|
||||
iy = (int) ((yin + ylo)*bininvy);
|
||||
iz = (int) ((zin + zlo)*bininvz);
|
||||
|
||||
if(xin >= xprd) {
|
||||
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
|
||||
} else if(xin >= 0.0) {
|
||||
ix = (int)(xin * bininvx) - mbinxlo;
|
||||
} else {
|
||||
ix = (int)(xin * bininvx) - mbinxlo - 1;
|
||||
}
|
||||
|
||||
if(yin >= yprd) {
|
||||
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
|
||||
} else if(yin >= 0.0) {
|
||||
iy = (int)(yin * bininvy) - mbinylo;
|
||||
} else {
|
||||
iy = (int)(yin * bininvy) - mbinylo - 1;
|
||||
}
|
||||
|
||||
if(zin >= zprd) {
|
||||
iz = (int)((zin - zprd) * bininvz) + nbinz - mbinzlo;
|
||||
} else if(zin >= 0.0) {
|
||||
iz = (int)(zin * bininvz) - mbinzlo;
|
||||
} else {
|
||||
iz = (int)(zin * bininvz) - mbinzlo - 1;
|
||||
}
|
||||
|
||||
return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
|
||||
return (iz * mbiny * mbinx + iy * mbinx + ix);
|
||||
//return (iz * mbiny * mbinx + iy * mbinx + ix + 1);
|
||||
}
|
||||
|
||||
void binatoms(Atom *atom) {
|
||||
@@ -304,7 +309,7 @@ void binatoms(Atom *atom) {
|
||||
|
||||
for(int i = 0; i < nall; i++) {
|
||||
int ibin = coord2bin(atom_x(i), atom_y(i), atom_z(i));
|
||||
|
||||
if(shellMethod && !ghostZone(atom, i)) continue;
|
||||
if(bincount[ibin] < atoms_per_bin) {
|
||||
int ac = bincount[ibin]++;
|
||||
bins[ibin * atoms_per_bin + ac] = i;
|
||||
@@ -325,11 +330,9 @@ void sortAtom(Atom* atom) {
|
||||
binatoms(atom);
|
||||
int Nmax = atom->Nmax;
|
||||
int* binpos = bincount;
|
||||
|
||||
for(int i = 1; i < mbins; i++) {
|
||||
binpos[i] += binpos[i - 1];
|
||||
}
|
||||
|
||||
#ifdef AOS
|
||||
MD_FLOAT* new_x = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
|
||||
MD_FLOAT* new_vx = (MD_FLOAT*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
|
||||
@@ -367,7 +370,6 @@ void sortAtom(Atom* atom) {
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
free(atom->x);
|
||||
free(atom->vx);
|
||||
atom->x = new_x;
|
||||
@@ -383,3 +385,158 @@ void sortAtom(Atom* atom) {
|
||||
atom->vz = new_vz;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* internal subroutines
|
||||
Added with MPI*/
|
||||
|
||||
static int ghostZone(Atom* atom, int i){
|
||||
if(i<atom->Nlocal) return 1;
|
||||
else if(method == halfShell) return halfZone(atom,i);
|
||||
else if(method == eightShell) return eightZone(atom,i);
|
||||
else return 0;
|
||||
}
|
||||
|
||||
static int eightZone(Atom* atom, int i)
|
||||
{
|
||||
//Mapping: 0->0, 1->1, 2->2, 3->6, 4->3, 5->5, 6->4, 7->7
|
||||
int zoneMapping[] = {0, 1, 2, 6, 3, 5, 4, 7};
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
int zone = 0;
|
||||
|
||||
if(BigOrEqual(atom_x(i),hi[_x])) {
|
||||
zone += 1;
|
||||
}
|
||||
if(BigOrEqual(atom_y(i),hi[_y])) {
|
||||
zone += 2;
|
||||
}
|
||||
if(BigOrEqual(atom_z(i),hi[_z])) {
|
||||
zone += 4;
|
||||
}
|
||||
return zoneMapping[zone];
|
||||
}
|
||||
|
||||
static int halfZone(Atom* atom, int i)
|
||||
{
|
||||
MD_FLOAT *hi = atom->mybox.hi;
|
||||
MD_FLOAT *lo = atom->mybox.lo;
|
||||
|
||||
if(atom_x(i)<lo[_x] && atom_y(i)<hi[_y] && atom_z(i)<hi[_z]){
|
||||
return 0;
|
||||
} else if(atom_y(i)<lo[_y] && atom_z(i)<hi[_z]){
|
||||
return 0;
|
||||
} else if(atom_z(i)<lo[_z]){
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void neighborGhost(Atom *atom, Neighbor *neighbor) {
|
||||
int Nshell=0;
|
||||
int Nlocal = atom->Nlocal;
|
||||
int Nghost = atom->Nghost;
|
||||
if(neighbor->listshell) free(neighbor->listshell);
|
||||
neighbor->listshell = (int*) malloc(Nghost * sizeof(int));
|
||||
int* listzone = (int*) malloc(8 * Nghost * sizeof(int));
|
||||
int countAtoms[8] = {0,0,0,0,0,0,0,0};
|
||||
|
||||
//Selecting ghost atoms for interaction
|
||||
for(int i = Nlocal; i < Nlocal+Nghost; i++) {
|
||||
int izone = ghostZone(atom,i);
|
||||
int *list = &listzone[Nghost*izone];
|
||||
int n = countAtoms[izone];
|
||||
list[n] = i;
|
||||
countAtoms[izone]++;
|
||||
}
|
||||
|
||||
for(int zone = 1; zone<=3; zone++){
|
||||
int *list = &listzone[Nghost*zone];
|
||||
for(int n=0; n<countAtoms[zone]; n++)
|
||||
neighbor->listshell[Nshell++] = list[n];
|
||||
}
|
||||
|
||||
neighbor->Nshell = Nshell;
|
||||
if(neighbor->numNeighShell) free(neighbor->numNeighShell);
|
||||
if(neighbor->neighshell) free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
neighbor->numNeighShell = (int*) malloc(Nshell * sizeof(int));
|
||||
int resize = 1;
|
||||
|
||||
while(resize)
|
||||
{
|
||||
resize = 0;
|
||||
for(int i = 0; i < Nshell; i++) {
|
||||
int *neighshell = &(neighbor->neighshell[i*neighbor->maxneighs]);
|
||||
int n = 0;
|
||||
int iatom = neighbor->listshell[i];
|
||||
int izone = ghostZone(atom, iatom);
|
||||
MD_FLOAT xtmp = atom_x(iatom);
|
||||
MD_FLOAT ytmp = atom_y(iatom);
|
||||
MD_FLOAT ztmp = atom_z(iatom);
|
||||
int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_i = atom->type[iatom];
|
||||
#endif
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
int jbin = ibin + stencil[k];
|
||||
int* loc_bin = &bins[jbin * atoms_per_bin];
|
||||
for(int m = 0; m < bincount[jbin]; m++) {
|
||||
int jatom = loc_bin[m];
|
||||
|
||||
int jzone = ghostZone(atom,jatom);
|
||||
|
||||
if(jzone <=izone) continue;
|
||||
if(izone == 1 && (jzone==5||jzone==6||jzone==7)) continue;
|
||||
if(izone == 2 && (jzone==4||jzone==6||jzone==7)) continue;
|
||||
if(izone == 3 && (jzone==4||jzone==5||jzone==7)) continue;
|
||||
|
||||
MD_FLOAT delx = xtmp - atom_x(jatom);
|
||||
MD_FLOAT dely = ytmp - atom_y(jatom);
|
||||
MD_FLOAT delz = ztmp - atom_z(jatom);
|
||||
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
#ifdef EXPLICIT_TYPES
|
||||
int type_j = atom->type[jatom];
|
||||
const MD_FLOAT cutoff = atom->cutneighsq[type_i * atom->ntypes + type_j];
|
||||
#else
|
||||
const MD_FLOAT cutoff = cutneighsq;
|
||||
#endif
|
||||
if(rsq <= cutoff) {
|
||||
neighshell[n++] = jatom;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
neighbor->numNeighShell[i] = n;
|
||||
if(n >= neighbor->maxneighs){
|
||||
resize = 1;
|
||||
neighbor->maxneighs = n * 1.2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(resize) {
|
||||
free(neighbor->neighshell);
|
||||
neighbor->neighshell = (int*) malloc(Nshell * neighbor->maxneighs * sizeof(int));
|
||||
}
|
||||
}
|
||||
free(listzone);
|
||||
}
|
||||
|
||||
static inline int interaction(Atom* atom, int i, int j) {
|
||||
|
||||
if(i<j && j<atom->Nlocal) {
|
||||
return 1;
|
||||
} else if( atom_z(j)>atom_z(i) && j>=atom->Nlocal) {
|
||||
return 1;
|
||||
} else if(Equal(atom_z(j),atom_z(i)) && atom_y(j)<atom_y(i) && j>=atom->Nlocal){
|
||||
return 1;
|
||||
} else if(Equal(atom_z(j),atom_z(i)) && Equal(atom_y(j),atom_y(i)) && atom_x(j)<atom_x(i) && j>=atom->Nlocal){
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
171
lammps/pbc.c
Normal file
171
lammps/pbc.c
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
//---
|
||||
#include <pbc.h>
|
||||
#include <atom.h>
|
||||
#include <allocate.h>
|
||||
|
||||
#define DELTA 20000
|
||||
|
||||
int NmaxGhost;
|
||||
int *PBCx, *PBCy, *PBCz;
|
||||
|
||||
static void growPbc(Atom*);
|
||||
|
||||
/* exported subroutines */
|
||||
void initPbc(Atom* atom) {
|
||||
NmaxGhost = 0;
|
||||
atom->border_map = NULL;
|
||||
PBCx = NULL; PBCy = NULL; PBCz = NULL;
|
||||
}
|
||||
|
||||
/* update coordinates of ghost atoms */
|
||||
/* uses mapping created in setupPbc */
|
||||
void updatePbc_cpu(Atom *atom, Parameter *param, bool doReneighbor) {
|
||||
int *border_map = atom->border_map;
|
||||
int nlocal = atom->Nlocal;
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for(int i = 0; i < atom->Nghost; i++) {
|
||||
atom_x(nlocal + i) = atom_x(border_map[i]) + PBCx[i] * xprd;
|
||||
atom_y(nlocal + i) = atom_y(border_map[i]) + PBCy[i] * yprd;
|
||||
atom_z(nlocal + i) = atom_z(border_map[i]) + PBCz[i] * zprd;
|
||||
}
|
||||
}
|
||||
|
||||
/* relocate atoms that have left domain according
|
||||
* to periodic boundary conditions */
|
||||
void updateAtomsPbc_cpu(Atom *atom, Parameter *param) {
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
if(atom_x(i) < 0.0) {
|
||||
atom_x(i) += xprd;
|
||||
} else if(atom_x(i) >= xprd) {
|
||||
atom_x(i) -= xprd;
|
||||
}
|
||||
|
||||
if(atom_y(i) < 0.0) {
|
||||
atom_y(i) += yprd;
|
||||
} else if(atom_y(i) >= yprd) {
|
||||
atom_y(i) -= yprd;
|
||||
}
|
||||
|
||||
if(atom_z(i) < 0.0) {
|
||||
atom_z(i) += zprd;
|
||||
} else if(atom_z(i) >= zprd) {
|
||||
atom_z(i) -= zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* setup periodic boundary conditions by
|
||||
* defining ghost atoms around domain
|
||||
* only creates mapping and coordinate corrections
|
||||
* that are then enforced in updatePbc */
|
||||
#define ADDGHOST(dx,dy,dz) \
|
||||
Nghost++; \
|
||||
border_map[Nghost] = i; \
|
||||
PBCx[Nghost] = dx; \
|
||||
PBCy[Nghost] = dy; \
|
||||
PBCz[Nghost] = dz; \
|
||||
atom->type[atom->Nlocal + Nghost] = atom->type[i]
|
||||
|
||||
void setupPbc(Atom *atom, Parameter *param) {
|
||||
int *border_map = atom->border_map;
|
||||
MD_FLOAT xprd = param->xprd;
|
||||
MD_FLOAT yprd = param->yprd;
|
||||
MD_FLOAT zprd = param->zprd;
|
||||
MD_FLOAT Cutneigh = param->cutneigh;
|
||||
int Nghost = -1;
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
if (atom->Nlocal + Nghost + 7 >= atom->Nmax) {
|
||||
growAtom(atom);
|
||||
}
|
||||
|
||||
if (Nghost + 7 >= NmaxGhost) {
|
||||
growPbc(atom);
|
||||
border_map = atom->border_map;
|
||||
}
|
||||
|
||||
MD_FLOAT x = atom_x(i);
|
||||
MD_FLOAT y = atom_y(i);
|
||||
MD_FLOAT z = atom_z(i);
|
||||
|
||||
/* Setup ghost atoms */
|
||||
/* 6 planes */
|
||||
if(param->pbc_x != 0) {
|
||||
if (x < Cutneigh) { ADDGHOST(+1,0,0); }
|
||||
if (x >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); }
|
||||
}
|
||||
|
||||
if(param->pbc_y != 0) {
|
||||
if (y < Cutneigh) { ADDGHOST(0,+1,0); }
|
||||
if (y >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); }
|
||||
}
|
||||
|
||||
if(param->pbc_z != 0) {
|
||||
if (z < Cutneigh) { ADDGHOST(0,0,+1); }
|
||||
if (z >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); }
|
||||
}
|
||||
|
||||
/* 8 corners */
|
||||
if(param->pbc_x != 0 && param->pbc_y != 0 && param->pbc_z != 0) {
|
||||
if (x < Cutneigh && y < Cutneigh && z < Cutneigh) { ADDGHOST(+1,+1,+1); }
|
||||
if (x < Cutneigh && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(+1,-1,+1); }
|
||||
if (x < Cutneigh && y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); }
|
||||
if (x < Cutneigh && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); }
|
||||
if (x >= (xprd-Cutneigh) && y < Cutneigh && z < Cutneigh) { ADDGHOST(-1,+1,+1); }
|
||||
if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,-1,+1); }
|
||||
if (x >= (xprd-Cutneigh) && y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); }
|
||||
if (x >= (xprd-Cutneigh) && y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); }
|
||||
}
|
||||
|
||||
/* 12 edges */
|
||||
if(param->pbc_x != 0 && param->pbc_z != 0) {
|
||||
if (x < Cutneigh && z < Cutneigh) { ADDGHOST(+1,0,+1); }
|
||||
if (x < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); }
|
||||
if (x >= (xprd-Cutneigh) && z < Cutneigh) { ADDGHOST(-1,0,+1); }
|
||||
if (x >= (xprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); }
|
||||
}
|
||||
|
||||
if(param->pbc_y != 0 && param->pbc_z != 0) {
|
||||
if (y < Cutneigh && z < Cutneigh) { ADDGHOST(0,+1,+1); }
|
||||
if (y < Cutneigh && z >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); }
|
||||
if (y >= (yprd-Cutneigh) && z < Cutneigh) { ADDGHOST(0,-1,+1); }
|
||||
if (y >= (yprd-Cutneigh) && z >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); }
|
||||
}
|
||||
|
||||
if(param->pbc_x != 0 && param->pbc_y != 0) {
|
||||
if (y < Cutneigh && x < Cutneigh) { ADDGHOST(+1,+1,0); }
|
||||
if (y < Cutneigh && x >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); }
|
||||
if (y >= (yprd-Cutneigh) && x < Cutneigh) { ADDGHOST(+1,-1,0); }
|
||||
if (y >= (yprd-Cutneigh) && x >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
|
||||
}
|
||||
}
|
||||
// increase by one to make it the ghost atom count
|
||||
atom->Nghost = Nghost + 1;
|
||||
}
|
||||
|
||||
/* internal subroutines */
|
||||
void growPbc(Atom* atom) {
|
||||
int nold = NmaxGhost;
|
||||
NmaxGhost += DELTA;
|
||||
|
||||
atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
PBCx = (int*) reallocate(PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
PBCy = (int*) reallocate(PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
PBCz = (int*) reallocate(PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
219
lammps/vtk.c
Normal file
219
lammps/vtk.c
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <vtk.h>
|
||||
#include <mpi.h>
|
||||
|
||||
static MPI_File _fh;
|
||||
static inline void flushBuffer(char*);
|
||||
|
||||
int write_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
|
||||
FILE* fp = fopen(timestep_filename, "wb");
|
||||
|
||||
if(fp == NULL) {
|
||||
fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
fprintf(fp, "# vtk DataFile Version 2.0\n");
|
||||
fprintf(fp, "Particle data\n");
|
||||
fprintf(fp, "ASCII\n");
|
||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
|
||||
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "1 %d\n", i);
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
|
||||
for(int i = 0; i < atom->Nlocal; ++i) {
|
||||
fprintf(fp, "1\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "POINT_DATA %d\n", atom->Nlocal);
|
||||
fprintf(fp, "SCALARS mass double\n");
|
||||
fprintf(fp, "LOOKUP_TABLE default\n");
|
||||
for(int i = 0; i < atom->Nlocal; i++) {
|
||||
fprintf(fp, "1.0\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vtkOpen(const char* filename, Comm* comm, Atom* atom ,int timestep)
|
||||
{
|
||||
char msg[256];
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d.vtk", filename, timestep);
|
||||
MPI_File_open(MPI_COMM_WORLD, timestep_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_fh);
|
||||
if(_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc == 0) fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (comm->myproc==0){
|
||||
sprintf(msg, "# vtk DataFile Version 2.0\n");
|
||||
sprintf(msg, "%sParticle data\n",msg);
|
||||
sprintf(msg, "%sASCII\n",msg);
|
||||
sprintf(msg, "%sDATASET UNSTRUCTURED_GRID\n",msg);
|
||||
sprintf(msg, "%sPOINTS %d double\n",msg, atom->Natoms);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
int vtkVector(Comm* comm, Atom* atom, Parameter* param)
|
||||
{
|
||||
if (_fh == MPI_FILE_NULL) {
|
||||
if(comm->myproc==0) printf("vtk not initialize! Call vtkOpen first!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int sizeline= 25; //#initial guess of characters in "%.4f %.4f %.4f\n"
|
||||
int extrabuff = 100;
|
||||
int sizebuff = sizeline*atom->Nlocal+extrabuff;
|
||||
int mysize = 0;
|
||||
char* msg = (char*) malloc(sizebuff);
|
||||
sprintf(msg, "");
|
||||
for(int i = 0; i < atom->Nlocal; i++){
|
||||
if(mysize+extrabuff >= sizebuff){
|
||||
sizebuff*= 1.5;
|
||||
msg = (char*) realloc(msg, sizebuff);
|
||||
}
|
||||
//TODO: do not forget to add param->xlo, param->ylo, param->zlo
|
||||
sprintf(msg, "%s%.4f %.4f %.4f\n",msg, atom_x(i), atom_y(i), atom_z(i));
|
||||
mysize = strlen(msg);
|
||||
}
|
||||
int gatherSize[comm->numproc];
|
||||
|
||||
MPI_Allgather(&mysize, 1, MPI_INT, gatherSize, 1, MPI_INT, MPI_COMM_WORLD);
|
||||
int offset=0;
|
||||
int globalSize = 0;
|
||||
|
||||
for(int i = 0; i < comm->myproc; i++)
|
||||
offset+= gatherSize[i];
|
||||
|
||||
for(int i = 0; i < comm->numproc; i++)
|
||||
globalSize+= gatherSize[i];
|
||||
|
||||
MPI_Offset displ;
|
||||
MPI_Datatype FileType;
|
||||
int GlobalSize[] = {globalSize};
|
||||
int LocalSize[] = {mysize};
|
||||
int Start[] = {offset};
|
||||
|
||||
if(LocalSize[0]>0){
|
||||
MPI_Type_create_subarray(1, GlobalSize, LocalSize, Start, MPI_ORDER_C, MPI_CHAR, &FileType);
|
||||
} else {
|
||||
MPI_Type_vector(0,0,0,MPI_CHAR,&FileType);
|
||||
}
|
||||
MPI_Type_commit(&FileType);
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_set_view(_fh, displ, MPI_CHAR, FileType, "native", MPI_INFO_NULL);
|
||||
MPI_File_write_all (_fh, msg, mysize , MPI_CHAR ,MPI_STATUS_IGNORE);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_File_set_view(_fh,0,MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL);
|
||||
|
||||
if (comm->myproc==0){
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELLS %d %d\n", msg, atom->Natoms, atom->Natoms * 2);
|
||||
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1 %d\n", msg, i);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sCELL_TYPES %d\n",msg, atom->Natoms);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1\n",msg);
|
||||
flushBuffer(msg);
|
||||
|
||||
sprintf(msg, "\n\n");
|
||||
sprintf(msg, "%sPOINT_DATA %d\n",msg,atom->Natoms);
|
||||
sprintf(msg, "%sSCALARS mass double\n",msg);
|
||||
sprintf(msg, "%sLOOKUP_TABLE default\n",msg);
|
||||
for(int i = 0; i < atom->Natoms; i++)
|
||||
sprintf(msg, "%s1.0\n",msg);
|
||||
sprintf(msg, "%s\n\n",msg);
|
||||
flushBuffer(msg);
|
||||
}
|
||||
}
|
||||
|
||||
void vtkClose()
|
||||
{
|
||||
MPI_File_close(&_fh);
|
||||
_fh=MPI_FILE_NULL;
|
||||
}
|
||||
|
||||
int printGhost(const char* filename, Atom* atom, int timestep, int me) {
|
||||
char timestep_filename[128];
|
||||
snprintf(timestep_filename, sizeof timestep_filename, "%s_%d_ghost%i.vtk", filename, timestep,me);
|
||||
FILE* fp = fopen(timestep_filename, "wb");
|
||||
|
||||
if(fp == NULL) {
|
||||
fprintf(stderr, "Could not open VTK file for writing!\n");
|
||||
return -1;
|
||||
}
|
||||
fprintf(fp, "# vtk DataFile Version 2.0\n");
|
||||
fprintf(fp, "Particle data\n");
|
||||
fprintf(fp, "ASCII\n");
|
||||
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
|
||||
fprintf(fp, "POINTS %d double\n", atom->Nghost);
|
||||
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
|
||||
fprintf(fp, "%.4f %.4f %.4f\n", atom_x(i), atom_y(i), atom_z(i));
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
|
||||
fprintf(fp, "1 %d\n", i);
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; ++i) {
|
||||
fprintf(fp, "1\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fprintf(fp, "POINT_DATA %d\n", atom->Nghost);
|
||||
fprintf(fp, "SCALARS mass double\n");
|
||||
fprintf(fp, "LOOKUP_TABLE default\n");
|
||||
for(int i = atom->Nlocal; i < atom->Nlocal+atom->Nghost; i++) {
|
||||
fprintf(fp, "1.0\n");
|
||||
}
|
||||
fprintf(fp, "\n\n");
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void printvtk(const char* filename, Comm* comm, Atom* atom ,Parameter* param, int timestep)
|
||||
{
|
||||
if(comm->numproc == 1)
|
||||
{
|
||||
write_atoms_to_vtk_file(filename, atom, timestep);
|
||||
return;
|
||||
}
|
||||
|
||||
vtkOpen(filename, comm, atom, timestep);
|
||||
vtkVector(comm, atom, param);
|
||||
vtkClose();
|
||||
//printGhost(filename, atom, timestep, comm->myproc);
|
||||
}
|
||||
|
||||
static inline void flushBuffer(char* msg){
|
||||
MPI_Offset displ;
|
||||
MPI_File_get_size(_fh, &displ);
|
||||
MPI_File_write_at(_fh, displ, msg, strlen(msg), MPI_CHAR, MPI_STATUS_IGNORE);
|
||||
}
|
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <stdbool.h>
|
||||
//---
|
||||
#include <atom.h>
|
||||
#include <parameter.h>
|
||||
#include <util.h>
|
||||
|
||||
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
|
||||
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
|
||||
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||
ci_x[CL_X_OFFSET + cii] += param->dt * ci_v[CL_X_OFFSET + cii];
|
||||
ci_x[CL_Y_OFFSET + cii] += param->dt * ci_v[CL_Y_OFFSET + cii];
|
||||
ci_x[CL_Z_OFFSET + cii] += param->dt * ci_v[CL_Z_OFFSET + cii];
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
|
||||
}
|
||||
|
||||
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
|
||||
|
||||
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
|
||||
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
|
||||
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
|
||||
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
|
||||
|
||||
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
|
||||
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
|
||||
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
|
||||
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
|
||||
}
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
void cudaInitialIntegrate(Parameter*, Atom*);
|
||||
void cudaFinalIntegrate(Parameter*, Atom*);
|
||||
#endif
|
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#ifndef __UTIL_H_
|
||||
#define __UTIL_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#ifndef MIN
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#ifndef ABS
|
||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
|
||||
#endif
|
||||
|
||||
#define DEBUG_MESSAGE debug_printf
|
||||
|
||||
#ifndef MAXLINE
|
||||
#define MAXLINE 4096
|
||||
#endif
|
||||
|
||||
#define FF_LJ 0
|
||||
#define FF_EAM 1
|
||||
#define FF_DEM 2
|
||||
|
||||
#if PRECISION == 1
|
||||
#define PRECISION_STRING "single"
|
||||
#else
|
||||
#define PRECISION_STRING "double"
|
||||
#endif
|
||||
|
||||
extern double myrandom(int *);
|
||||
extern void random_reset(int *seed, int ibase, double *coord);
|
||||
extern int str2ff(const char *string);
|
||||
extern const char *ff2str(int ff);
|
||||
extern void readline(char *line, FILE *fp);
|
||||
extern void debug_printf(const char *format, ...);
|
||||
extern int get_cuda_num_threads(void);
|
||||
|
||||
#endif
|
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
* All rights reserved. This file is part of MD-Bench.
|
||||
* Use of this source code is governed by a LGPL-3.0
|
||||
* license that can be found in the LICENSE file.
|
||||
*/
|
||||
#include <parameter.h>
|
||||
|
||||
#ifndef __ATOM_H_
|
||||
#define __ATOM_H_
|
||||
|
||||
#ifdef CUDA_TARGET
|
||||
#define KERNEL_NAME "CUDA"
|
||||
#define computeForceLJFullNeigh computeForceLJFullNeigh_cuda
|
||||
#define initialIntegrate initialIntegrate_cuda
|
||||
#define finalIntegrate finalIntegrate_cuda
|
||||
#define buildNeighbor buildNeighbor_cuda
|
||||
#define updatePbc updatePbc_cuda
|
||||
#define updateAtomsPbc updateAtomsPbc_cuda
|
||||
#else
|
||||
#ifdef USE_SIMD_KERNEL
|
||||
#define KERNEL_NAME "SIMD"
|
||||
#define computeForceLJFullNeigh computeForceLJFullNeigh_simd
|
||||
#else
|
||||
#define KERNEL_NAME "PLAIN"
|
||||
#endif
|
||||
#define initialIntegrate initialIntegrate_cpu
|
||||
#define finalIntegrate finalIntegrate_cpu
|
||||
#define buildNeighbor buildNeighbor_cpu
|
||||
#define updatePbc updatePbc_cpu
|
||||
#define updateAtomsPbc updateAtomsPbc_cpu
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int* border_map;
|
||||
int* type;
|
||||
MD_FLOAT* epsilon;
|
||||
MD_FLOAT* sigma6;
|
||||
MD_FLOAT* cutforcesq;
|
||||
MD_FLOAT* cutneighsq;
|
||||
} DeviceAtom;
|
||||
|
||||
typedef struct {
|
||||
int Natoms, Nlocal, Nghost, Nmax;
|
||||
MD_FLOAT *x, *y, *z;
|
||||
MD_FLOAT *vx, *vy, *vz;
|
||||
MD_FLOAT *fx, *fy, *fz;
|
||||
int* border_map;
|
||||
int* type;
|
||||
int ntypes;
|
||||
MD_FLOAT* epsilon;
|
||||
MD_FLOAT* sigma6;
|
||||
MD_FLOAT* cutforcesq;
|
||||
MD_FLOAT* cutneighsq;
|
||||
|
||||
// DEM
|
||||
MD_FLOAT* radius;
|
||||
MD_FLOAT* av;
|
||||
MD_FLOAT* r;
|
||||
|
||||
// Device data
|
||||
DeviceAtom d_atom;
|
||||
} Atom;
|
||||
|
||||
extern void initAtom(Atom*);
|
||||
extern void createAtom(Atom*, Parameter*);
|
||||
extern int readAtom(Atom*, Parameter*);
|
||||
extern int readAtom_pdb(Atom*, Parameter*);
|
||||
extern int readAtom_gro(Atom*, Parameter*);
|
||||
extern int readAtom_dmp(Atom*, Parameter*);
|
||||
extern int readAtom_in(Atom*, Parameter*);
|
||||
extern void writeAtom(Atom*, Parameter*);
|
||||
extern void growAtom(Atom*);
|
||||
|
||||
#ifdef AOS
|
||||
#define POS_DATA_LAYOUT "AoS"
|
||||
#define atom_x(i) atom->x[(i) * 3 + 0]
|
||||
#define atom_y(i) atom->x[(i) * 3 + 1]
|
||||
#define atom_z(i) atom->x[(i) * 3 + 2]
|
||||
#define atom_vx(i) atom->vx[(i) * 3 + 0]
|
||||
#define atom_vy(i) atom->vx[(i) * 3 + 1]
|
||||
#define atom_vz(i) atom->vx[(i) * 3 + 2]
|
||||
#define atom_fx(i) atom->fx[(i) * 3 + 0]
|
||||
#define atom_fy(i) atom->fx[(i) * 3 + 1]
|
||||
#define atom_fz(i) atom->fx[(i) * 3 + 2]
|
||||
#else
|
||||
#define POS_DATA_LAYOUT "SoA"
|
||||
#define atom_x(i) atom->x[i]
|
||||
#define atom_y(i) atom->y[i]
|
||||
#define atom_z(i) atom->z[i]
|
||||
#define atom_vx(i) atom->vx[i]
|
||||
#define atom_vy(i) atom->vy[i]
|
||||
#define atom_vz(i) atom->vz[i]
|
||||
#define atom_fx(i) atom->fx[i]
|
||||
#define atom_fy(i) atom->fy[i]
|
||||
#define atom_fz(i) atom->fz[i]
|
||||
#endif
|
||||
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user