Cleanup. Remove copyright year. Reformat.

This commit is contained in:
2024-05-13 12:33:08 +02:00
parent a6a269703d
commit 9712d7e2c8
77 changed files with 959 additions and 648 deletions

171
clusterpair/includes/atom.h Normal file
View File

@@ -0,0 +1,171 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
// DELTA is not referenced anywhere else in this header.
#define DELTA 20000
// Compile-time selection of the kernel variant and of the cluster dimensions
// CLUSTER_M x CLUSTER_N. The index macros further down in this header are
// derived from these two values. The generic names computeForceLJ,
// initialIntegrate, finalIntegrate and updatePbc are aliased to the
// implementation matching the selected variant.
//
// Nbnxn layouts (as of GROMACS):
// Simd4xN: M=4, N=VECTOR_WIDTH
// Simd2xNN: M=4, N=(VECTOR_WIDTH/2)
// Cuda: M=8, N=VECTOR_WIDTH
#ifdef CUDA_TARGET
// CUDA build: any previously defined VECTOR_WIDTH is discarded and forced to 8.
// NOTE(review): UNROLL_I is not defined in this branch (only UNROLL_J).
# undef VECTOR_WIDTH
# define VECTOR_WIDTH 8
# define KERNEL_NAME "CUDA"
# define CLUSTER_M 8
# define CLUSTER_N VECTOR_WIDTH
# define UNROLL_J 1
# define computeForceLJ computeForceLJ_cuda
# define initialIntegrate cudaInitialIntegrate
# define finalIntegrate cudaFinalIntegrate
# define updatePbc cudaUpdatePbc
#else
// CPU build: M is fixed to 4, N depends on VECTOR_WIDTH.
# define CLUSTER_M 4
// Simd2xNN (here used for single-precision)
// Selected when VECTOR_WIDTH is larger than 2 * CLUSTER_M (i.e. larger than 8).
# if VECTOR_WIDTH > CLUSTER_M * 2
# define KERNEL_NAME "Simd2xNN"
# define CLUSTER_N (VECTOR_WIDTH / 2)
# define UNROLL_I 4
# define UNROLL_J 2
# define computeForceLJ computeForceLJ_2xnn
// Simd4xN
// Selected for every other VECTOR_WIDTH.
# else
# define KERNEL_NAME "Simd4xN"
# define CLUSTER_N VECTOR_WIDTH
# define UNROLL_I 4
# define UNROLL_J 1
# define computeForceLJ computeForceLJ_4xn
# endif
// The reference version replaces only the kernel name and the force routine;
// CLUSTER_N and UNROLL_* keep the values chosen by the branch above.
# ifdef USE_REFERENCE_VERSION
# undef KERNEL_NAME
# undef computeForceLJ
# define KERNEL_NAME "Reference"
# define computeForceLJ computeForceLJ_ref
# endif
# define initialIntegrate cpuInitialIntegrate
# define finalIntegrate cpuFinalIntegrate
# define updatePbc cpuUpdatePbc
#endif
// Index mapping between i-clusters (size CLUSTER_M) and j-clusters (size
// CLUSTER_N). Only the ratios M == N, M == 2N and 2M == N are supported; any
// other combination stops compilation.
//
// CJ0_FROM_CI(a) / CJ1_FROM_CI(a): two j-cluster indices derived from
// i-cluster index a (presumably the first and last j-cluster overlapping it).
// CI_BASE_INDEX(a,b) / CJ_BASE_INDEX(a,b): array offset of cluster a, where b
// is the number of values stored per atom (the wrappers in this header pass
// 1 or 3).
#if CLUSTER_M == CLUSTER_N
// Equal sizes: i- and j-cluster indices and offsets coincide.
# define CJ0_FROM_CI(a) (a)
# define CJ1_FROM_CI(a) (a)
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
#elif CLUSTER_M == CLUSTER_N * 2 // M > N
// i-cluster a maps to j-clusters 2a and 2a+1. Storage is laid out in blocks of
// CLUSTER_M; j-cluster a lives in block a/2, and an odd a is shifted by half a
// block (CLUSTER_M / 2).
# define CJ0_FROM_CI(a) ((a) << 1)
# define CJ1_FROM_CI(a) (((a) << 1) | 0x1)
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_M * (b))
# define CJ_BASE_INDEX(a,b) (((a) >> 1) * CLUSTER_M * (b) + ((a) & 0x1) * (CLUSTER_M >> 1))
#elif CLUSTER_M == CLUSTER_N / 2 // M < N
// Both macros yield a/2. Storage is laid out in blocks of CLUSTER_N; i-cluster
// a lives in block a/2, and an odd a is shifted by half a block (CLUSTER_N / 2).
# define CJ0_FROM_CI(a) ((a) >> 1)
# define CJ1_FROM_CI(a) ((a) >> 1)
# define CI_BASE_INDEX(a,b) (((a) >> 1) * CLUSTER_N * (b) + ((a) & 0x1) * (CLUSTER_N >> 1))
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
#else
# error "Invalid cluster configuration!"
#endif
// Independently of the M:N ratio, CLUSTER_N itself must be 2, 4 or 8.
#if CLUSTER_N != 2 && CLUSTER_N != 4 && CLUSTER_N != 8
# error "Cluster N dimension can be only 2, 4 and 8"
#endif
// Convenience wrappers around CI_BASE_INDEX / CJ_BASE_INDEX:
// SCALAR = one value per atom, VECTOR = three values per atom.
#define CI_SCALAR_BASE_INDEX(a) (CI_BASE_INDEX(a, 1))
#define CI_VECTOR_BASE_INDEX(a) (CI_BASE_INDEX(a, 3))
#define CJ_SCALAR_BASE_INDEX(a) (CJ_BASE_INDEX(a, 1))
#define CJ_VECTOR_BASE_INDEX(a) (CJ_BASE_INDEX(a, 3))
// Offsets of the x, y and z component rows relative to a cluster's vector base
// index. The row stride is the larger of CLUSTER_M and CLUSTER_N; an element is
// addressed as base + CL_{X,Y,Z}_OFFSET + index-within-cluster.
#if CLUSTER_M >= CLUSTER_N
# define CL_X_OFFSET (0 * CLUSTER_M)
# define CL_Y_OFFSET (1 * CLUSTER_M)
# define CL_Z_OFFSET (2 * CLUSTER_M)
#else
# define CL_X_OFFSET (0 * CLUSTER_N)
# define CL_Y_OFFSET (1 * CLUSTER_N)
# define CL_Z_OFFSET (2 * CLUSTER_N)
#endif
// Per-cluster metadata.
// NOTE(review): the bb* fields are presumably the min/max extents of an
// axis-aligned bounding box per dimension - confirm against the code that
// fills them.
typedef struct {
    int natoms; // number of atoms in the cluster
    MD_FLOAT bbminx;
    MD_FLOAT bbmaxx;
    MD_FLOAT bbminy;
    MD_FLOAT bbmaxy;
    MD_FLOAT bbminz;
    MD_FLOAT bbmaxz;
} Cluster;
// Simulation state: per-atom arrays plus the cluster-format copies that the
// cluster kernels and integrators operate on.
typedef struct {
    // Atom counters.
    // NOTE(review): presumably total / local / ghost counts and allocated
    // capacity - confirm against the allocation code.
    int Natoms;
    int Nlocal;
    int Nghost;
    int Nmax;

    // Cluster counters; Nclusters_local is the loop bound of the CPU integrators.
    int Nclusters;
    int Nclusters_local;
    int Nclusters_ghost;
    int Nclusters_max;

    // Per-atom arrays, read through the atom_x/atom_y/atom_z accessor macros
    // (with AOS defined only x is used, holding three values per atom).
    MD_FLOAT *x;
    MD_FLOAT *y;
    MD_FLOAT *z;

    // Per-atom arrays read through atom_vx/atom_vy/atom_vz.
    MD_FLOAT *vx;
    MD_FLOAT *vy;
    MD_FLOAT *vz;

    int *border_map;
    int *type;

    // NOTE(review): presumably per-type-pair parameter tables sized by
    // ntypes - confirm.
    int ntypes;
    MD_FLOAT *epsilon;
    MD_FLOAT *sigma6;
    MD_FLOAT *cutforcesq;
    MD_FLOAT *cutneighsq;

    int *PBCx;
    int *PBCy;
    int *PBCz;

    // Data in cluster format. cl_x / cl_v / cl_f are addressed as
    // CI_VECTOR_BASE_INDEX(ci) + CL_{X,Y,Z}_OFFSET + index-within-cluster.
    MD_FLOAT *cl_x;
    MD_FLOAT *cl_v;
    MD_FLOAT *cl_f;
    int *cl_type;

    // Cluster metadata; iclusters[ci].natoms bounds the per-cluster loops.
    Cluster *iclusters;
    Cluster *jclusters;
    int *icluster_bin;
    int dummy_cj;

    // Masking data for the SIMD kernels (array sizes fixed to 8 and 16 entries).
    MD_UINT *exclusion_filter;
    MD_FLOAT *diagonal_4xn_j_minus_i;
    MD_FLOAT *diagonal_2xnn_j_minus_i;
    unsigned int masks_2xnn_hn[8];
    unsigned int masks_2xnn_fn[8];
    unsigned int masks_4xn_hn[16];
    unsigned int masks_4xn_fn[16];
} Atom;
// Atom setup, input and growth routines; declared here, defined elsewhere.
// NOTE(review): the readAtom_* suffixes presumably name the input file format
// (pdb, gro, dmp) - confirm against the implementations.
void initAtom(Atom *atom);
void initMasks(Atom *atom);
void createAtom(Atom *atom, Parameter *param);
int readAtom(Atom *atom, Parameter *param);
int readAtom_pdb(Atom *atom, Parameter *param);
int readAtom_gro(Atom *atom, Parameter *param);
int readAtom_dmp(Atom *atom, Parameter *param);
void growAtom(Atom *atom);
void growClusters(Atom *atom);
// Accessor macros for per-atom data. They expand to `atom->...`, so a variable
// named `atom` (pointer to Atom) must be in scope wherever they are used.
// With AOS defined, positions are read from atom->x with three consecutive
// values per atom; otherwise x, y and z come from three separate arrays.
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]
# define atom_y(i) atom->x[(i) * 3 + 1]
# define atom_z(i) atom->x[(i) * 3 + 2]
/*
# define atom_vx(i) atom->vx[(i) * 3 + 0]
# define atom_vy(i) atom->vx[(i) * 3 + 1]
# define atom_vz(i) atom->vx[(i) * 3 + 2]
# define atom_fx(i) atom->fx[(i) * 3 + 0]
# define atom_fy(i) atom->fx[(i) * 3 + 1]
# define atom_fz(i) atom->fx[(i) * 3 + 2]
*/
#else
# define POS_DATA_LAYOUT "SoA"
# define atom_x(i) atom->x[i]
# define atom_y(i) atom->y[i]
# define atom_z(i) atom->z[i]
#endif
// TODO: allow to switch velocities and forces to AoS
// Velocities and forces always use separate arrays, regardless of AOS.
// NOTE(review): the Atom struct declared in this header has no fx, fy or fz
// members, so atom_fx/atom_fy/atom_fz cannot compile if they are ever expanded.
# define atom_vx(i) atom->vx[i]
# define atom_vy(i) atom->vy[i]
# define atom_vz(i) atom->vz[i]
# define atom_fx(i) atom->fx[i]
# define atom_fy(i) atom->fy[i]
# define atom_fz(i) atom->fz[i]
#endif

View File

@@ -0,0 +1,56 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
//---
#include <atom.h>
#include <parameter.h>
#include <util.h>
/*
 * First integration stage on the CPU for all local clusters.
 *
 * For every atom of every local i-cluster the velocity is advanced by
 * param->dtforce * force, and the position is then advanced by
 * param->dt * (updated velocity). Works on the cluster-format arrays
 * cl_x, cl_v and cl_f of `atom`.
 */
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
    DEBUG_MESSAGE("cpuInitialIntegrate start\n");

    for(int c = 0; c < atom->Nclusters_local; c++) {
        const int base = CI_VECTOR_BASE_INDEX(c);
        const int count = atom->iclusters[c].natoms;

        // Component rows of this cluster inside the cluster-format arrays.
        MD_FLOAT *pos_x = atom->cl_x + base + CL_X_OFFSET;
        MD_FLOAT *pos_y = atom->cl_x + base + CL_Y_OFFSET;
        MD_FLOAT *pos_z = atom->cl_x + base + CL_Z_OFFSET;
        MD_FLOAT *vel_x = atom->cl_v + base + CL_X_OFFSET;
        MD_FLOAT *vel_y = atom->cl_v + base + CL_Y_OFFSET;
        MD_FLOAT *vel_z = atom->cl_v + base + CL_Z_OFFSET;
        MD_FLOAT *frc_x = atom->cl_f + base + CL_X_OFFSET;
        MD_FLOAT *frc_y = atom->cl_f + base + CL_Y_OFFSET;
        MD_FLOAT *frc_z = atom->cl_f + base + CL_Z_OFFSET;

        for(int k = 0; k < count; k++) {
            // Velocities first: the position update uses the new velocity.
            vel_x[k] += param->dtforce * frc_x[k];
            vel_y[k] += param->dtforce * frc_y[k];
            vel_z[k] += param->dtforce * frc_z[k];
            pos_x[k] += param->dt * vel_x[k];
            pos_y[k] += param->dt * vel_y[k];
            pos_z[k] += param->dt * vel_z[k];
        }
    }

    DEBUG_MESSAGE("cpuInitialIntegrate end\n");
}
/*
 * Second integration stage on the CPU for all local clusters.
 *
 * For every atom of every local i-cluster the velocity is advanced by
 * param->dtforce * force; positions are not touched. Works on the
 * cluster-format arrays cl_v and cl_f of `atom`.
 */
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
    DEBUG_MESSAGE("cpuFinalIntegrate start\n");

    for(int c = 0; c < atom->Nclusters_local; c++) {
        const int base = CI_VECTOR_BASE_INDEX(c);
        const int count = atom->iclusters[c].natoms;

        // Component rows of this cluster inside the cluster-format arrays.
        MD_FLOAT *vel_x = atom->cl_v + base + CL_X_OFFSET;
        MD_FLOAT *vel_y = atom->cl_v + base + CL_Y_OFFSET;
        MD_FLOAT *vel_z = atom->cl_v + base + CL_Z_OFFSET;
        MD_FLOAT *frc_x = atom->cl_f + base + CL_X_OFFSET;
        MD_FLOAT *frc_y = atom->cl_f + base + CL_Y_OFFSET;
        MD_FLOAT *frc_z = atom->cl_f + base + CL_Z_OFFSET;

        for(int k = 0; k < count; k++) {
            vel_x[k] += param->dtforce * frc_x[k];
            vel_y[k] += param->dtforce * frc_y[k];
            vel_z[k] += param->dtforce * frc_z[k];
        }
    }

    DEBUG_MESSAGE("cpuFinalIntegrate end\n");
}
#ifdef CUDA_TARGET
// CUDA integrators: declared only, with the same signature as the CPU
// integrators defined in this file.
void cudaInitialIntegrate(Parameter *param, Atom *atom);
void cudaFinalIntegrate(Parameter *param, Atom *atom);
#endif

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __NEIGHBOR_H_
#define __NEIGHBOR_H_
// Interaction masks taken from GROMACS. Two things to keep in mind:
// 1. Despite being called "exclusion" masks in GROMACS, these are interaction
// masks: a set bit means "interaction", a cleared bit means "no interaction".
// 2. They are stored inverted (maybe because that is how AVX2/AVX512 masking
// consumes them), so read them from right to left, i.e. from the least
// significant to the most significant bit.
// Example: 0x08ce read in 4-bit groups starting at the LSB is 1110, 1100,
// 1000, 0000 (presumably one group per i-atom, one bit per j-atom - confirm).
// The all-interactions mask is the same for all kernels
#define NBNXN_INTERACTION_MASK_ALL 0xffffffffU
// 4x4 kernel diagonal mask
#define NBNXN_INTERACTION_MASK_DIAG 0x08ceU
// 4x2 kernel diagonal masks
#define NBNXN_INTERACTION_MASK_DIAG_J2_0 0x0002U
#define NBNXN_INTERACTION_MASK_DIAG_J2_1 0x002fU
// 4x8 kernel diagonal masks
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
// Neighbor-list state.
// NOTE(review): field meanings below are inferred from the names only -
// confirm against the code that builds the lists.
typedef struct {
    int every;                     // presumably the rebuild interval in timesteps
    int ncalls;                    // presumably a call counter
    int maxneighs;                 // presumably the per-entry neighbor capacity
    int *numneigh;                 // neighbor counts
    int *numneigh_masked;          // counts of neighbors that carry a mask
    int half_neigh;                // flag-like int; presumably half neighbor lists
    int *neighbors;                // neighbor indices
    unsigned int *neighbors_imask; // interaction masks stored alongside the neighbors
} Neighbor;
// Neighbor-list and clustering routines; declared here, defined elsewhere.
// Note the argument order: setupNeighbor and pruneNeighbor take the Parameter
// first, whereas buildNeighbor takes the Atom first.
void initNeighbor(Neighbor *neighbor, Parameter *param);
void setupNeighbor(Parameter *param, Atom *atom);
void binatoms(Atom *atom);
void buildNeighbor(Atom *atom, Neighbor *neighbor);
void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor);
void sortAtom(Atom *atom);
void buildClusters(Atom *atom);
void defineJClusters(Atom *atom);
void binClusters(Atom *atom);
void updateSingleAtoms(Atom *atom);
#endif

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __PBC_H_
#define __PBC_H_
// Periodic-boundary routines; declared here, defined elsewhere.
// atom.h aliases the generic name updatePbc to cpuUpdatePbc or, with
// CUDA_TARGET, to cudaUpdatePbc.
// NOTE(review): initPbc is declared with an empty parameter list, which before
// C23 leaves its parameters unspecified; kept as-is because the definition is
// not visible here.
void initPbc();
void cpuUpdatePbc(Atom *atom, Parameter *param, int);
void updateAtomsPbc(Atom *atom, Parameter *param);
void setupPbc(Atom *atom, Parameter *param);
#ifdef CUDA_TARGET
void cudaUpdatePbc(Atom *atom, Parameter *param, int);
#endif
#endif

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __STATS_H_
#define __STATS_H_
// Run-time statistics counters, all 64-bit-or-wider signed integers.
// They are meant to be updated through the addStat() macro of this header.
typedef struct {
    long long calculated_forces;
    long long num_neighs;
    long long force_iters;
    long long atoms_within_cutoff;
    long long atoms_outside_cutoff;
    long long clusters_within_cutoff;
    long long clusters_outside_cutoff;
} Stats;
// Statistics routines; declared here, defined elsewhere.
void initStats(Stats *s);
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
// Statistics hooks. With COMPUTE_STATS they expand to real statements;
// otherwise they expand to nothing, so their arguments are not evaluated.
// - addStat(stat, value): adds value to stat.
// - beginStatTimer(): declares a local `double Si` holding getTimeStamp().
// - endStatTimer(stat): adds the time elapsed since Si to stat; it must be
//   expanded in a scope where the Si of beginStatTimer() is visible.
// NOTE(review): the enabled expansions already end in ';', so a call site that
// adds its own ';' produces an extra empty statement, which breaks an unbraced
// if/else around the call.
#ifdef COMPUTE_STATS
# define addStat(stat, value) stat += value;
# define beginStatTimer() double Si = getTimeStamp();
# define endStatTimer(stat) stat += getTimeStamp() - Si;
#else
# define addStat(stat, value)
# define beginStatTimer()
# define endStatTimer(stat)
#endif
#endif

View File

@@ -0,0 +1,102 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
#include <stdio.h>
#include <stdlib.h>
#endif
// Fallback chunk width for the tracing macros below when the build did not
// define VECTOR_WIDTH.
// NOTE(review): <atom.h> is included above this fallback, so the fallback does
// not influence the VECTOR_WIDTH checks made inside atom.h.
#ifndef VECTOR_WIDTH
# define VECTOR_WIDTH 8
#endif
// Default tracing predicate: true when `timestep` is a multiple of
// `param->every`. Both names must be in scope wherever a tracer macro is
// expanded. Can be overridden by defining TRACER_CONDITION beforehand.
#ifndef TRACER_CONDITION
# define TRACER_CONDITION (!(timestep % param->every))
#endif
// Memory-address tracing hooks. With MEM_TRACER defined they write one line
// per traced access to "mem_tracer_<timestep>.out"; otherwise they expand to
// nothing. All enabled macros rely on TRACER_CONDITION and therefore on the
// names that predicate uses being in scope, plus `timestep` for the file name.
//
// - MEM_TRACER_INIT: declares `FILE *mem_tracer_fp` in the current scope and,
//   if TRACER_CONDITION holds, opens the output file for writing. It cannot be
//   wrapped in a block because the declaration must stay visible to the other
//   two macros.
// - MEM_TRACER_END: closes the file if TRACER_CONDITION holds.
// - MEM_TRACE(addr, op): if TRACER_CONDITION holds, writes "<op>: <address of
//   addr>"; `addr` must be an lvalue and `op` a character.
//
// Fix: the fopen line of MEM_TRACER_INIT was missing its line continuation, so
// the macro ended with an unclosed '{' and the closing brace was left as a
// stray token at file scope whenever MEM_TRACER was defined.
#ifdef MEM_TRACER
#   define MEM_TRACER_INIT FILE *mem_tracer_fp; \
        if(TRACER_CONDITION) { \
            char mem_tracer_fn[128]; \
            snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
            mem_tracer_fp = fopen(mem_tracer_fn, "w"); \
        }
#   define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
#   define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
#else
#   define MEM_TRACER_INIT
#   define MEM_TRACER_END
#   define MEM_TRACE(addr, op)
#endif
// Index tracing hooks. With INDEX_TRACER defined they write to
// "index_tracer_<timestep>.out"; otherwise they expand to nothing. All enabled
// macros are gated by TRACER_CONDITION, and every macro that writes uses the
// `index_tracer_fp` declared by INDEX_TRACER_INIT, which must be in scope.
// The list macros process `l[0..e)` in consecutive chunks of at most
// VECTOR_WIDTH entries.
#ifdef INDEX_TRACER
// Declares `FILE *index_tracer_fp` in the current scope and opens the output
// file if TRACER_CONDITION holds. The fopen result is not checked.
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
if(TRACER_CONDITION) { \
char index_tracer_fn[128]; \
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
index_tracer_fp = fopen(index_tracer_fn, "w"); \
}
// Closes the output file.
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
// Writes an "N:" line with three integers.
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
// Writes an "A:" line with one integer.
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
// Writes one "I:" line per chunk, listing the chunk's entries with "%d".
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
fprintf(index_tracer_fp, "I: "); \
for(int __j = 0; __j < __e; ++__j) { \
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
}
// Sorts every chunk of l in place in ascending order by repeatedly swapping
// adjacent entries through an int temporary. Modifies l; writes nothing to
// the trace file.
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
if(l[__k] > l[__k + 1]) { \
int __t = l[__k]; \
l[__k] = l[__k + 1]; \
l[__k + 1] = __t; \
} \
} \
} \
} \
} \
}
// For every chunk with more than one entry, writes a "D:" line with the
// absolute differences between neighbouring entries of that chunk.
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
fprintf(index_tracer_fp, "D: "); \
for(int __j = 0; __j < __e - 1; ++__j) { \
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
fprintf(index_tracer_fp, "%d ", __dist); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
} \
}
#else
# define INDEX_TRACER_INIT
# define INDEX_TRACER_END
# define INDEX_TRACE_NATOMS(nl, ng, mn)
# define INDEX_TRACE_ATOM(a)
# define INDEX_TRACE(l, e)
# define DIST_TRACE_SORT(l, e)
# define DIST_TRACE(l, e)
#endif
// Declared here, defined elsewhere; available regardless of the tracer macros.
extern void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep);

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#ifndef __VTK_H_
#define __VTK_H_
// VTK output routines; declared here, defined elsewhere.
// write_data_to_vtk_file returns nothing, whereas the four specialised writers
// return an int (NOTE(review): presumably a status code - confirm).
void write_data_to_vtk_file(const char *filename, Atom *atom, int timestep);
int write_local_atoms_to_vtk_file(const char *filename, Atom *atom, int timestep);
int write_ghost_atoms_to_vtk_file(const char *filename, Atom *atom, int timestep);
int write_local_cluster_edges_to_vtk_file(const char *filename, Atom *atom, int timestep);
int write_ghost_cluster_edges_to_vtk_file(const char *filename, Atom *atom, int timestep);
#endif

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#ifndef __XTC_H_
#define __XTC_H_
// XTC trajectory output API.
// With XTC_OUTPUT the three functions are declared here and defined elsewhere.
// Without it the same names are function-like macros that expand to nothing,
// so call sites compile unchanged and their arguments are not evaluated.
// NOTE(review): xtc_end is declared with an empty parameter list, which before
// C23 leaves its parameters unspecified rather than declaring `(void)`.
#ifdef XTC_OUTPUT
void xtc_init(const char *, Atom*, int);
void xtc_write(Atom*, int, int, int);
void xtc_end();
#else
#define xtc_init(a,b,c)
#define xtc_write(a,b,c,d)
#define xtc_end()
#endif
#endif