Cleanup. Remove copyright year. Reformat.

This commit is contained in:
2024-05-13 12:33:08 +02:00
parent a6a269703d
commit 9712d7e2c8
77 changed files with 959 additions and 648 deletions

532
clusterpair/atom.c Normal file
View File

@@ -0,0 +1,532 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <atom.h>
#include <allocate.h>
#include <util.h>
void initAtom(Atom *atom) {
atom->x = NULL; atom->y = NULL; atom->z = NULL;
atom->vx = NULL; atom->vy = NULL; atom->vz = NULL;
atom->cl_x = NULL;
atom->cl_v = NULL;
atom->cl_f = NULL;
atom->cl_type = NULL;
atom->Natoms = 0;
atom->Nlocal = 0;
atom->Nghost = 0;
atom->Nmax = 0;
atom->Nclusters = 0;
atom->Nclusters_local = 0;
atom->Nclusters_ghost = 0;
atom->Nclusters_max = 0;
atom->type = NULL;
atom->ntypes = 0;
atom->epsilon = NULL;
atom->sigma6 = NULL;
atom->cutforcesq = NULL;
atom->cutneighsq = NULL;
atom->iclusters = NULL;
atom->jclusters = NULL;
atom->icluster_bin = NULL;
initMasks(atom);
}
void createAtom(Atom *atom, Parameter *param) {
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = param->xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = param->yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = param->zprd;
atom->Natoms = 4 * param->nx * param->ny * param->nz;
atom->Nlocal = 0;
atom->ntypes = param->ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
MD_FLOAT alat = pow((4.0 / param->rho), (1.0 / 3.0));
int ilo = (int) (xlo / (0.5 * alat) - 1);
int ihi = (int) (xhi / (0.5 * alat) + 1);
int jlo = (int) (ylo / (0.5 * alat) - 1);
int jhi = (int) (yhi / (0.5 * alat) + 1);
int klo = (int) (zlo / (0.5 * alat) - 1);
int khi = (int) (zhi / (0.5 * alat) + 1);
ilo = MAX(ilo, 0);
ihi = MIN(ihi, 2 * param->nx - 1);
jlo = MAX(jlo, 0);
jhi = MIN(jhi, 2 * param->ny - 1);
klo = MAX(klo, 0);
khi = MIN(khi, 2 * param->nz - 1);
MD_FLOAT xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp;
int i, j, k, m, n;
int sx = 0; int sy = 0; int sz = 0;
int ox = 0; int oy = 0; int oz = 0;
int subboxdim = 8;
while(oz * subboxdim <= khi) {
k = oz * subboxdim + sz;
j = oy * subboxdim + sy;
i = ox * subboxdim + sx;
if(((i + j + k) % 2 == 0) && (i >= ilo) && (i <= ihi) && (j >= jlo) && (j <= jhi) && (k >= klo) && (k <= khi)) {
xtmp = 0.5 * alat * i;
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
if(xtmp >= xlo && xtmp < xhi && ytmp >= ylo && ytmp < yhi && ztmp >= zlo && ztmp < zhi ) {
n = k * (2 * param->ny) * (2 * param->nx) + j * (2 * param->nx) + i + 1;
for(m = 0; m < 5; m++) { myrandom(&n); }
vxtmp = myrandom(&n);
for(m = 0; m < 5; m++){ myrandom(&n); }
vytmp = myrandom(&n);
for(m = 0; m < 5; m++) { myrandom(&n); }
vztmp = myrandom(&n);
if(atom->Nlocal == atom->Nmax) { growAtom(atom); }
atom_x(atom->Nlocal) = xtmp;
atom_y(atom->Nlocal) = ytmp;
atom_z(atom->Nlocal) = ztmp;
atom->vx[atom->Nlocal] = vxtmp;
atom->vy[atom->Nlocal] = vytmp;
atom->vz[atom->Nlocal] = vztmp;
atom->type[atom->Nlocal] = rand() % atom->ntypes;
atom->Nlocal++;
}
}
sx++;
if(sx == subboxdim) { sx = 0; sy++; }
if(sy == subboxdim) { sy = 0; sz++; }
if(sz == subboxdim) { sz = 0; ox++; }
if(ox * subboxdim > ihi) { ox = 0; oy++; }
if(oy * subboxdim > jhi) { oy = 0; oz++; }
}
}
int type_str2int(const char *type) {
if(strncmp(type, "Ar", 2) == 0) { return 0; } // Argon
fprintf(stderr, "Invalid atom type: %s\n", type);
exit(-1);
return -1;
}
int readAtom(Atom* atom, Parameter* param) {
int len = strlen(param->input_file);
if(strncmp(&param->input_file[len - 4], ".pdb", 4) == 0) { return readAtom_pdb(atom, param); }
if(strncmp(&param->input_file[len - 4], ".gro", 4) == 0) { return readAtom_gro(atom, param); }
if(strncmp(&param->input_file[len - 4], ".dmp", 4) == 0) { return readAtom_dmp(atom, param); }
fprintf(stderr, "Invalid input file extension: %s\nValid choices are: pdb, gro, dmp\n", param->input_file);
exit(-1);
return -1;
}
int readAtom_pdb(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int read_atoms = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
while(!feof(fp)) {
readline(line, fp);
char *item = strtok(line, " ");
if(strncmp(item, "CRYST1", 6) == 0) {
param->xlo = 0.0;
param->xhi = atof(strtok(NULL, " "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
// alpha, beta, gamma, sGroup, z
} else if(strncmp(item, "ATOM", 4) == 0) {
char *label;
int atom_id, comp_id;
MD_FLOAT occupancy, charge;
atom_id = atoi(strtok(NULL, " ")) - 1;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type_str2int(strtok(NULL, " "));
label = strtok(NULL, " ");
comp_id = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = 0.0;
atom->vy[atom_id] = 0.0;
atom->vz[atom_id] = 0.0;
occupancy = atof(strtok(NULL, " "));
charge = atof(strtok(NULL, " "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
read_atoms++;
} else if(strncmp(item, "HEADER", 6) == 0 ||
strncmp(item, "REMARK", 6) == 0 ||
strncmp(item, "MODEL", 5) == 0 ||
strncmp(item, "TER", 3) == 0 ||
strncmp(item, "ENDMDL", 6) == 0) {
// Do nothing
} else {
fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
}
if(!read_atoms) {
fprintf(stderr, "Input error: No atoms read!\n");
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_gro(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
char desc[MAXLINE];
int read_atoms = 0;
int atoms_to_read = 0;
int i = 0;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
readline(desc, fp);
for(i = 0; desc[i] != '\n'; i++);
desc[i] = '\0';
readline(line, fp);
atoms_to_read = atoi(strtok(line, " "));
fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read);
while(!feof(fp) && read_atoms < atoms_to_read) {
readline(line, fp);
char *label = strtok(line, " ");
int type = type_str2int(strtok(NULL, " "));
int atom_id = atoi(strtok(NULL, " ")) - 1;
atom_id = read_atoms;
while(atom_id + 1 >= atom->Nmax) {
growAtom(atom);
}
atom->type[atom_id] = type;
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = atof(strtok(NULL, " "));
atom->vy[atom_id] = atof(strtok(NULL, " "));
atom->vz[atom_id] = atof(strtok(NULL, " "));
atom->ntypes = MAX(atom->type[atom_id] + 1, atom->ntypes);
atom->Natoms++;
atom->Nlocal++;
read_atoms++;
}
if(!feof(fp)) {
readline(line, fp);
param->xlo = 0.0;
param->xhi = atof(strtok(line, " "));
param->ylo = 0.0;
param->yhi = atof(strtok(NULL, " "));
param->zlo = 0.0;
param->zhi = atof(strtok(NULL, " "));
param->xprd = param->xhi - param->xlo;
param->yprd = param->yhi - param->ylo;
param->zprd = param->zhi - param->zlo;
}
if(read_atoms != atoms_to_read) {
fprintf(stderr, "Input error: Number of atoms read do not match (%d/%d).\n", read_atoms, atoms_to_read);
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", read_atoms, param->input_file);
fclose(fp);
return read_atoms;
}
int readAtom_dmp(Atom* atom, Parameter* param) {
FILE *fp = fopen(param->input_file, "r");
char line[MAXLINE];
int natoms = 0;
int read_atoms = 0;
int atom_id = -1;
int ts = -1;
if(!fp) {
fprintf(stderr, "Could not open input file: %s\n", param->input_file);
exit(-1);
return -1;
}
while(!feof(fp) && ts < 1 && !read_atoms) {
readline(line, fp);
if(strncmp(line, "ITEM: ", 6) == 0) {
char *item = &line[6];
if(strncmp(item, "TIMESTEP", 8) == 0) {
readline(line, fp);
ts = atoi(line);
} else if(strncmp(item, "NUMBER OF ATOMS", 15) == 0) {
readline(line, fp);
natoms = atoi(line);
atom->Natoms = natoms;
atom->Nlocal = natoms;
while(atom->Nlocal >= atom->Nmax) {
growAtom(atom);
}
} else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) {
readline(line, fp);
param->xlo = atof(strtok(line, " "));
param->xhi = atof(strtok(NULL, " "));
param->xprd = param->xhi - param->xlo;
readline(line, fp);
param->ylo = atof(strtok(line, " "));
param->yhi = atof(strtok(NULL, " "));
param->yprd = param->yhi - param->ylo;
readline(line, fp);
param->zlo = atof(strtok(line, " "));
param->zhi = atof(strtok(NULL, " "));
param->zprd = param->zhi - param->zlo;
} else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) {
for(int i = 0; i < natoms; i++) {
readline(line, fp);
atom_id = atoi(strtok(line, " ")) - 1;
atom->type[atom_id] = atoi(strtok(NULL, " "));
atom_x(atom_id) = atof(strtok(NULL, " "));
atom_y(atom_id) = atof(strtok(NULL, " "));
atom_z(atom_id) = atof(strtok(NULL, " "));
atom->vx[atom_id] = atof(strtok(NULL, " "));
atom->vy[atom_id] = atof(strtok(NULL, " "));
atom->vz[atom_id] = atof(strtok(NULL, " "));
atom->ntypes = MAX(atom->type[atom_id], atom->ntypes);
read_atoms++;
}
} else {
fprintf(stderr, "Invalid item: %s\n", item);
exit(-1);
return -1;
}
} else {
fprintf(stderr, "Invalid input from file, expected item reference but got:\n%s\n", line);
exit(-1);
return -1;
}
}
if(ts < 0 || !natoms || !read_atoms) {
fprintf(stderr, "Input error: atom data was not read!\n");
exit(-1);
return -1;
}
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param->epsilon;
atom->sigma6[i] = param->sigma6;
atom->cutneighsq[i] = param->cutneigh * param->cutneigh;
atom->cutforcesq[i] = param->cutforce * param->cutforce;
}
fprintf(stdout, "Read %d atoms from %s\n", natoms, param->input_file);
fclose(fp);
return natoms;
}
void initMasks(Atom *atom) {
const unsigned int half_mask_bits = VECTOR_WIDTH >> 1;
unsigned int mask0, mask1, mask2, mask3;
atom->exclusion_filter = allocate(ALIGNMENT, CLUSTER_M * VECTOR_WIDTH * sizeof(MD_UINT));
atom->diagonal_4xn_j_minus_i = allocate(ALIGNMENT, MAX(CLUSTER_M, VECTOR_WIDTH) * sizeof(MD_UINT));
atom->diagonal_2xnn_j_minus_i = allocate(ALIGNMENT, VECTOR_WIDTH * sizeof(MD_UINT));
//atom->masks_2xnn = allocate(ALIGNMENT, 8 * sizeof(unsigned int));
for(int j = 0; j < MAX(CLUSTER_M, VECTOR_WIDTH); j++) {
atom->diagonal_4xn_j_minus_i[j] = (MD_FLOAT)(j) - 0.5;
}
for(int j = 0; j < VECTOR_WIDTH / 2; j++) {
atom->diagonal_2xnn_j_minus_i[j] = (MD_FLOAT)(j) - 0.5;
atom->diagonal_2xnn_j_minus_i[VECTOR_WIDTH / 2 + j] = (MD_FLOAT)(j - 1) - 0.5;
}
for(int i = 0; i < CLUSTER_M * VECTOR_WIDTH; i++) {
atom->exclusion_filter[i] = (1U << i);
}
#if CLUSTER_M == CLUSTER_N
for(unsigned int cond0 = 0; cond0 < 2; cond0++) {
mask0 = (unsigned int)(0xf - 0x1 * cond0);
mask1 = (unsigned int)(0xf - 0x3 * cond0);
mask2 = (unsigned int)(0xf - 0x7 * cond0);
mask3 = (unsigned int)(0xf - 0xf * cond0);
atom->masks_2xnn_hn[cond0 * 2 + 0] = (mask1 << half_mask_bits) | mask0;
atom->masks_2xnn_hn[cond0 * 2 + 1] = (mask3 << half_mask_bits) | mask2;
mask0 = (unsigned int)(0xf - 0x1 * cond0);
mask1 = (unsigned int)(0xf - 0x2 * cond0);
mask2 = (unsigned int)(0xf - 0x4 * cond0);
mask3 = (unsigned int)(0xf - 0x8 * cond0);
atom->masks_2xnn_fn[cond0 * 2 + 0] = (mask1 << half_mask_bits) | mask0;
atom->masks_2xnn_fn[cond0 * 2 + 1] = (mask3 << half_mask_bits) | mask2;
atom->masks_4xn_hn[cond0 * 4 + 0] = (unsigned int)(0xf - 0x1 * cond0);
atom->masks_4xn_hn[cond0 * 4 + 1] = (unsigned int)(0xf - 0x3 * cond0);
atom->masks_4xn_hn[cond0 * 4 + 2] = (unsigned int)(0xf - 0x7 * cond0);
atom->masks_4xn_hn[cond0 * 4 + 3] = (unsigned int)(0xf - 0xf * cond0);
atom->masks_4xn_fn[cond0 * 4 + 0] = (unsigned int)(0xf - 0x1 * cond0);
atom->masks_4xn_fn[cond0 * 4 + 1] = (unsigned int)(0xf - 0x2 * cond0);
atom->masks_4xn_fn[cond0 * 4 + 2] = (unsigned int)(0xf - 0x4 * cond0);
atom->masks_4xn_fn[cond0 * 4 + 3] = (unsigned int)(0xf - 0x8 * cond0);
}
#else
for(unsigned int cond0 = 0; cond0 < 2; cond0++) {
for(unsigned int cond1 = 0; cond1 < 2; cond1++) {
#if CLUSTER_M < CLUSTER_N
mask0 = (unsigned int)(0xff - 0x1 * cond0 - 0x1f * cond1);
mask1 = (unsigned int)(0xff - 0x3 * cond0 - 0x3f * cond1);
mask2 = (unsigned int)(0xff - 0x7 * cond0 - 0x7f * cond1);
mask3 = (unsigned int)(0xff - 0xf * cond0 - 0xff * cond1);
#else
mask0 = (unsigned int)(0x3 - 0x1 * cond0);
mask1 = (unsigned int)(0x3 - 0x3 * cond0);
mask2 = (unsigned int)(0x3 - cond0 * 0x3 - 0x1 * cond1);
mask3 = (unsigned int)(0x3 - cond0 * 0x3 - 0x3 * cond1);
#endif
atom->masks_2xnn_hn[cond0 * 4 + cond1 * 2 + 0] = (mask1 << half_mask_bits) | mask0;
atom->masks_2xnn_hn[cond0 * 4 + cond1 * 2 + 1] = (mask3 << half_mask_bits) | mask2;
#if CLUSTER_M < CLUSTER_N
mask0 = (unsigned int)(0xff - 0x1 * cond0 - 0x10 * cond1);
mask1 = (unsigned int)(0xff - 0x2 * cond0 - 0x20 * cond1);
mask2 = (unsigned int)(0xff - 0x4 * cond0 - 0x40 * cond1);
mask3 = (unsigned int)(0xff - 0x8 * cond0 - 0x80 * cond1);
#else
mask0 = (unsigned int)(0x3 - 0x1 * cond0);
mask1 = (unsigned int)(0x3 - 0x2 * cond0);
mask2 = (unsigned int)(0x3 - 0x1 * cond1);
mask3 = (unsigned int)(0x3 - 0x2 * cond1);
#endif
atom->masks_2xnn_fn[cond0 * 4 + cond1 * 2 + 0] = (mask1 << half_mask_bits) | mask0;
atom->masks_2xnn_fn[cond0 * 4 + cond1 * 2 + 1] = (mask3 << half_mask_bits) | mask2;
#if CLUSTER_M < CLUSTER_N
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0xff - 0x1 * cond0 - 0x1f * cond1);
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 1] = (unsigned int)(0xff - 0x3 * cond0 - 0x3f * cond1);
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 2] = (unsigned int)(0xff - 0x7 * cond0 - 0x7f * cond1);
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 3] = (unsigned int)(0xff - 0xf * cond0 - 0xff * cond1);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0xff - 0x1 * cond0 - 0x10 * cond1);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 1] = (unsigned int)(0xff - 0x2 * cond0 - 0x20 * cond1);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 2] = (unsigned int)(0xff - 0x4 * cond0 - 0x40 * cond1);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 3] = (unsigned int)(0xff - 0x8 * cond0 - 0x80 * cond1);
#else
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0x3 - 0x1 * cond0);
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 1] = (unsigned int)(0x3 - 0x3 * cond0);
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 2] = (unsigned int)(0x3 - 0x3 * cond0 - 0x1 * cond1);
atom->masks_4xn_hn[cond0 * 8 + cond1 * 4 + 3] = (unsigned int)(0x3 - 0x3 * cond0 - 0x3 * cond1);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0x3 - 0x1 * cond0);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0x3 - 0x2 * cond0);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0x3 - 0x1 * cond1);
atom->masks_4xn_fn[cond0 * 8 + cond1 * 4 + 0] = (unsigned int)(0x3 - 0x2 * cond1);
#endif
}
}
#endif
}
void growAtom(Atom *atom) {
int nold = atom->Nmax;
atom->Nmax += DELTA;
#ifdef AOS
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
#else
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
#endif
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
}
void growClusters(Atom *atom) {
int nold = atom->Nclusters_max;
int jterm = MAX(1, CLUSTER_M / CLUSTER_N); // If M>N, we need to allocate more j-clusters
atom->Nclusters_max += DELTA;
atom->iclusters = (Cluster*) reallocate(atom->iclusters, ALIGNMENT, atom->Nclusters_max * sizeof(Cluster), nold * sizeof(Cluster));
atom->jclusters = (Cluster*) reallocate(atom->jclusters, ALIGNMENT, atom->Nclusters_max * jterm * sizeof(Cluster), nold * jterm * sizeof(Cluster));
atom->icluster_bin = (int*) reallocate(atom->icluster_bin, ALIGNMENT, atom->Nclusters_max * sizeof(int), nold * sizeof(int));
atom->cl_x = (MD_FLOAT*) reallocate(atom->cl_x, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
atom->cl_f = (MD_FLOAT*) reallocate(atom->cl_f, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
atom->cl_v = (MD_FLOAT*) reallocate(atom->cl_v, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT), nold * CLUSTER_M * 3 * sizeof(MD_FLOAT));
atom->cl_type = (int*) reallocate(atom->cl_type, ALIGNMENT, atom->Nclusters_max * CLUSTER_M * sizeof(int), nold * CLUSTER_M * sizeof(int));
}

View File

@@ -0,0 +1,317 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
extern "C" {
#include <stdio.h>
//---
#include <cuda.h>
#include <driver_types.h>
//---
#include <likwid-marker.h>
//---
#include <atom.h>
#include <device.h>
#include <neighbor.h>
#include <parameter.h>
#include <stats.h>
#include <timing.h>
#include <util.h>
}
extern "C" {
MD_FLOAT *cuda_cl_x;
MD_FLOAT *cuda_cl_v;
MD_FLOAT *cuda_cl_f;
int *cuda_neighbors;
int *cuda_numneigh;
int *cuda_natoms;
int *natoms;
int *ngatoms;
int *cuda_border_map;
int *cuda_jclusters_natoms;
MD_FLOAT *cuda_bbminx, *cuda_bbmaxx;
MD_FLOAT *cuda_bbminy, *cuda_bbmaxy;
MD_FLOAT *cuda_bbminz, *cuda_bbmaxz;
int *cuda_PBCx, *cuda_PBCy, *cuda_PBCz;
int isReneighboured;
}
extern "C"
void initDevice(Atom *atom, Neighbor *neighbor) {
cuda_assert("cudaDeviceSetup", cudaDeviceReset());
cuda_assert("cudaDeviceSetup", cudaSetDevice(0));
cuda_cl_x = (MD_FLOAT *) allocateGPU(atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
cuda_cl_v = (MD_FLOAT *) allocateGPU(atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
cuda_cl_f = (MD_FLOAT *) allocateGPU(atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
cuda_natoms = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_jclusters_natoms = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_border_map = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_PBCx = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_PBCy = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_PBCz = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_numneigh = (int *) allocateGPU(atom->Nclusters_max * sizeof(int));
cuda_neighbors = (int *) allocateGPU(atom->Nclusters_max * neighbor->maxneighs * sizeof(int));
natoms = (int *) malloc(atom->Nclusters_max * sizeof(int));
ngatoms = (int *) malloc(atom->Nclusters_max * sizeof(int));
isReneighboured = 1;
}
extern "C"
void copyDataToCUDADevice(Atom *atom) {
memcpyToGPU(cuda_cl_x, atom->cl_x, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
memcpyToGPU(cuda_cl_v, atom->cl_v, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
memcpyToGPU(cuda_cl_f, atom->cl_f, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
natoms[ci] = atom->iclusters[ci].natoms;
}
memcpyToGPU(cuda_natoms, natoms, atom->Nclusters_local * sizeof(int));
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
int ncj = atom->Nclusters_local / jfac;
for(int cg = 0; cg < atom->Nclusters_ghost; cg++) {
const int cj = ncj + cg;
ngatoms[cg] = atom->jclusters[cj].natoms;
}
memcpyToGPU(cuda_jclusters_natoms, ngatoms, atom->Nclusters_ghost * sizeof(int));
memcpyToGPU(cuda_border_map, atom->border_map, atom->Nclusters_ghost * sizeof(int));
memcpyToGPU(cuda_PBCx, atom->PBCx, atom->Nclusters_ghost * sizeof(int));
memcpyToGPU(cuda_PBCy, atom->PBCy, atom->Nclusters_ghost * sizeof(int));
memcpyToGPU(cuda_PBCz, atom->PBCz, atom->Nclusters_ghost * sizeof(int));
}
extern "C"
void copyDataFromCUDADevice(Atom *atom) {
memcpyFromGPU(atom->cl_x, cuda_cl_x, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
memcpyFromGPU(atom->cl_v, cuda_cl_v, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
memcpyFromGPU(atom->cl_f, cuda_cl_f, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
}
extern "C"
void cudaDeviceFree() {
cuda_assert("cudaDeviceFree", cudaFree(cuda_cl_x));
cuda_assert("cudaDeviceFree", cudaFree(cuda_cl_v));
cuda_assert("cudaDeviceFree", cudaFree(cuda_cl_f));
cuda_assert("cudaDeviceFree", cudaFree(cuda_numneigh));
cuda_assert("cudaDeviceFree", cudaFree(cuda_neighbors));
cuda_assert("cudaDeviceFree", cudaFree(cuda_natoms));
cuda_assert("cudaDeviceFree", cudaFree(cuda_border_map));
cuda_assert("cudaDeviceFree", cudaFree(cuda_jclusters_natoms));
cuda_assert("cudaDeviceFree", cudaFree(cuda_PBCx));
cuda_assert("cudaDeviceFree", cudaFree(cuda_PBCy));
cuda_assert("cudaDeviceFree", cudaFree(cuda_PBCz));
free(natoms);
free(ngatoms);
}
__global__ void cudaInitialIntegrate_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_v, MD_FLOAT *cuda_cl_f,
int *cuda_natoms,
int Nclusters_local, MD_FLOAT dtforce, MD_FLOAT dt) {
unsigned int ci_pos = blockDim.x * blockIdx.x + threadIdx.x;
if (ci_pos >= Nclusters_local) return;
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci_pos);
MD_FLOAT *ci_x = &cuda_cl_x[ci_vec_base];
MD_FLOAT *ci_v = &cuda_cl_v[ci_vec_base];
MD_FLOAT *ci_f = &cuda_cl_f[ci_vec_base];
for (int cii = 0; cii < cuda_natoms[ci_pos]; cii++) {
ci_v[CL_X_OFFSET + cii] += dtforce * ci_f[CL_X_OFFSET + cii];
ci_v[CL_Y_OFFSET + cii] += dtforce * ci_f[CL_Y_OFFSET + cii];
ci_v[CL_Z_OFFSET + cii] += dtforce * ci_f[CL_Z_OFFSET + cii];
ci_x[CL_X_OFFSET + cii] += dt * ci_v[CL_X_OFFSET + cii];
ci_x[CL_Y_OFFSET + cii] += dt * ci_v[CL_Y_OFFSET + cii];
ci_x[CL_Z_OFFSET + cii] += dt * ci_v[CL_Z_OFFSET + cii];
}
}
__global__ void cudaUpdatePbc_warp(MD_FLOAT *cuda_cl_x, int *cuda_border_map,
int *cuda_jclusters_natoms,
int *cuda_PBCx,
int *cuda_PBCy,
int *cuda_PBCz,
int Nclusters_local,
int Nclusters_ghost,
MD_FLOAT param_xprd,
MD_FLOAT param_yprd,
MD_FLOAT param_zprd) {
unsigned int cg = blockDim.x * blockIdx.x + threadIdx.x;
if (cg >= Nclusters_ghost) return;
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
int ncj = Nclusters_local / jfac;
MD_FLOAT xprd = param_xprd;
MD_FLOAT yprd = param_yprd;
MD_FLOAT zprd = param_zprd;
const int cj = ncj + cg;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int bmap_vec_base = CJ_VECTOR_BASE_INDEX(cuda_border_map[cg]);
MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
MD_FLOAT *bmap_x = &cuda_cl_x[bmap_vec_base];
for(int cjj = 0; cjj < cuda_jclusters_natoms[cg]; cjj++) {
cj_x[CL_X_OFFSET + cjj] = bmap_x[CL_X_OFFSET + cjj] + cuda_PBCx[cg] * xprd;
cj_x[CL_Y_OFFSET + cjj] = bmap_x[CL_Y_OFFSET + cjj] + cuda_PBCy[cg] * yprd;
cj_x[CL_Z_OFFSET + cjj] = bmap_x[CL_Z_OFFSET + cjj] + cuda_PBCz[cg] * zprd;
}
}
__global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_f,
int Nclusters_local, int Nclusters_max,
int *cuda_numneigh, int *cuda_neighs, int half_neigh, int maxneighs,
MD_FLOAT cutforcesq, MD_FLOAT sigma6, MD_FLOAT epsilon) {
unsigned int ci_pos = blockDim.x * blockIdx.x + threadIdx.x;
unsigned int cii_pos = blockDim.y * blockIdx.y + threadIdx.y;
unsigned int cjj_pos = blockDim.z * blockIdx.z + threadIdx.z;
if ((ci_pos >= Nclusters_local) || (cii_pos >= CLUSTER_M) || (cjj_pos >= CLUSTER_N)) return;
int ci_cj0 = CJ0_FROM_CI(ci_pos);
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci_pos);
MD_FLOAT *ci_x = &cuda_cl_x[ci_vec_base];
MD_FLOAT *ci_f = &cuda_cl_f[ci_vec_base];
int numneighs = cuda_numneigh[ci_pos];
for(int k = 0; k < numneighs; k++) {
int cj = (&cuda_neighs[ci_pos * maxneighs])[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &cuda_cl_x[cj_vec_base];
MD_FLOAT *cj_f = &cuda_cl_f[cj_vec_base];
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii_pos];
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii_pos];
MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii_pos];
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
int cond;
#if CLUSTER_M == CLUSTER_N
cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) :
(ci_cj0 != cj || cii_pos != cjj_pos);
#elif CLUSTER_M < CLUSTER_N
cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) :
(ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos);
#endif
if(cond) {
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj_pos];
MD_FLOAT dely = ytmp - cj_x[CL_Y_OFFSET + cjj_pos];
MD_FLOAT delz = ztmp - cj_x[CL_Z_OFFSET + cjj_pos];
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq < cutforcesq) {
MD_FLOAT sr2 = 1.0 / rsq;
MD_FLOAT sr6 = sr2 * sr2 * sr2 * sigma6;
MD_FLOAT force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
if(half_neigh) {
atomicAdd(&cj_f[CL_X_OFFSET + cjj_pos], -delx * force);
atomicAdd(&cj_f[CL_Y_OFFSET + cjj_pos], -dely * force);
atomicAdd(&cj_f[CL_Z_OFFSET + cjj_pos], -delz * force);
}
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
atomicAdd(&ci_f[CL_X_OFFSET + cii_pos], fix);
atomicAdd(&ci_f[CL_Y_OFFSET + cii_pos], fiy);
atomicAdd(&ci_f[CL_Z_OFFSET + cii_pos], fiz);
}
}
}
}
__global__ void cudaFinalIntegrate_warp(MD_FLOAT *cuda_cl_v, MD_FLOAT *cuda_cl_f,
int *cuda_natoms,
int Nclusters_local, MD_FLOAT dtforce) {
unsigned int ci_pos = blockDim.x * blockIdx.x + threadIdx.x;
if (ci_pos >= Nclusters_local) return;
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci_pos);
MD_FLOAT *ci_v = &cuda_cl_v[ci_vec_base];
MD_FLOAT *ci_f = &cuda_cl_f[ci_vec_base];
for (int cii = 0; cii < cuda_natoms[ci_pos]; cii++) {
ci_v[CL_X_OFFSET + cii] += dtforce * ci_f[CL_X_OFFSET + cii];
ci_v[CL_Y_OFFSET + cii] += dtforce * ci_f[CL_Y_OFFSET + cii];
ci_v[CL_Z_OFFSET + cii] += dtforce * ci_f[CL_Z_OFFSET + cii];
}
}
extern "C"
void cudaInitialIntegrate(Parameter *param, Atom *atom) {
const int threads_num = 16;
dim3 block_size = dim3(threads_num, 1, 1);
dim3 grid_size = dim3(atom->Nclusters_local/(threads_num)+1, 1, 1);
cudaInitialIntegrate_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_cl_v, cuda_cl_f,
cuda_natoms, atom->Nclusters_local, param->dtforce, param->dt);
cuda_assert("cudaInitialIntegrate", cudaPeekAtLastError());
cuda_assert("cudaInitialIntegrate", cudaDeviceSynchronize());
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
extern "C"
void cudaUpdatePbc(Atom *atom, Parameter *param) {
const int threads_num = 512;
dim3 block_size = dim3(threads_num, 1, 1);;
dim3 grid_size = dim3(atom->Nclusters_ghost/(threads_num)+1, 1, 1);;
cudaUpdatePbc_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_border_map,
cuda_jclusters_natoms, cuda_PBCx, cuda_PBCy, cuda_PBCz,
atom->Nclusters_local, atom->Nclusters_ghost,
param->xprd, param->yprd, param->zprd);
cuda_assert("cudaUpdatePbc", cudaPeekAtLastError());
cuda_assert("cudaUpdatePbc", cudaDeviceSynchronize());
}
extern "C"
double computeForceLJ_cuda(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats) {
MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
MD_FLOAT sigma6 = param->sigma6;
MD_FLOAT epsilon = param->epsilon;
memsetGPU(cuda_cl_f, 0, atom->Nclusters_max * CLUSTER_M * 3 * sizeof(MD_FLOAT));
if (isReneighboured) {
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
memcpyToGPU(&cuda_numneigh[ci], &neighbor->numneigh[ci], sizeof(int));
memcpyToGPU(&cuda_neighbors[ci * neighbor->maxneighs], &neighbor->neighbors[ci * neighbor->maxneighs], neighbor->numneigh[ci] * sizeof(int));
}
isReneighboured = 0;
}
const int threads_num = 1;
dim3 block_size = dim3(threads_num, CLUSTER_M, CLUSTER_N);
dim3 grid_size = dim3(atom->Nclusters_local/threads_num+1, 1, 1);
double S = getTimeStamp();
LIKWID_MARKER_START("force");
computeForceLJ_cuda_warp<<<grid_size, block_size>>>(cuda_cl_x, cuda_cl_f,
atom->Nclusters_local, atom->Nclusters_max,
cuda_numneigh, cuda_neighbors,
neighbor->half_neigh, neighbor->maxneighs, cutforcesq,
sigma6, epsilon);
cuda_assert("computeForceLJ_cuda", cudaPeekAtLastError());
cuda_assert("computeForceLJ_cuda", cudaDeviceSynchronize());
LIKWID_MARKER_STOP("force");
double E = getTimeStamp();
return E-S;
}
extern "C"
void cudaFinalIntegrate(Parameter *param, Atom *atom) {
const int threads_num = 16;
dim3 block_size = dim3(threads_num, 1, 1);
dim3 grid_size = dim3(atom->Nclusters_local/(threads_num)+1, 1, 1);
cudaFinalIntegrate_warp<<<grid_size, block_size>>>(cuda_cl_v, cuda_cl_f, cuda_natoms, atom->Nclusters_local, param->dt);
cuda_assert("cudaFinalIntegrate", cudaPeekAtLastError());
cuda_assert("cudaFinalIntegrate", cudaDeviceSynchronize());
}

201
clusterpair/force_eam.c Normal file
View File

@@ -0,0 +1,201 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <likwid-marker.h>
#include <math.h>
#include <allocate.h>
#include <timing.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <eam.h>
#include <util.h>
double computeForceEam(Eam* eam, Parameter* param, Atom *atom, Neighbor *neighbor, Stats *stats) {
/*
if(eam->nmax < atom->Nmax) {
eam->nmax = atom->Nmax;
if(eam->fp != NULL) { free(eam->fp); }
eam->fp = (MD_FLOAT *) allocate(ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT));
}
int Nlocal = atom->Nlocal;
int* neighs;
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; int ntypes = atom->ntypes; MD_FLOAT* fp = eam->fp;
MD_FLOAT* rhor_spline = eam->rhor_spline; MD_FLOAT* frho_spline = eam->frho_spline; MD_FLOAT* z2r_spline = eam->z2r_spline;
MD_FLOAT rdr = eam->rdr; int nr = eam->nr; int nr_tot = eam->nr_tot; MD_FLOAT rdrho = eam->rdrho;
int nrho = eam->nrho; int nrho_tot = eam->nrho_tot;
*/
double S = getTimeStamp();
LIKWID_MARKER_START("force_eam_fp");
/*
#pragma omp parallel for
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT rhoi = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
#pragma ivdep
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
#else
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
#endif
if(rsq < cutforcesq) {
MD_FLOAT p = sqrt(rsq) * rdr + 1.0;
int m = (int)(p);
m = m < nr - 1 ? m : nr - 1;
p -= m;
p = p < 1.0 ? p : 1.0;
#ifdef EXPLICIT_TYPES
rhoi += ((rhor_spline[type_ij * nr_tot + m * 7 + 3] * p +
rhor_spline[type_ij * nr_tot + m * 7 + 4]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 5]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 6];
#else
rhoi += ((rhor_spline[m * 7 + 3] * p +
rhor_spline[m * 7 + 4]) * p +
rhor_spline[m * 7 + 5]) * p +
rhor_spline[m * 7 + 6];
#endif
}
}
#ifdef EXPLICIT_TYPES
const int type_ii = type_i * type_i;
#endif
MD_FLOAT p = 1.0 * rhoi * rdrho + 1.0;
int m = (int)(p);
m = MAX(1, MIN(m, nrho - 1));
p -= m;
p = MIN(p, 1.0);
#ifdef EXPLICIT_TYPES
fp[i] = (frho_spline[type_ii * nrho_tot + m * 7 + 0] * p +
frho_spline[type_ii * nrho_tot + m * 7 + 1]) * p +
frho_spline[type_ii * nrho_tot + m * 7 + 2];
#else
fp[i] = (frho_spline[m * 7 + 0] * p + frho_spline[m * 7 + 1]) * p + frho_spline[m * 7 + 2];
#endif
}
LIKWID_MARKER_STOP("force_eam_fp");
// We still need to update fp for PBC atoms
for(int i = 0; i < atom->Nghost; i++) {
fp[Nlocal + i] = fp[atom->border_map[i]];
}
LIKWID_MARKER_START("force_eam");
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
MD_FLOAT fix = 0;
MD_FLOAT fiy = 0;
MD_FLOAT fiz = 0;
#ifdef EXPLICIT_TYPES
const int type_i = atom->type[i];
#endif
#pragma ivdep
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MD_FLOAT delx = xtmp - atom_x(j);
MD_FLOAT dely = ytmp - atom_y(j);
MD_FLOAT delz = ztmp - atom_z(j);
MD_FLOAT rsq = delx * delx + dely * dely + delz * delz;
#ifdef EXPLICIT_TYPES
const int type_j = atom->type[j];
const int type_ij = type_i * ntypes + type_j;
const MD_FLOAT cutforcesq = atom->cutforcesq[type_ij];
#else
const MD_FLOAT cutforcesq = param->cutforce * param->cutforce;
#endif
if(rsq < cutforcesq) {
MD_FLOAT r = sqrt(rsq);
MD_FLOAT p = r * rdr + 1.0;
int m = (int)(p);
m = m < nr - 1 ? m : nr - 1;
p -= m;
p = p < 1.0 ? p : 1.0;
// rhoip = derivative of (density at atom j due to atom i)
// rhojp = derivative of (density at atom i due to atom j)
// phi = pair potential energy
// phip = phi'
// z2 = phi * r
// z2p = (phi * r)' = (phi' r) + phi
// psip needs both fp[i] and fp[j] terms since r_ij appears in two
// terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
// hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
#ifdef EXPLICIT_TYPES
MD_FLOAT rhoip = (rhor_spline[type_ij * nr_tot + m * 7 + 0] * p +
rhor_spline[type_ij * nr_tot + m * 7 + 1]) * p +
rhor_spline[type_ij * nr_tot + m * 7 + 2];
MD_FLOAT z2p = (z2r_spline[type_ij * nr_tot + m * 7 + 0] * p +
z2r_spline[type_ij * nr_tot + m * 7 + 1]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 2];
MD_FLOAT z2 = ((z2r_spline[type_ij * nr_tot + m * 7 + 3] * p +
z2r_spline[type_ij * nr_tot + m * 7 + 4]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 5]) * p +
z2r_spline[type_ij * nr_tot + m * 7 + 6];
#else
MD_FLOAT rhoip = (rhor_spline[m * 7 + 0] * p + rhor_spline[m * 7 + 1]) * p + rhor_spline[m * 7 + 2];
MD_FLOAT z2p = (z2r_spline[m * 7 + 0] * p + z2r_spline[m * 7 + 1]) * p + z2r_spline[m * 7 + 2];
MD_FLOAT z2 = ((z2r_spline[m * 7 + 3] * p +
z2r_spline[m * 7 + 4]) * p +
z2r_spline[m * 7 + 5]) * p +
z2r_spline[m * 7 + 6];
#endif
MD_FLOAT recip = 1.0 / r;
MD_FLOAT phi = z2 * recip;
MD_FLOAT phip = z2p * recip - phi * recip;
MD_FLOAT psip = fp[i] * rhoip + fp[j] * rhoip + phip;
MD_FLOAT fpair = -psip * recip;
fix += delx * fpair;
fiy += dely * fpair;
fiz += delz * fpair;
//fpair *= 0.5;
}
}
fx[i] = fix;
fy[i] = fiy;
fz[i] = fiz;
addStat(stats->total_force_neighs, numneighs);
addStat(stats->total_force_iters, (numneighs + VECTOR_WIDTH - 1) / VECTOR_WIDTH);
}
*/
LIKWID_MARKER_STOP("force_eam");
double E = getTimeStamp();
return E-S;
}

1072
clusterpair/force_lj.c Normal file

File diff suppressed because it is too large Load Diff

171
clusterpair/includes/atom.h Normal file
View File

@@ -0,0 +1,171 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <parameter.h>
#ifndef __ATOM_H_
#define __ATOM_H_
#define DELTA 20000
// Nbnxn layouts (as of GROMACS):
// Simd4xN: M=4, N=VECTOR_WIDTH
// Simd2xNN: M=4, N=(VECTOR_WIDTH/2)
// Cuda: M=8, N=VECTOR_WIDTH
#ifdef CUDA_TARGET
# undef VECTOR_WIDTH
# define VECTOR_WIDTH 8
# define KERNEL_NAME "CUDA"
# define CLUSTER_M 8
# define CLUSTER_N VECTOR_WIDTH
# define UNROLL_J 1
# define computeForceLJ computeForceLJ_cuda
# define initialIntegrate cudaInitialIntegrate
# define finalIntegrate cudaFinalIntegrate
# define updatePbc cudaUpdatePbc
#else
# define CLUSTER_M 4
// Simd2xNN (here used for single-precision)
# if VECTOR_WIDTH > CLUSTER_M * 2
# define KERNEL_NAME "Simd2xNN"
# define CLUSTER_N (VECTOR_WIDTH / 2)
# define UNROLL_I 4
# define UNROLL_J 2
# define computeForceLJ computeForceLJ_2xnn
// Simd4xN
# else
# define KERNEL_NAME "Simd4xN"
# define CLUSTER_N VECTOR_WIDTH
# define UNROLL_I 4
# define UNROLL_J 1
# define computeForceLJ computeForceLJ_4xn
# endif
# ifdef USE_REFERENCE_VERSION
# undef KERNEL_NAME
# undef computeForceLJ
# define KERNEL_NAME "Reference"
# define computeForceLJ computeForceLJ_ref
# endif
# define initialIntegrate cpuInitialIntegrate
# define finalIntegrate cpuFinalIntegrate
# define updatePbc cpuUpdatePbc
#endif
#if CLUSTER_M == CLUSTER_N
# define CJ0_FROM_CI(a) (a)
# define CJ1_FROM_CI(a) (a)
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
#elif CLUSTER_M == CLUSTER_N * 2 // M > N
# define CJ0_FROM_CI(a) ((a) << 1)
# define CJ1_FROM_CI(a) (((a) << 1) | 0x1)
# define CI_BASE_INDEX(a,b) ((a) * CLUSTER_M * (b))
# define CJ_BASE_INDEX(a,b) (((a) >> 1) * CLUSTER_M * (b) + ((a) & 0x1) * (CLUSTER_M >> 1))
#elif CLUSTER_M == CLUSTER_N / 2 // M < N
# define CJ0_FROM_CI(a) ((a) >> 1)
# define CJ1_FROM_CI(a) ((a) >> 1)
# define CI_BASE_INDEX(a,b) (((a) >> 1) * CLUSTER_N * (b) + ((a) & 0x1) * (CLUSTER_N >> 1))
# define CJ_BASE_INDEX(a,b) ((a) * CLUSTER_N * (b))
#else
# error "Invalid cluster configuration!"
#endif
#if CLUSTER_N != 2 && CLUSTER_N != 4 && CLUSTER_N != 8
# error "Cluster N dimension can be only 2, 4 and 8"
#endif
#define CI_SCALAR_BASE_INDEX(a) (CI_BASE_INDEX(a, 1))
#define CI_VECTOR_BASE_INDEX(a) (CI_BASE_INDEX(a, 3))
#define CJ_SCALAR_BASE_INDEX(a) (CJ_BASE_INDEX(a, 1))
#define CJ_VECTOR_BASE_INDEX(a) (CJ_BASE_INDEX(a, 3))
#if CLUSTER_M >= CLUSTER_N
# define CL_X_OFFSET (0 * CLUSTER_M)
# define CL_Y_OFFSET (1 * CLUSTER_M)
# define CL_Z_OFFSET (2 * CLUSTER_M)
#else
# define CL_X_OFFSET (0 * CLUSTER_N)
# define CL_Y_OFFSET (1 * CLUSTER_N)
# define CL_Z_OFFSET (2 * CLUSTER_N)
#endif
typedef struct {
int natoms;
MD_FLOAT bbminx, bbmaxx;
MD_FLOAT bbminy, bbmaxy;
MD_FLOAT bbminz, bbmaxz;
} Cluster;
typedef struct {
int Natoms, Nlocal, Nghost, Nmax;
int Nclusters, Nclusters_local, Nclusters_ghost, Nclusters_max;
MD_FLOAT *x, *y, *z;
MD_FLOAT *vx, *vy, *vz;
int *border_map;
int *type;
int ntypes;
MD_FLOAT *epsilon;
MD_FLOAT *sigma6;
MD_FLOAT *cutforcesq;
MD_FLOAT *cutneighsq;
int *PBCx, *PBCy, *PBCz;
// Data in cluster format
MD_FLOAT *cl_x;
MD_FLOAT *cl_v;
MD_FLOAT *cl_f;
int *cl_type;
Cluster *iclusters, *jclusters;
int *icluster_bin;
int dummy_cj;
MD_UINT *exclusion_filter;
MD_FLOAT *diagonal_4xn_j_minus_i;
MD_FLOAT *diagonal_2xnn_j_minus_i;
unsigned int masks_2xnn_hn[8];
unsigned int masks_2xnn_fn[8];
unsigned int masks_4xn_hn[16];
unsigned int masks_4xn_fn[16];
} Atom;
extern void initAtom(Atom*);
extern void initMasks(Atom*);
extern void createAtom(Atom*, Parameter*);
extern int readAtom(Atom*, Parameter*);
extern int readAtom_pdb(Atom*, Parameter*);
extern int readAtom_gro(Atom*, Parameter*);
extern int readAtom_dmp(Atom*, Parameter*);
extern void growAtom(Atom*);
extern void growClusters(Atom*);
#ifdef AOS
# define POS_DATA_LAYOUT "AoS"
# define atom_x(i) atom->x[(i) * 3 + 0]
# define atom_y(i) atom->x[(i) * 3 + 1]
# define atom_z(i) atom->x[(i) * 3 + 2]
/*
# define atom_vx(i) atom->vx[(i) * 3 + 0]
# define atom_vy(i) atom->vx[(i) * 3 + 1]
# define atom_vz(i) atom->vx[(i) * 3 + 2]
# define atom_fx(i) atom->fx[(i) * 3 + 0]
# define atom_fy(i) atom->fx[(i) * 3 + 1]
# define atom_fz(i) atom->fx[(i) * 3 + 2]
*/
#else
# define POS_DATA_LAYOUT "SoA"
# define atom_x(i) atom->x[i]
# define atom_y(i) atom->y[i]
# define atom_z(i) atom->z[i]
#endif
// TODO: allow to switch velocites and forces to AoS
# define atom_vx(i) atom->vx[i]
# define atom_vy(i) atom->vy[i]
# define atom_vz(i) atom->vz[i]
# define atom_fx(i) atom->fx[i]
# define atom_fy(i) atom->fy[i]
# define atom_fz(i) atom->fz[i]
#endif

View File

@@ -0,0 +1,56 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdbool.h>
//---
#include <atom.h>
#include <parameter.h>
#include <util.h>
void cpuInitialIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuInitialIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
ci_x[CL_X_OFFSET + cii] += param->dt * ci_v[CL_X_OFFSET + cii];
ci_x[CL_Y_OFFSET + cii] += param->dt * ci_v[CL_Y_OFFSET + cii];
ci_x[CL_Z_OFFSET + cii] += param->dt * ci_v[CL_Z_OFFSET + cii];
}
}
DEBUG_MESSAGE("cpuInitialIntegrate end\n");
}
void cpuFinalIntegrate(Parameter *param, Atom *atom) {
DEBUG_MESSAGE("cpuFinalIntegrate start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
MD_FLOAT *ci_f = &atom->cl_f[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
ci_v[CL_X_OFFSET + cii] += param->dtforce * ci_f[CL_X_OFFSET + cii];
ci_v[CL_Y_OFFSET + cii] += param->dtforce * ci_f[CL_Y_OFFSET + cii];
ci_v[CL_Z_OFFSET + cii] += param->dtforce * ci_f[CL_Z_OFFSET + cii];
}
}
DEBUG_MESSAGE("cpuFinalIntegrate end\n");
}
#ifdef CUDA_TARGET
void cudaInitialIntegrate(Parameter*, Atom*);
void cudaFinalIntegrate(Parameter*, Atom*);
#endif

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __NEIGHBOR_H_
#define __NEIGHBOR_H_
// Interaction masks from GROMACS, things to remember (maybe these confused just me):
// 1. These are not "exclusion" masks as the name suggests in GROMACS, but rather
// interaction masks (1 = interaction, 0 = no interaction)
// 2. These are inverted (maybe because that is how you use in AVX2/AVX512 masking),
// so read them from right to left (least significant to most significant bit)
// All interaction mask is the same for all kernels
#define NBNXN_INTERACTION_MASK_ALL 0xffffffffU
// 4x4 kernel diagonal mask
#define NBNXN_INTERACTION_MASK_DIAG 0x08ceU
// 4x2 kernel diagonal masks
#define NBNXN_INTERACTION_MASK_DIAG_J2_0 0x0002U
#define NBNXN_INTERACTION_MASK_DIAG_J2_1 0x002fU
// 4x8 kernel diagonal masks
#define NBNXN_INTERACTION_MASK_DIAG_J8_0 0xf0f8fcfeU
#define NBNXN_INTERACTION_MASK_DIAG_J8_1 0x0080c0e0U
typedef struct {
int every;
int ncalls;
int maxneighs;
int* numneigh;
int* numneigh_masked;
int half_neigh;
int* neighbors;
unsigned int* neighbors_imask;
} Neighbor;
extern void initNeighbor(Neighbor*, Parameter*);
extern void setupNeighbor(Parameter*, Atom*);
extern void binatoms(Atom*);
extern void buildNeighbor(Atom*, Neighbor*);
extern void pruneNeighbor(Parameter*, Atom*, Neighbor*);
extern void sortAtom(Atom*);
extern void buildClusters(Atom*);
extern void defineJClusters(Atom*);
extern void binClusters(Atom*);
extern void updateSingleAtoms(Atom*);
#endif

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __PBC_H_
#define __PBC_H_
extern void initPbc();
extern void cpuUpdatePbc(Atom*, Parameter*, int);
extern void updateAtomsPbc(Atom*, Parameter*);
extern void setupPbc(Atom*, Parameter*);
#ifdef CUDA_TARGET
extern void cudaUpdatePbc(Atom*, Parameter*, int);
#endif
#endif

View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#include <parameter.h>
#ifndef __STATS_H_
#define __STATS_H_
typedef struct {
long long int calculated_forces;
long long int num_neighs;
long long int force_iters;
long long int atoms_within_cutoff;
long long int atoms_outside_cutoff;
long long int clusters_within_cutoff;
long long int clusters_outside_cutoff;
} Stats;
void initStats(Stats *s);
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer);
#ifdef COMPUTE_STATS
# define addStat(stat, value) stat += value;
# define beginStatTimer() double Si = getTimeStamp();
# define endStatTimer(stat) stat += getTimeStamp() - Si;
#else
# define addStat(stat, value)
# define beginStatTimer()
# define endStatTimer(stat)
#endif
#endif

View File

@@ -0,0 +1,102 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
#include <stdio.h>
#include <stdlib.h>
#endif
#ifndef VECTOR_WIDTH
# define VECTOR_WIDTH 8
#endif
#ifndef TRACER_CONDITION
# define TRACER_CONDITION (!(timestep % param->every))
#endif
#ifdef MEM_TRACER
# define MEM_TRACER_INIT FILE *mem_tracer_fp; \
if(TRACER_CONDITION) { \
char mem_tracer_fn[128]; \
snprintf(mem_tracer_fn, sizeof mem_tracer_fn, "mem_tracer_%d.out", timestep); \
mem_tracer_fp = fopen(mem_tracer_fn, "w");
}
# define MEM_TRACER_END if(TRACER_CONDITION) { fclose(mem_tracer_fp); }
# define MEM_TRACE(addr, op) if(TRACER_CONDITION) { fprintf(mem_tracer_fp, "%c: %p\n", op, (void *)(&(addr))); }
#else
# define MEM_TRACER_INIT
# define MEM_TRACER_END
# define MEM_TRACE(addr, op)
#endif
#ifdef INDEX_TRACER
# define INDEX_TRACER_INIT FILE *index_tracer_fp; \
if(TRACER_CONDITION) { \
char index_tracer_fn[128]; \
snprintf(index_tracer_fn, sizeof index_tracer_fn, "index_tracer_%d.out", timestep); \
index_tracer_fp = fopen(index_tracer_fn, "w"); \
}
# define INDEX_TRACER_END if(TRACER_CONDITION) { fclose(index_tracer_fp); }
# define INDEX_TRACE_NATOMS(nl, ng, mn) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "N: %d %d %d\n", nl, ng, mn); }
# define INDEX_TRACE_ATOM(a) if(TRACER_CONDITION) { fprintf(index_tracer_fp, "A: %d\n", a); }
# define INDEX_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
fprintf(index_tracer_fp, "I: "); \
for(int __j = 0; __j < __e; ++__j) { \
fprintf(index_tracer_fp, "%d ", l[__i + __j]); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
}
# define DIST_TRACE_SORT(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
for(int __j = __i; __j < __i + __e - 1; ++__j) { \
for(int __k = __i; __k < __i + __e - (__j - __i) - 1; ++__k) { \
if(l[__k] > l[__k + 1]) { \
int __t = l[__k]; \
l[__k] = l[__k + 1]; \
l[__k + 1] = __t; \
} \
} \
} \
} \
} \
}
# define DIST_TRACE(l, e) if(TRACER_CONDITION) { \
for(int __i = 0; __i < (e); __i += VECTOR_WIDTH) { \
int __e = (((e) - __i) < VECTOR_WIDTH) ? ((e) - __i) : VECTOR_WIDTH; \
if(__e > 1) { \
fprintf(index_tracer_fp, "D: "); \
for(int __j = 0; __j < __e - 1; ++__j) { \
int __dist = abs(l[__i + __j + 1] - l[__i + __j]); \
fprintf(index_tracer_fp, "%d ", __dist); \
} \
fprintf(index_tracer_fp, "\n"); \
} \
} \
}
#else
# define INDEX_TRACER_INIT
# define INDEX_TRACER_END
# define INDEX_TRACE_NATOMS(nl, ng, mn)
# define INDEX_TRACE_ATOM(a)
# define INDEX_TRACE(l, e)
# define DIST_TRACE_SORT(l, e)
# define DIST_TRACE(l, e)
#endif
extern void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep);

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#ifndef __VTK_H_
#define __VTK_H_
extern void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep);
extern int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
extern int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep);
#endif

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <atom.h>
#ifndef __XTC_H_
#define __XTC_H_
#ifdef XTC_OUTPUT
void xtc_init(const char *, Atom*, int);
void xtc_write(Atom*, int, int, int);
void xtc_end();
#else
#define xtc_init(a,b,c)
#define xtc_write(a,b,c,d)
#define xtc_end()
#endif
#endif

344
clusterpair/main-stub.c Normal file
View File

@@ -0,0 +1,344 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
//---
#include <likwid-marker.h>
//---
#include <timing.h>
#include <allocate.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <stats.h>
#include <thermo.h>
#include <eam.h>
#include <pbc.h>
#include <timers.h>
#include <util.h>
#define HLINE "----------------------------------------------------------------------------\n"
extern double computeForceLJ_ref(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJ_4xn(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJ_2xnn(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
// Patterns
#define P_SEQ 0
#define P_FIX 1
#define P_RAND 2
void init(Parameter *param) {
param->input_file = NULL;
param->force_field = FF_LJ;
param->epsilon = 1.0;
param->sigma6 = 1.0;
param->rho = 0.8442;
param->ntypes = 4;
param->ntimes = 200;
param->nx = 1;
param->ny = 1;
param->nz = 1;
param->lattice = 1.0;
param->cutforce = 1000000.0;
param->cutneigh = param->cutforce;
param->mass = 1.0;
param->half_neigh = 0;
// Unused
param->dt = 0.005;
param->dtforce = 0.5 * param->dt;
param->nstat = 100;
param->temp = 1.44;
param->reneigh_every = 20;
param->proc_freq = 2.4;
param->eam_file = NULL;
}
void createNeighbors(Atom *atom, Neighbor *neighbor, int pattern, int nneighs, int nreps, int masked) {
const int maxneighs = nneighs * nreps;
const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
const int ncj = atom->Nclusters_local / jfac;
const unsigned int imask = NBNXN_INTERACTION_MASK_ALL;
neighbor->numneigh = (int*) malloc(atom->Nclusters_max * sizeof(int));
neighbor->numneigh_masked = (int*) malloc(atom->Nclusters_max * sizeof(int));
neighbor->neighbors = (int*) malloc(atom->Nclusters_max * maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int*) malloc(atom->Nclusters_max * maxneighs * sizeof(unsigned int));
if(pattern == P_RAND && ncj <= nneighs) {
fprintf(stderr, "Error: P_RAND: Number of j-clusters should be higher than number of j-cluster neighbors per i-cluster!\n");
exit(-1);
}
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]);
int j = (pattern == P_SEQ) ? CJ0_FROM_CI(ci) : 0;
int m = (pattern == P_SEQ) ? ncj : nneighs;
int k = 0;
for(int k = 0; k < nneighs; k++) {
if(pattern == P_RAND) {
int found = 0;
do {
int cj = rand() % ncj;
neighptr[k] = cj;
neighptr_imask[k] = imask;
found = 0;
for(int l = 0; l < k; l++) {
if(neighptr[l] == cj) {
found = 1;
}
}
} while(found == 1);
} else {
neighptr[k] = j;
neighptr_imask[k] = imask;
j = (j + 1) % m;
}
}
for(int r = 1; r < nreps; r++) {
for(int k = 0; k < nneighs; k++) {
neighptr[r * nneighs + k] = neighptr[k];
neighptr_imask[r * nneighs + k] = neighptr_imask[k];
}
}
neighbor->numneigh[ci] = nneighs * nreps;
neighbor->numneigh_masked[ci] = (masked == 1) ? (nneighs * nreps) : 0;
}
}
int main(int argc, const char *argv[]) {
Eam eam;
Atom atom_data;
Atom *atom = (Atom *)(&atom_data);
Neighbor neighbor;
Stats stats;
Parameter param;
char *pattern_str = NULL;
int pattern = P_SEQ;
int niclusters = 256; // Number of local i-clusters
int iclusters_natoms = CLUSTER_M; // Number of valid atoms within i-clusters
int nneighs = 9; // Number of j-cluster neighbors per i-cluster
int masked = 0; // Use masked loop
int nreps = 1;
int csv = 0;
LIKWID_MARKER_INIT;
LIKWID_MARKER_REGISTER("force");
DEBUG_MESSAGE("Initializing parameters...\n");
init(&param);
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-f") == 0)) {
if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-p") == 0)) {
pattern_str = strdup(argv[++i]);
if(strncmp(pattern_str, "seq", 3) == 0) { pattern = P_SEQ; }
else if(strncmp(pattern_str, "fix", 3) == 0) { pattern = P_FIX; }
else if(strncmp(pattern_str, "rand", 3) == 0) { pattern = P_RAND; }
else {
fprintf(stderr, "Invalid pattern!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-e") == 0)) {
param.eam_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-m") == 0)) {
masked = 1;
continue;
}
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
param.ntimes = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-ni") == 0)) {
niclusters = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-na") == 0)) {
iclusters_natoms = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nn") == 0)) {
nneighs = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nr") == 0)) {
nreps = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "--freq") == 0)) {
param.proc_freq = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--csv") == 0)) {
csv = 1;
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-f <string>: force field (lj or eam), default lj\n");
printf("-p <string>: pattern for data accesses (seq, fix or rand)\n");
printf("-n / --nsteps <int>: number of timesteps for simulation\n");
printf("-ni <int>: number of i-clusters (default 256)\n");
printf("-na <int>: number of atoms per i-cluster (default %d)\n", CLUSTER_M);
printf("-nn <int>: number of j-cluster neighbors per i-cluster (default 9)\n");
printf("-nr <int>: number of times neighbor lists should be replicated (default 1)\n");
printf("--freq <real>: set CPU frequency (GHz) and display average cycles per atom and neighbors\n");
printf("--csv: set output as CSV style\n");
printf(HLINE);
exit(EXIT_SUCCESS);
}
}
if(pattern_str == NULL) {
pattern_str = strdup("seq\0");
}
if(param.force_field == FF_EAM) {
DEBUG_MESSAGE("Initializing EAM parameters...\n");
initEam(&eam, &param);
}
DEBUG_MESSAGE("Initializing atoms...\n");
initAtom(atom);
initStats(&stats);
atom->ntypes = param.ntypes;
atom->epsilon = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->sigma6 = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutforcesq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
atom->cutneighsq = allocate(ALIGNMENT, atom->ntypes * atom->ntypes * sizeof(MD_FLOAT));
for(int i = 0; i < atom->ntypes * atom->ntypes; i++) {
atom->epsilon[i] = param.epsilon;
atom->sigma6[i] = param.sigma6;
atom->cutneighsq[i] = param.cutneigh * param.cutneigh;
atom->cutforcesq[i] = param.cutforce * param.cutforce;
}
DEBUG_MESSAGE("Creating atoms...\n");
while(atom->Nmax < niclusters * iclusters_natoms) {
growAtom(atom);
}
while(atom->Nclusters_max < niclusters) {
growClusters(atom);
}
for(int ci = 0; ci < niclusters; ++ci) {
int ci_sca_base = CI_SCALAR_BASE_INDEX(ci);
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
int *ci_type = &atom->cl_type[ci_sca_base];
for(int cii = 0; cii < iclusters_natoms; ++cii) {
ci_x[CL_X_OFFSET + cii] = (MD_FLOAT)(ci * iclusters_natoms + cii) * 0.00001;
ci_x[CL_Y_OFFSET + cii] = (MD_FLOAT)(ci * iclusters_natoms + cii) * 0.00001;
ci_x[CL_Z_OFFSET + cii] = (MD_FLOAT)(ci * iclusters_natoms + cii) * 0.00001;
ci_v[CL_X_OFFSET + cii] = 0.0;
ci_v[CL_Y_OFFSET + cii] = 0.0;
ci_v[CL_Z_OFFSET + cii] = 0.0;
ci_type[cii] = rand() % atom->ntypes;
atom->Nlocal++;
}
for(int cii = iclusters_natoms; cii < CLUSTER_M; cii++) {
ci_x[CL_X_OFFSET + cii] = INFINITY;
ci_x[CL_Y_OFFSET + cii] = INFINITY;
ci_x[CL_Z_OFFSET + cii] = INFINITY;
}
atom->iclusters[ci].natoms = iclusters_natoms;
atom->Nclusters_local++;
}
const double estim_atom_volume = (double)(atom->Nlocal * 3 * sizeof(MD_FLOAT));
const double estim_neighbors_volume = (double)(atom->Nlocal * (nneighs + 2) * sizeof(int));
const double estim_volume = (double)(atom->Nlocal * 6 * sizeof(MD_FLOAT) + estim_neighbors_volume);
if(!csv) {
printf("Kernel: %s, MxN: %dx%d, Vector width: %d\n", KERNEL_NAME, CLUSTER_M, CLUSTER_N, VECTOR_WIDTH);
printf("Floating-point precision: %s\n", PRECISION_STRING);
printf("Pattern: %s\n", pattern_str);
printf("Number of timesteps: %d\n", param.ntimes);
printf("Number of i-clusters: %d\n", niclusters);
printf("Number of atoms per i-cluster: %d\n", iclusters_natoms);
printf("Number of j-cluster neighbors per i-cluster: %d\n", nneighs);
printf("Number of times to replicate neighbor lists: %d\n", nreps);
printf("Estimated total data volume (kB): %.4f\n", estim_volume / 1000.0);
printf("Estimated atom data volume (kB): %.4f\n", estim_atom_volume / 1000.0);
printf("Estimated neighborlist data volume (kB): %.4f\n", estim_neighbors_volume / 1000.0);
}
DEBUG_MESSAGE("Defining j-clusters...\n");
defineJClusters(atom);
DEBUG_MESSAGE("Initializing neighbor lists...\n");
initNeighbor(&neighbor, &param);
DEBUG_MESSAGE("Creating neighbor lists...\n");
createNeighbors(atom, &neighbor, pattern, nneighs, nreps, masked);
DEBUG_MESSAGE("Computing forces...\n");
double T_accum = 0.0;
for(int i = 0; i < param.ntimes; i++) {
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, atom, &neighbor, i + 1);
#endif
if(param.force_field == FF_EAM) {
T_accum += computeForceEam(&eam, &param, atom, &neighbor, &stats);
} else {
T_accum += computeForceLJ(&param, atom, &neighbor, &stats);
}
}
double freq_hz = param.proc_freq * 1.e9;
const double atoms_updates_per_sec = (double)(atom->Nlocal) / T_accum * (double)(param.ntimes);
const double cycles_per_atom = T_accum / (double)(atom->Nlocal) / (double)(param.ntimes) * freq_hz;
const double cycles_per_neigh = cycles_per_atom / (double)(nneighs);
if(!csv) {
printf("Total time: %.4f, Mega atom updates/s: %.4f\n", T_accum, atoms_updates_per_sec / 1.e6);
if(param.proc_freq > 0.0) {
printf("Cycles per atom: %.4f, Cycles per neighbor: %.4f\n", cycles_per_atom, cycles_per_neigh);
}
} else {
printf("steps,pattern,niclusters,iclusters_natoms,nneighs,nreps,total vol.(kB),atoms vol.(kB),neigh vol.(kB),time(s),atom upds/s(M)");
if(param.proc_freq > 0.0) {
printf(",cy/atom,cy/neigh");
}
printf("\n");
printf("%d,%s,%d,%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f",
param.ntimes, pattern_str, niclusters, iclusters_natoms, nneighs, nreps,
estim_volume / 1.e3, estim_atom_volume / 1.e3, estim_neighbors_volume / 1.e3, T_accum, atoms_updates_per_sec / 1.e6);
if(param.proc_freq > 0.0) {
printf(",%.4f,%.4f", cycles_per_atom, cycles_per_neigh);
}
printf("\n");
}
double timer[NUMTIMER];
timer[FORCE] = T_accum;
displayStatistics(atom, &param, &stats, timer);
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

344
clusterpair/main.c Normal file
View File

@@ -0,0 +1,344 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <omp.h>
//--
#include <likwid-marker.h>
//--
#include <atom.h>
#include <allocate.h>
#include <device.h>
#include <eam.h>
#include <integrate.h>
#include <neighbor.h>
#include <parameter.h>
#include <pbc.h>
#include <stats.h>
#include <thermo.h>
#include <timers.h>
#include <timing.h>
#include <util.h>
#include <vtk.h>
#include <xtc.h>
#define HLINE "----------------------------------------------------------------------------\n"
extern double computeForceLJ_ref(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJ_4xn(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceLJ_2xnn(Parameter*, Atom*, Neighbor*, Stats*);
extern double computeForceEam(Eam*, Parameter*, Atom*, Neighbor*, Stats*);
#ifdef CUDA_TARGET
extern int isReneighboured;
extern double computeForceLJ_cuda(Parameter *param, Atom *atom, Neighbor *neighbor, Stats *stats);
extern void copyDataToCUDADevice(Atom *atom);
extern void copyDataFromCUDADevice(Atom *atom);
extern void cudaDeviceFree();
#endif
double setup(Parameter *param, Eam *eam, Atom *atom, Neighbor *neighbor, Stats *stats) {
if(param->force_field == FF_EAM) { initEam(eam, param); }
double S, E;
param->lattice = pow((4.0 / param->rho), (1.0 / 3.0));
param->xprd = param->nx * param->lattice;
param->yprd = param->ny * param->lattice;
param->zprd = param->nz * param->lattice;
S = getTimeStamp();
initAtom(atom);
initPbc(atom);
initStats(stats);
initNeighbor(neighbor, param);
if(param->input_file == NULL) {
createAtom(atom, param);
} else {
readAtom(atom, param);
}
setupNeighbor(param, atom);
setupThermo(param, atom->Natoms);
if(param->input_file == NULL) { adjustThermo(param, atom); }
buildClusters(atom);
defineJClusters(atom);
setupPbc(atom, param);
binClusters(atom);
buildNeighbor(atom, neighbor);
initDevice(atom, neighbor);
E = getTimeStamp();
return E-S;
}
double reneighbour(Parameter *param, Atom *atom, Neighbor *neighbor) {
double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("reneighbour");
updateSingleAtoms(atom);
updateAtomsPbc(atom, param);
buildClusters(atom);
defineJClusters(atom);
setupPbc(atom, param);
binClusters(atom);
buildNeighbor(atom, neighbor);
LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp();
return E-S;
}
void printAtomState(Atom *atom) {
printf("Atom counts: Natoms=%d Nlocal=%d Nghost=%d Nmax=%d\n",
atom->Natoms, atom->Nlocal, atom->Nghost, atom->Nmax);
/* int nall = atom->Nlocal + atom->Nghost; */
/* for (int i=0; i<nall; i++) { */
/* printf("%d %f %f %f\n", i, atom->x[i], atom->y[i], atom->z[i]); */
/* } */
}
int main(int argc, char** argv) {
double timer[NUMTIMER];
Eam eam;
Atom atom;
Neighbor neighbor;
Stats stats;
Parameter param;
LIKWID_MARKER_INIT;
#pragma omp parallel
{
LIKWID_MARKER_REGISTER("force");
//LIKWID_MARKER_REGISTER("reneighbour");
//LIKWID_MARKER_REGISTER("pbc");
}
initParameter(&param);
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--param") == 0)) {
readParameter(&param, argv[++i]);
continue;
}
if((strcmp(argv[i], "-f") == 0)) {
if((param.force_field = str2ff(argv[++i])) < 0) {
fprintf(stderr, "Invalid force field!\n");
exit(-1);
}
continue;
}
if((strcmp(argv[i], "-i") == 0)) {
param.input_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-e") == 0)) {
param.eam_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) {
param.ntimes = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nx") == 0)) {
param.nx = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-ny") == 0)) {
param.ny = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nz") == 0)) {
param.nz = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-half") == 0)) {
param.half_neigh = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-m") == 0) || (strcmp(argv[i], "--mass") == 0)) {
param.mass = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-r") == 0) || (strcmp(argv[i], "--radius") == 0)) {
param.cutforce = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--skin") == 0)) {
param.skin = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--freq") == 0)) {
param.proc_freq = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "--vtk") == 0)) {
param.vtk_file = strdup(argv[++i]);
continue;
}
if((strcmp(argv[i], "--xtc") == 0)) {
#ifndef XTC_OUTPUT
fprintf(stderr, "XTC not available, set XTC_OUTPUT option in config.mk file and recompile MD-Bench!");
exit(-1);
#else
param.xtc_file = strdup(argv[++i]);
#endif
continue;
}
if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
printf("MD Bench: A minimalistic re-implementation of miniMD\n");
printf(HLINE);
printf("-p <string>: file to read parameters from (can be specified more than once)\n");
printf("-f <string>: force field (lj or eam), default lj\n");
printf("-i <string>: input file with atom positions (dump)\n");
printf("-e <string>: input file for EAM\n");
printf("-n / --nsteps <int>: set number of timesteps for simulation\n");
printf("-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n");
printf("-r / --radius <real>: set cutoff radius\n");
printf("-s / --skin <real>: set skin (verlet buffer)\n");
printf("--freq <real>: processor frequency (GHz)\n");
printf("--vtk <string>: VTK file for visualization\n");
printf("--xtc <string>: XTC file for visualization\n");
printf(HLINE);
exit(EXIT_SUCCESS);
}
}
param.cutneigh = param.cutforce + param.skin;
setup(&param, &eam, &atom, &neighbor, &stats);
printParameter(&param);
printf(HLINE);
printf("step\ttemp\t\tpressure\n");
computeThermo(0, &param, &atom);
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
#ifdef CUDA_TARGET
copyDataToCUDADevice(&atom);
#endif
if(param.force_field == FF_EAM) {
timer[FORCE] = computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] = computeForceLJ(&param, &atom, &neighbor, &stats);
}
timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp();
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, 0);
}
if(param.xtc_file != NULL) {
xtc_init(param.xtc_file, &atom, 0);
}
for(int n = 0; n < param.ntimes; n++) {
initialIntegrate(&param, &atom);
if((n + 1) % param.reneigh_every) {
if(!((n + 1) % param.prune_every)) {
pruneNeighbor(&param, &atom, &neighbor);
}
updatePbc(&atom, &param, 0);
} else {
#ifdef CUDA_TARGET
copyDataFromCUDADevice(&atom);
#endif
timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
#ifdef CUDA_TARGET
copyDataToCUDADevice(&atom);
isReneighboured = 1;
#endif
}
#if defined(MEM_TRACER) || defined(INDEX_TRACER)
traceAddresses(&param, &atom, &neighbor, n + 1);
#endif
if(param.force_field == FF_EAM) {
timer[FORCE] += computeForceEam(&eam, &param, &atom, &neighbor, &stats);
} else {
timer[FORCE] += computeForceLJ(&param, &atom, &neighbor, &stats);
}
finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
computeThermo(n + 1, &param, &atom);
}
int write_pos = !((n + 1) % param.x_out_every);
int write_vel = !((n + 1) % param.v_out_every);
if(write_pos || write_vel) {
if(param.vtk_file != NULL) {
write_data_to_vtk_file(param.vtk_file, &atom, n + 1);
}
if(param.xtc_file != NULL) {
xtc_write(&atom, n + 1, write_pos, write_vel);
}
}
}
#ifdef CUDA_TARGET
copyDataFromCUDADevice(&atom);
#endif
timer[TOTAL] = getTimeStamp() - timer[TOTAL];
updateSingleAtoms(&atom);
computeThermo(-1, &param, &atom);
if(param.xtc_file != NULL) {
xtc_end();
}
#ifdef CUDA_TARGET
cudaDeviceFree();
#endif
printf(HLINE);
printf("System: %d atoms %d ghost atoms, Steps: %d\n", atom.Natoms, atom.Nghost, param.ntimes);
printf("TOTAL %.2fs FORCE %.2fs NEIGH %.2fs REST %.2fs\n",
timer[TOTAL], timer[FORCE], timer[NEIGH], timer[TOTAL]-timer[FORCE]-timer[NEIGH]);
printf(HLINE);
int nthreads = 0;
int chunkSize = 0;
omp_sched_t schedKind;
char schedType[10];
#pragma omp parallel
#pragma omp master
{
omp_get_schedule(&schedKind, &chunkSize);
switch (schedKind)
{
case omp_sched_static: strcpy(schedType, "static"); break;
case omp_sched_dynamic: strcpy(schedType, "dynamic"); break;
case omp_sched_guided: strcpy(schedType, "guided"); break;
case omp_sched_auto: strcpy(schedType, "auto"); break;
}
nthreads = omp_get_max_threads();
}
printf("Num threads: %d\n", nthreads);
printf("Schedule: (%s,%d)\n", schedType, chunkSize);
printf("Performance: %.2f million atom updates per second\n",
1e-6 * (double) atom.Natoms * param.ntimes / timer[TOTAL]);
#ifdef COMPUTE_STATS
displayStatistics(&atom, &param, &stats, timer);
#endif
LIKWID_MARKER_CLOSE;
return EXIT_SUCCESS;
}

939
clusterpair/neighbor.c Normal file
View File

@@ -0,0 +1,939 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <util.h>
#define SMALL 1.0e-6
#define FACTOR 0.999
static MD_FLOAT xprd, yprd, zprd;
static MD_FLOAT bininvx, bininvy;
static int mbinxlo, mbinylo;
static int nbinx, nbiny;
static int mbinx, mbiny; // n bins in x, y
static int *bincount;
static int *bins;
static int *bin_nclusters;
static int *bin_clusters;
static int mbins; //total number of bins
static int atoms_per_bin; // max atoms per bin
static int clusters_per_bin; // max clusters per bin
static MD_FLOAT cutneigh;
static MD_FLOAT cutneighsq; // neighbor cutoff squared
static int nmax;
static int nstencil; // # of bins in stencil
static int* stencil; // stencil list of bin offsets
static MD_FLOAT binsizex, binsizey;
static int coord2bin(MD_FLOAT, MD_FLOAT);
static MD_FLOAT bindist(int, int);
/* exported subroutines */
void initNeighbor(Neighbor *neighbor, Parameter *param) {
MD_FLOAT neighscale = 5.0 / 6.0;
xprd = param->nx * param->lattice;
yprd = param->ny * param->lattice;
zprd = param->nz * param->lattice;
cutneigh = param->cutneigh;
nmax = 0;
atoms_per_bin = 8;
clusters_per_bin = (atoms_per_bin / CLUSTER_M) + 10;
stencil = NULL;
bins = NULL;
bincount = NULL;
bin_clusters = NULL;
bin_nclusters = NULL;
neighbor->half_neigh = param->half_neigh;
neighbor->maxneighs = 100;
neighbor->numneigh = NULL;
neighbor->numneigh_masked = NULL;
neighbor->neighbors = NULL;
neighbor->neighbors_imask = NULL;
}
void setupNeighbor(Parameter *param, Atom *atom) {
MD_FLOAT coord;
int mbinxhi, mbinyhi;
int nextx, nexty, nextz;
if(param->input_file != NULL) {
xprd = param->xprd;
yprd = param->yprd;
zprd = param->zprd;
}
// TODO: update lo and hi for standard case and use them here instead
MD_FLOAT xlo = 0.0; MD_FLOAT xhi = xprd;
MD_FLOAT ylo = 0.0; MD_FLOAT yhi = yprd;
MD_FLOAT zlo = 0.0; MD_FLOAT zhi = zprd;
MD_FLOAT atom_density = ((MD_FLOAT)(atom->Nlocal)) / ((xhi - xlo) * (yhi - ylo) * (zhi - zlo));
MD_FLOAT atoms_in_cell = MAX(CLUSTER_M, CLUSTER_N);
MD_FLOAT targetsizex = cbrt(atoms_in_cell / atom_density);
MD_FLOAT targetsizey = cbrt(atoms_in_cell / atom_density);
nbinx = MAX(1, (int)ceil((xhi - xlo) / targetsizex));
nbiny = MAX(1, (int)ceil((yhi - ylo) / targetsizey));
binsizex = (xhi - xlo) / nbinx;
binsizey = (yhi - ylo) / nbiny;
bininvx = 1.0 / binsizex;
bininvy = 1.0 / binsizey;
cutneighsq = cutneigh * cutneigh;
coord = xlo - cutneigh - SMALL * xprd;
mbinxlo = (int)(coord * bininvx);
if(coord < 0.0) { mbinxlo = mbinxlo - 1; }
coord = xhi + cutneigh + SMALL * xprd;
mbinxhi = (int)(coord * bininvx);
coord = ylo - cutneigh - SMALL * yprd;
mbinylo = (int)(coord * bininvy);
if(coord < 0.0) { mbinylo = mbinylo - 1; }
coord = yhi + cutneigh + SMALL * yprd;
mbinyhi = (int)(coord * bininvy);
mbinxlo = mbinxlo - 1;
mbinxhi = mbinxhi + 1;
mbinx = mbinxhi - mbinxlo + 1;
mbinylo = mbinylo - 1;
mbinyhi = mbinyhi + 1;
mbiny = mbinyhi - mbinylo + 1;
nextx = (int)(cutneigh * bininvx);
nexty = (int)(cutneigh * bininvy);
if(nextx * binsizex < FACTOR * cutneigh) nextx++;
if(nexty * binsizey < FACTOR * cutneigh) nexty++;
if (stencil) { free(stencil); }
stencil = (int *) malloc((2 * nexty + 1) * (2 * nextx + 1) * sizeof(int));
nstencil = 0;
for(int j = -nexty; j <= nexty; j++) {
for(int i = -nextx; i <= nextx; i++) {
if(bindist(i, j) < cutneighsq) {
stencil[nstencil++] = j * mbinx + i;
}
}
}
if(bincount) { free(bincount); }
if(bins) { free(bins); }
if(bin_nclusters) { free(bin_nclusters); }
if(bin_clusters) { free(bin_clusters); }
mbins = mbinx * mbiny;
bincount = (int*) malloc(mbins * sizeof(int));
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
bin_nclusters = (int*) malloc(mbins * sizeof(int));
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
/*
DEBUG_MESSAGE("lo, hi = (%e, %e, %e), (%e, %e, %e)\n", xlo, ylo, zlo, xhi, yhi, zhi);
DEBUG_MESSAGE("binsize = %e, %e\n", binsizex, binsizey);
DEBUG_MESSAGE("mbin lo, hi = (%d, %d), (%d, %d)\n", mbinxlo, mbinylo, mbinxhi, mbinyhi);
DEBUG_MESSAGE("mbins = %d (%d x %d)\n", mbins, mbinx, mbiny);
DEBUG_MESSAGE("nextx = %d, nexty = %d\n", nextx, nexty);
*/
}
MD_FLOAT getBoundingBoxDistanceSq(Atom *atom, int ci, int cj) {
MD_FLOAT dl = atom->iclusters[ci].bbminx - atom->jclusters[cj].bbmaxx;
MD_FLOAT dh = atom->jclusters[cj].bbminx - atom->iclusters[ci].bbmaxx;
MD_FLOAT dm = MAX(dl, dh);
MD_FLOAT dm0 = MAX(dm, 0.0);
MD_FLOAT d2 = dm0 * dm0;
dl = atom->iclusters[ci].bbminy - atom->jclusters[cj].bbmaxy;
dh = atom->jclusters[cj].bbminy - atom->iclusters[ci].bbmaxy;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d2 += dm0 * dm0;
dl = atom->iclusters[ci].bbminz - atom->jclusters[cj].bbmaxz;
dh = atom->jclusters[cj].bbminz - atom->iclusters[ci].bbmaxz;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d2 += dm0 * dm0;
return d2;
}
int atomDistanceInRange(Atom *atom, int ci, int cj, MD_FLOAT rsq) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
MD_FLOAT delx = ci_x[CL_X_OFFSET + cii] - cj_x[CL_X_OFFSET + cjj];
MD_FLOAT dely = ci_x[CL_Y_OFFSET + cii] - cj_x[CL_Y_OFFSET + cjj];
MD_FLOAT delz = ci_x[CL_Z_OFFSET + cii] - cj_x[CL_Z_OFFSET + cjj];
if(delx * delx + dely * dely + delz * delz < rsq) {
return 1;
}
}
}
return 0;
}
/* Returns a diagonal or off-diagonal interaction mask for plain C lists */
static unsigned int get_imask(int rdiag, int ci, int cj) {
return (rdiag && ci == cj ? NBNXN_INTERACTION_MASK_DIAG : NBNXN_INTERACTION_MASK_ALL);
}
/* Returns a diagonal or off-diagonal interaction mask for cj-size=2 */
static unsigned int get_imask_simd_j2(int rdiag, int ci, int cj) {
return (rdiag && ci * 2 == cj ? NBNXN_INTERACTION_MASK_DIAG_J2_0
: (rdiag && ci * 2 + 1 == cj ? NBNXN_INTERACTION_MASK_DIAG_J2_1
: NBNXN_INTERACTION_MASK_ALL));
}
/* Returns a diagonal or off-diagonal interaction mask for cj-size=4 */
static unsigned int get_imask_simd_j4(int rdiag, int ci, int cj) {
return (rdiag && ci == cj ? NBNXN_INTERACTION_MASK_DIAG : NBNXN_INTERACTION_MASK_ALL);
}
/* Returns a diagonal or off-diagonal interaction mask for cj-size=8 */
static unsigned int get_imask_simd_j8(int rdiag, int ci, int cj) {
return (rdiag && ci == cj * 2 ? NBNXN_INTERACTION_MASK_DIAG_J8_0
: (rdiag && ci == cj * 2 + 1 ? NBNXN_INTERACTION_MASK_DIAG_J8_1
: NBNXN_INTERACTION_MASK_ALL));
}
#if VECTOR_WIDTH == 2
# define get_imask_simd_4xn get_imask_simd_j2
#elif VECTOR_WIDTH== 4
# define get_imask_simd_4xn get_imask_simd_j4
#elif VECTOR_WIDTH == 8
# define get_imask_simd_4xn get_imask_simd_j8
# define get_imask_simd_2xnn get_imask_simd_j4
#elif VECTOR_WIDTH == 16
# define get_imask_simd_2xnn get_imask_simd_j8
#else
# error "Invalid cluster configuration"
#endif
void buildNeighbor(Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("buildNeighbor start\n");
/* extend atom arrays if necessary */
if(atom->Nclusters_local > nmax) {
nmax = atom->Nclusters_local;
if(neighbor->numneigh) free(neighbor->numneigh);
if(neighbor->numneigh_masked) free(neighbor->numneigh_masked);
if(neighbor->neighbors) free(neighbor->neighbors);
if(neighbor->neighbors_imask) free(neighbor->neighbors_imask);
neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
neighbor->numneigh_masked = (int*) malloc(nmax * sizeof(int));
neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int*) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
}
MD_FLOAT bbx = 0.5 * (binsizex + binsizex);
MD_FLOAT bby = 0.5 * (binsizey + binsizey);
MD_FLOAT rbb_sq = MAX(0.0, cutneigh - 0.5 * sqrt(bbx * bbx + bby * bby));
rbb_sq = rbb_sq * rbb_sq;
int resize = 1;
/* loop over each atom, storing neighbors */
while(resize) {
int new_maxneighs = neighbor->maxneighs;
resize = 0;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_cj1 = CJ1_FROM_CI(ci);
int *neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
unsigned int *neighptr_imask = &(neighbor->neighbors_imask[ci * neighbor->maxneighs]);
int n = 0, nmasked = 0;
int ibin = atom->icluster_bin[ci];
MD_FLOAT ibb_xmin = atom->iclusters[ci].bbminx;
MD_FLOAT ibb_xmax = atom->iclusters[ci].bbmaxx;
MD_FLOAT ibb_ymin = atom->iclusters[ci].bbminy;
MD_FLOAT ibb_ymax = atom->iclusters[ci].bbmaxy;
MD_FLOAT ibb_zmin = atom->iclusters[ci].bbminz;
MD_FLOAT ibb_zmax = atom->iclusters[ci].bbmaxz;
for(int k = 0; k < nstencil; k++) {
int jbin = ibin + stencil[k];
int *loc_bin = &bin_clusters[jbin * clusters_per_bin];
int cj, m = -1;
MD_FLOAT jbb_xmin, jbb_xmax, jbb_ymin, jbb_ymax, jbb_zmin, jbb_zmax;
const int c = bin_nclusters[jbin];
if(c > 0) {
MD_FLOAT dl, dh, dm, dm0, d_bb_sq;
do {
m++;
cj = loc_bin[m];
if(neighbor->half_neigh && ci_cj1 > cj) {
continue;
}
jbb_zmin = atom->jclusters[cj].bbminz;
jbb_zmax = atom->jclusters[cj].bbmaxz;
dl = ibb_zmin - jbb_zmax;
dh = jbb_zmin - ibb_zmax;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d_bb_sq = dm0 * dm0;
} while(m + 1 < c && d_bb_sq > cutneighsq);
jbb_xmin = atom->jclusters[cj].bbminx;
jbb_xmax = atom->jclusters[cj].bbmaxx;
jbb_ymin = atom->jclusters[cj].bbminy;
jbb_ymax = atom->jclusters[cj].bbmaxy;
while(m < c) {
if(!neighbor->half_neigh || ci_cj1 <= cj) {
dl = ibb_zmin - jbb_zmax;
dh = jbb_zmin - ibb_zmax;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d_bb_sq = dm0 * dm0;
/*if(d_bb_sq > cutneighsq) {
break;
}*/
dl = ibb_ymin - jbb_ymax;
dh = jbb_ymin - ibb_ymax;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d_bb_sq += dm0 * dm0;
dl = ibb_xmin - jbb_xmax;
dh = jbb_xmin - ibb_xmax;
dm = MAX(dl, dh);
dm0 = MAX(dm, 0.0);
d_bb_sq += dm0 * dm0;
if(d_bb_sq < cutneighsq) {
if(d_bb_sq < rbb_sq || atomDistanceInRange(atom, ci, cj, cutneighsq)) {
// We use true (1) for rdiag because we only care if there are masks
// at all, and when this is set to false (0) the self-exclusions are
// not accounted for, which makes the optimized version to not work!
unsigned int imask;
#if CLUSTER_N == (VECTOR_WIDTH / 2) // 2xnn
imask = get_imask_simd_2xnn(1, ci, cj);
#else // 4xn
imask = get_imask_simd_4xn(1, ci, cj);
#endif
if(n < neighbor->maxneighs) {
if(imask == NBNXN_INTERACTION_MASK_ALL) {
neighptr[n] = cj;
neighptr_imask[n] = imask;
} else {
neighptr[n] = neighptr[nmasked];
neighptr_imask[n] = neighptr_imask[nmasked];
neighptr[nmasked] = cj;
neighptr_imask[nmasked] = imask;
nmasked++;
}
}
n++;
}
}
}
m++;
if(m < c) {
cj = loc_bin[m];
jbb_xmin = atom->jclusters[cj].bbminx;
jbb_xmax = atom->jclusters[cj].bbmaxx;
jbb_ymin = atom->jclusters[cj].bbminy;
jbb_ymax = atom->jclusters[cj].bbmaxy;
jbb_zmin = atom->jclusters[cj].bbminz;
jbb_zmax = atom->jclusters[cj].bbmaxz;
}
}
}
}
// Fill neighbor list with dummy values to fit vector width
if(CLUSTER_N < VECTOR_WIDTH) {
while(n % (VECTOR_WIDTH / CLUSTER_N)) {
neighptr[n] = atom->dummy_cj; // Last cluster is always a dummy cluster
neighptr_imask[n] = 0;
n++;
}
}
neighbor->numneigh[ci] = n;
neighbor->numneigh_masked[ci] = nmasked;
if(n >= neighbor->maxneighs) {
resize = 1;
if(n >= new_maxneighs) {
new_maxneighs = n;
}
}
}
if(resize) {
neighbor->maxneighs = new_maxneighs * 1.2;
fprintf(stdout, "RESIZE %d\n", neighbor->maxneighs);
free(neighbor->neighbors);
free(neighbor->neighbors_imask);
neighbor->neighbors = (int *) malloc(nmax * neighbor->maxneighs * sizeof(int));
neighbor->neighbors_imask = (unsigned int *) malloc(nmax * neighbor->maxneighs * sizeof(unsigned int));
}
}
/*
DEBUG_MESSAGE("\ncutneighsq = %f, rbb_sq = %f\n", cutneighsq, rbb_sq);
for(int ci = 0; ci < 6; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
int* neighptr = &(neighbor->neighbors[ci * neighbor->maxneighs]);
DEBUG_MESSAGE("Cluster %d, bbx = {%f, %f}, bby = {%f, %f}, bbz = {%f, %f}\n",
ci,
atom->iclusters[ci].bbminx,
atom->iclusters[ci].bbmaxx,
atom->iclusters[ci].bbminy,
atom->iclusters[ci].bbmaxy,
atom->iclusters[ci].bbminz,
atom->iclusters[ci].bbmaxz);
for(int cii = 0; cii < CLUSTER_M; cii++) {
DEBUG_MESSAGE("%f, %f, %f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
}
DEBUG_MESSAGE("Neighbors:\n");
for(int k = 0; k < neighbor->numneigh[ci]; k++) {
int cj = neighptr[k];
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
DEBUG_MESSAGE(" Cluster %d, bbx = {%f, %f}, bby = {%f, %f}, bbz = {%f, %f}\n",
cj,
atom->jclusters[cj].bbminx,
atom->jclusters[cj].bbmaxx,
atom->jclusters[cj].bbminy,
atom->jclusters[cj].bbmaxy,
atom->jclusters[cj].bbminz,
atom->jclusters[cj].bbmaxz);
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
DEBUG_MESSAGE(" %f, %f, %f\n", cj_x[CL_X_OFFSET + cjj], cj_x[CL_Y_OFFSET + cjj], cj_x[CL_Z_OFFSET + cjj]);
}
}
}
*/
DEBUG_MESSAGE("buildNeighbor end\n");
}
void pruneNeighbor(Parameter *param, Atom *atom, Neighbor *neighbor) {
DEBUG_MESSAGE("pruneNeighbor start\n");
//MD_FLOAT cutsq = param->cutforce * param->cutforce;
MD_FLOAT cutsq = cutneighsq;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int *neighs = &neighbor->neighbors[ci * neighbor->maxneighs];
unsigned int *neighs_imask = &neighbor->neighbors_imask[ci * neighbor->maxneighs];
int numneighs = neighbor->numneigh[ci];
int numneighs_masked = neighbor->numneigh_masked[ci];
int k = 0;
// Remove dummy clusters if necessary
if(CLUSTER_N < VECTOR_WIDTH) {
while(neighs[numneighs - 1] == atom->dummy_cj) {
numneighs--;
}
}
while(k < numneighs) {
int cj = neighs[k];
if(atomDistanceInRange(atom, ci, cj, cutsq)) {
k++;
} else {
numneighs--;
if(k < numneighs_masked) {
numneighs_masked--;
}
neighs[k] = neighs[numneighs];
}
}
// Readd dummy clusters if necessary
if(CLUSTER_N < VECTOR_WIDTH) {
while(numneighs % (VECTOR_WIDTH / CLUSTER_N)) {
neighs[numneighs] = atom->dummy_cj; // Last cluster is always a dummy cluster
neighs_imask[numneighs] = 0;
numneighs++;
}
}
neighbor->numneigh[ci] = numneighs;
neighbor->numneigh_masked[ci] = numneighs_masked;
}
DEBUG_MESSAGE("pruneNeighbor end\n");
}
/* internal subroutines */
MD_FLOAT bindist(int i, int j) {
MD_FLOAT delx, dely, delz;
if(i > 0) {
delx = (i - 1) * binsizex;
} else if(i == 0) {
delx = 0.0;
} else {
delx = (i + 1) * binsizex;
}
if(j > 0) {
dely = (j - 1) * binsizey;
} else if(j == 0) {
dely = 0.0;
} else {
dely = (j + 1) * binsizey;
}
return (delx * delx + dely * dely);
}
int coord2bin(MD_FLOAT xin, MD_FLOAT yin) {
int ix, iy;
if(xin >= xprd) {
ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
ix = (int)(xin * bininvx) - mbinxlo;
} else {
ix = (int)(xin * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
iy = (int)(yin * bininvy) - mbinylo;
} else {
iy = (int)(yin * bininvy) - mbinylo - 1;
}
return (iy * mbinx + ix + 1);
}
void coord2bin2D(MD_FLOAT xin, MD_FLOAT yin, int *ix, int *iy) {
if(xin >= xprd) {
*ix = (int)((xin - xprd) * bininvx) + nbinx - mbinxlo;
} else if(xin >= 0.0) {
*ix = (int)(xin * bininvx) - mbinxlo;
} else {
*ix = (int)(xin * bininvx) - mbinxlo - 1;
}
if(yin >= yprd) {
*iy = (int)((yin - yprd) * bininvy) + nbiny - mbinylo;
} else if(yin >= 0.0) {
*iy = (int)(yin * bininvy) - mbinylo;
} else {
*iy = (int)(yin * bininvy) - mbinylo - 1;
}
}
void binAtoms(Atom *atom) {
DEBUG_MESSAGE("binAtoms start\n");
int resize = 1;
while(resize > 0) {
resize = 0;
for(int i = 0; i < mbins; i++) {
bincount[i] = 0;
}
for(int i = 0; i < atom->Nlocal; i++) {
int ibin = coord2bin(atom_x(i), atom_y(i));
if(bincount[ibin] < atoms_per_bin) {
int ac = bincount[ibin]++;
bins[ibin * atoms_per_bin + ac] = i;
} else {
resize = 1;
}
}
if(resize) {
free(bins);
atoms_per_bin *= 2;
bins = (int*) malloc(mbins * atoms_per_bin * sizeof(int));
}
}
DEBUG_MESSAGE("binAtoms end\n");
}
// TODO: Use pigeonhole sorting
void sortAtomsByZCoord(Atom *atom) {
DEBUG_MESSAGE("sortAtomsByZCoord start\n");
for(int bin = 0; bin < mbins; bin++) {
int c = bincount[bin];
int *bin_ptr = &bins[bin * atoms_per_bin];
for(int ac_i = 0; ac_i < c; ac_i++) {
int i = bin_ptr[ac_i];
int min_ac = ac_i;
int min_idx = i;
MD_FLOAT min_z = atom_z(i);
for(int ac_j = ac_i + 1; ac_j < c; ac_j++) {
int j = bin_ptr[ac_j];
MD_FLOAT zj = atom_z(j);
if(zj < min_z) {
min_ac = ac_j;
min_idx = j;
min_z = zj;
}
}
bin_ptr[ac_i] = min_idx;
bin_ptr[min_ac] = i;
}
}
DEBUG_MESSAGE("sortAtomsByZCoord end\n");
}
void buildClusters(Atom *atom) {
DEBUG_MESSAGE("buildClusters start\n");
atom->Nclusters_local = 0;
/* bin local atoms */
binAtoms(atom);
sortAtomsByZCoord(atom);
for(int bin = 0; bin < mbins; bin++) {
int c = bincount[bin];
int ac = 0;
int nclusters = ((c + CLUSTER_M - 1) / CLUSTER_M);
if(CLUSTER_N > CLUSTER_M && nclusters % 2) { nclusters++; }
for(int cl = 0; cl < nclusters; cl++) {
const int ci = atom->Nclusters_local;
if(ci >= atom->Nclusters_max) {
growClusters(atom);
}
int ci_sca_base = CI_SCALAR_BASE_INDEX(ci);
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
int *ci_type = &atom->cl_type[ci_sca_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
atom->iclusters[ci].natoms = 0;
for(int cii = 0; cii < CLUSTER_M; cii++) {
if(ac < c) {
int i = bins[bin * atoms_per_bin + ac];
MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i);
ci_x[CL_X_OFFSET + cii] = xtmp;
ci_x[CL_Y_OFFSET + cii] = ytmp;
ci_x[CL_Z_OFFSET + cii] = ztmp;
ci_v[CL_X_OFFSET + cii] = atom->vx[i];
ci_v[CL_Y_OFFSET + cii] = atom->vy[i];
ci_v[CL_Z_OFFSET + cii] = atom->vz[i];
// TODO: To create the bounding boxes faster, we can use SIMD operations
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
ci_type[cii] = atom->type[i];
atom->iclusters[ci].natoms++;
} else {
ci_x[CL_X_OFFSET + cii] = INFINITY;
ci_x[CL_Y_OFFSET + cii] = INFINITY;
ci_x[CL_Z_OFFSET + cii] = INFINITY;
}
ac++;
}
atom->icluster_bin[ci] = bin;
atom->iclusters[ci].bbminx = bbminx;
atom->iclusters[ci].bbmaxx = bbmaxx;
atom->iclusters[ci].bbminy = bbminy;
atom->iclusters[ci].bbmaxy = bbmaxy;
atom->iclusters[ci].bbminz = bbminz;
atom->iclusters[ci].bbmaxz = bbmaxz;
atom->Nclusters_local++;
}
}
DEBUG_MESSAGE("buildClusters end\n");
}
void defineJClusters(Atom *atom) {
DEBUG_MESSAGE("defineJClusters start\n");
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int cj0 = CJ0_FROM_CI(ci);
if(CLUSTER_M == CLUSTER_N) {
atom->jclusters[cj0].bbminx = atom->iclusters[ci].bbminx;
atom->jclusters[cj0].bbmaxx = atom->iclusters[ci].bbmaxx;
atom->jclusters[cj0].bbminy = atom->iclusters[ci].bbminy;
atom->jclusters[cj0].bbmaxy = atom->iclusters[ci].bbmaxy;
atom->jclusters[cj0].bbminz = atom->iclusters[ci].bbminz;
atom->jclusters[cj0].bbmaxz = atom->iclusters[ci].bbmaxz;
atom->jclusters[cj0].natoms = atom->iclusters[ci].natoms;
} else if(CLUSTER_M > CLUSTER_N) {
int cj1 = CJ1_FROM_CI(ci);
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
for(int cii = 0; cii < MAX(atom->iclusters[ci].natoms, CLUSTER_N); cii++) {
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii];
// TODO: To create the bounding boxes faster, we can use SIMD operations
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
}
atom->jclusters[cj0].bbminx = bbminx;
atom->jclusters[cj0].bbmaxx = bbmaxx;
atom->jclusters[cj0].bbminy = bbminy;
atom->jclusters[cj0].bbmaxy = bbmaxy;
atom->jclusters[cj0].bbminz = bbminz;
atom->jclusters[cj0].bbmaxz = bbmaxz;
atom->jclusters[cj0].natoms = MAX(atom->iclusters[ci].natoms, CLUSTER_N);
bbminx = INFINITY, bbmaxx = -INFINITY;
bbminy = INFINITY, bbmaxy = -INFINITY;
bbminz = INFINITY, bbmaxz = -INFINITY;
for(int cii = CLUSTER_N; cii < atom->iclusters[ci].natoms; cii++) {
MD_FLOAT xtmp = ci_x[CL_X_OFFSET + cii];
MD_FLOAT ytmp = ci_x[CL_Y_OFFSET + cii];
MD_FLOAT ztmp = ci_x[CL_Z_OFFSET + cii];
// TODO: To create the bounding boxes faster, we can use SIMD operations
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
}
atom->jclusters[cj1].bbminx = bbminx;
atom->jclusters[cj1].bbmaxx = bbmaxx;
atom->jclusters[cj1].bbminy = bbminy;
atom->jclusters[cj1].bbmaxy = bbmaxy;
atom->jclusters[cj1].bbminz = bbminz;
atom->jclusters[cj1].bbmaxz = bbmaxz;
atom->jclusters[cj1].natoms = MIN(0, atom->iclusters[ci].natoms - CLUSTER_N);
} else {
if(ci % 2 == 0) {
const int ci1 = ci + 1;
atom->jclusters[cj0].bbminx = MIN(atom->iclusters[ci].bbminx, atom->iclusters[ci1].bbminx);
atom->jclusters[cj0].bbmaxx = MAX(atom->iclusters[ci].bbmaxx, atom->iclusters[ci1].bbmaxx);
atom->jclusters[cj0].bbminy = MIN(atom->iclusters[ci].bbminy, atom->iclusters[ci1].bbminy);
atom->jclusters[cj0].bbmaxy = MAX(atom->iclusters[ci].bbmaxy, atom->iclusters[ci1].bbmaxy);
atom->jclusters[cj0].bbminz = MIN(atom->iclusters[ci].bbminz, atom->iclusters[ci1].bbminz);
atom->jclusters[cj0].bbmaxz = MAX(atom->iclusters[ci].bbmaxz, atom->iclusters[ci1].bbmaxz);
atom->jclusters[cj0].natoms = atom->iclusters[ci].natoms + atom->iclusters[ci1].natoms;
}
}
}
DEBUG_MESSAGE("defineJClusters end\n");
}
void binClusters(Atom *atom) {
DEBUG_MESSAGE("binClusters start\n");
/*
DEBUG_MESSAGE("Nghost = %d\n", atom->Nclusters_ghost);
for(int ci = atom->Nclusters_local; ci < atom->Nclusters_local + 4; ci++) {
MD_FLOAT *cptr = cluster_pos_ptr(ci);
DEBUG_MESSAGE("Cluster %d:\n", ci);
DEBUG_MESSAGE("bin=%d, Natoms=%d, bbox={%f,%f},{%f,%f},{%f,%f}\n",
atom->icluster_bin[ci],
atom->clusters[ci].natoms,
atom->clusters[ci].bbminx,
atom->clusters[ci].bbmaxx,
atom->clusters[ci].bbminy,
atom->clusters[ci].bbmaxy,
atom->clusters[ci].bbminz,
atom->clusters[ci].bbmaxz);
for(int cii = 0; cii < CLUSTER_M; cii++) {
DEBUG_MESSAGE("%f, %f, %f\n", cluster_x(cptr, cii), cluster_y(cptr, cii), cluster_z(cptr, cii));
}
}
*/
const int nlocal = atom->Nclusters_local;
const int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
const int ncj = atom->Nclusters_local / jfac;
int resize = 1;
while(resize > 0) {
resize = 0;
for(int bin = 0; bin < mbins; bin++) {
bin_nclusters[bin] = 0;
}
for(int ci = 0; ci < nlocal && !resize; ci++) {
// Assure we add this j-cluster only once in the bin
if(!(CLUSTER_M < CLUSTER_N && ci % 2)) {
int bin = atom->icluster_bin[ci];
int c = bin_nclusters[bin];
if(c + 1 < clusters_per_bin) {
bin_clusters[bin * clusters_per_bin + c] = CJ0_FROM_CI(ci);
bin_nclusters[bin]++;
if(CLUSTER_M > CLUSTER_N) {
int cj1 = CJ1_FROM_CI(ci);
if(atom->jclusters[cj1].natoms > 0) {
bin_clusters[bin * clusters_per_bin + c + 1] = cj1;
bin_nclusters[bin]++;
}
}
} else {
resize = 1;
}
}
}
for(int cg = 0; cg < atom->Nclusters_ghost && !resize; cg++) {
const int cj = ncj + cg;
int ix = -1, iy = -1;
MD_FLOAT xtmp, ytmp;
if(atom->jclusters[cj].natoms > 0) {
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT cj_minz = atom->jclusters[cj].bbminz;
xtmp = cj_x[CL_X_OFFSET + 0];
ytmp = cj_x[CL_Y_OFFSET + 0];
coord2bin2D(xtmp, ytmp, &ix, &iy);
ix = MAX(MIN(ix, mbinx - 1), 0);
iy = MAX(MIN(iy, mbiny - 1), 0);
for(int cjj = 1; cjj < atom->jclusters[cj].natoms; cjj++) {
int nix, niy;
xtmp = cj_x[CL_X_OFFSET + cjj];
ytmp = cj_x[CL_Y_OFFSET + cjj];
coord2bin2D(xtmp, ytmp, &nix, &niy);
nix = MAX(MIN(nix, mbinx - 1), 0);
niy = MAX(MIN(niy, mbiny - 1), 0);
// Always put the cluster on the bin of its innermost atom so
// the cluster should be closer to local clusters
if(atom->PBCx[cg] > 0 && ix > nix) { ix = nix; }
if(atom->PBCx[cg] < 0 && ix < nix) { ix = nix; }
if(atom->PBCy[cg] > 0 && iy > niy) { iy = niy; }
if(atom->PBCy[cg] < 0 && iy < niy) { iy = niy; }
}
int bin = iy * mbinx + ix + 1;
int c = bin_nclusters[bin];
if(c < clusters_per_bin) {
// Insert the current ghost cluster in the bin keeping clusters
// sorted by z coordinate
int inserted = 0;
for(int i = 0; i < c; i++) {
int last_cl = bin_clusters[bin * clusters_per_bin + i];
if(atom->jclusters[last_cl].bbminz > cj_minz) {
bin_clusters[bin * clusters_per_bin + i] = cj;
for(int j = i + 1; j <= c; j++) {
int tmp = bin_clusters[bin * clusters_per_bin + j];
bin_clusters[bin * clusters_per_bin + j] = last_cl;
last_cl = tmp;
}
inserted = 1;
break;
}
}
if(!inserted) {
bin_clusters[bin * clusters_per_bin + c] = cj;
}
bin_nclusters[bin]++;
} else {
resize = 1;
}
}
}
if(resize) {
free(bin_clusters);
clusters_per_bin *= 2;
bin_clusters = (int*) malloc(mbins * clusters_per_bin * sizeof(int));
}
}
/*
DEBUG_MESSAGE("bin_nclusters\n");
for(int i = 0; i < mbins; i++) { DEBUG_MESSAGE("%d, ", bin_nclusters[i]); }
DEBUG_MESSAGE("\n");
*/
DEBUG_MESSAGE("binClusters stop\n");
}
void updateSingleAtoms(Atom *atom) {
DEBUG_MESSAGE("updateSingleAtoms start\n");
int Natom = 0;
for(int ci = 0; ci < atom->Nclusters_local; ci++) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
MD_FLOAT *ci_v = &atom->cl_v[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; cii++) {
atom_x(Natom) = ci_x[CL_X_OFFSET + cii];
atom_y(Natom) = ci_x[CL_Y_OFFSET + cii];
atom_z(Natom) = ci_x[CL_Z_OFFSET + cii];
atom->vx[Natom] = ci_v[CL_X_OFFSET + cii];
atom->vy[Natom] = ci_v[CL_Y_OFFSET + cii];
atom->vz[Natom] = ci_v[CL_Z_OFFSET + cii];
Natom++;
}
}
if(Natom != atom->Nlocal) {
fprintf(stderr, "updateSingleAtoms(): Number of atoms changed!\n");
}
DEBUG_MESSAGE("updateSingleAtoms stop\n");
}

231
clusterpair/pbc.c Normal file
View File

@@ -0,0 +1,231 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <pbc.h>
#include <atom.h>
#include <allocate.h>
#include <neighbor.h>
#include <util.h>
#define DELTA 20000
static int NmaxGhost;
static void growPbc(Atom*);
/* exported subroutines */
void initPbc(Atom* atom) {
NmaxGhost = 0;
atom->border_map = NULL;
atom->PBCx = NULL; atom->PBCy = NULL; atom->PBCz = NULL;
}
/* update coordinates of ghost atoms */
/* uses mapping created in setupPbc */
void cpuUpdatePbc(Atom *atom, Parameter *param, int firstUpdate) {
DEBUG_MESSAGE("updatePbc start\n");
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
int ncj = atom->Nclusters_local / jfac;
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for(int cg = 0; cg < atom->Nclusters_ghost; cg++) {
const int cj = ncj + cg;
int cj_vec_base = CJ_VECTOR_BASE_INDEX(cj);
int bmap_vec_base = CJ_VECTOR_BASE_INDEX(atom->border_map[cg]);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
MD_FLOAT *bmap_x = &atom->cl_x[bmap_vec_base];
MD_FLOAT bbminx = INFINITY, bbmaxx = -INFINITY;
MD_FLOAT bbminy = INFINITY, bbmaxy = -INFINITY;
MD_FLOAT bbminz = INFINITY, bbmaxz = -INFINITY;
for(int cjj = 0; cjj < atom->jclusters[cj].natoms; cjj++) {
MD_FLOAT xtmp = bmap_x[CL_X_OFFSET + cjj] + atom->PBCx[cg] * xprd;
MD_FLOAT ytmp = bmap_x[CL_Y_OFFSET + cjj] + atom->PBCy[cg] * yprd;
MD_FLOAT ztmp = bmap_x[CL_Z_OFFSET + cjj] + atom->PBCz[cg] * zprd;
cj_x[CL_X_OFFSET + cjj] = xtmp;
cj_x[CL_Y_OFFSET + cjj] = ytmp;
cj_x[CL_Z_OFFSET + cjj] = ztmp;
if(firstUpdate) {
// TODO: To create the bounding boxes faster, we can use SIMD operations
if(bbminx > xtmp) { bbminx = xtmp; }
if(bbmaxx < xtmp) { bbmaxx = xtmp; }
if(bbminy > ytmp) { bbminy = ytmp; }
if(bbmaxy < ytmp) { bbmaxy = ytmp; }
if(bbminz > ztmp) { bbminz = ztmp; }
if(bbmaxz < ztmp) { bbmaxz = ztmp; }
}
}
if(firstUpdate) {
for(int cjj = atom->jclusters[cj].natoms; cjj < CLUSTER_N; cjj++) {
cj_x[CL_X_OFFSET + cjj] = INFINITY;
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
}
atom->jclusters[cj].bbminx = bbminx;
atom->jclusters[cj].bbmaxx = bbmaxx;
atom->jclusters[cj].bbminy = bbminy;
atom->jclusters[cj].bbmaxy = bbmaxy;
atom->jclusters[cj].bbminz = bbminz;
atom->jclusters[cj].bbmaxz = bbmaxz;
}
}
DEBUG_MESSAGE("updatePbc end\n");
}
/* relocate atoms that have left domain according
* to periodic boundary conditions */
void updateAtomsPbc(Atom *atom, Parameter *param) {
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
for(int i = 0; i < atom->Nlocal; i++) {
if(atom_x(i) < 0.0) {
atom_x(i) += xprd;
} else if(atom_x(i) >= xprd) {
atom_x(i) -= xprd;
}
if(atom_y(i) < 0.0) {
atom_y(i) += yprd;
} else if(atom_y(i) >= yprd) {
atom_y(i) -= yprd;
}
if(atom_z(i) < 0.0) {
atom_z(i) += zprd;
} else if(atom_z(i) >= zprd) {
atom_z(i) -= zprd;
}
}
}
/* setup periodic boundary conditions by
* defining ghost atoms around domain
* only creates mapping and coordinate corrections
* that are then enforced in updatePbc */
#define ADDGHOST(dx,dy,dz); \
Nghost++; \
const int cg = ncj + Nghost; \
const int cj_natoms = atom->jclusters[cj].natoms; \
atom->border_map[Nghost] = cj; \
atom->PBCx[Nghost] = dx; \
atom->PBCy[Nghost] = dy; \
atom->PBCz[Nghost] = dz; \
atom->jclusters[cg].natoms = cj_natoms; \
Nghost_atoms += cj_natoms; \
int cj_sca_base = CJ_SCALAR_BASE_INDEX(cj); \
int cg_sca_base = CJ_SCALAR_BASE_INDEX(cg); \
for(int cjj = 0; cjj < cj_natoms; cjj++) { \
atom->cl_type[cg_sca_base + cjj] = atom->cl_type[cj_sca_base + cjj]; \
}
/* internal subroutines */
void growPbc(Atom* atom) {
int nold = NmaxGhost;
NmaxGhost += DELTA;
atom->border_map = (int*) reallocate(atom->border_map, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCx = (int*) reallocate(atom->PBCx, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCy = (int*) reallocate(atom->PBCy, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
atom->PBCz = (int*) reallocate(atom->PBCz, ALIGNMENT, NmaxGhost * sizeof(int), nold * sizeof(int));
}
void setupPbc(Atom *atom, Parameter *param) {
DEBUG_MESSAGE("setupPbc start\n");
MD_FLOAT xprd = param->xprd;
MD_FLOAT yprd = param->yprd;
MD_FLOAT zprd = param->zprd;
MD_FLOAT Cutneigh = param->cutneigh;
int jfac = MAX(1, CLUSTER_N / CLUSTER_M);
int ncj = atom->Nclusters_local / jfac;
int Nghost = -1;
int Nghost_atoms = 0;
for(int cj = 0; cj < ncj; cj++) {
if(atom->jclusters[cj].natoms > 0) {
if(atom->Nclusters_local + (Nghost + 7) * jfac >= atom->Nclusters_max) {
growClusters(atom);
}
if((Nghost + 7) * jfac >= NmaxGhost) {
growPbc(atom);
}
MD_FLOAT bbminx = atom->jclusters[cj].bbminx;
MD_FLOAT bbmaxx = atom->jclusters[cj].bbmaxx;
MD_FLOAT bbminy = atom->jclusters[cj].bbminy;
MD_FLOAT bbmaxy = atom->jclusters[cj].bbmaxy;
MD_FLOAT bbminz = atom->jclusters[cj].bbminz;
MD_FLOAT bbmaxz = atom->jclusters[cj].bbmaxz;
/* Setup ghost atoms */
/* 6 planes */
if (bbminx < Cutneigh) { ADDGHOST(+1,0,0); }
if (bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,0,0); }
if (bbminy < Cutneigh) { ADDGHOST(0,+1,0); }
if (bbmaxy >= (yprd-Cutneigh)) { ADDGHOST(0,-1,0); }
if (bbminz < Cutneigh) { ADDGHOST(0,0,+1); }
if (bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,0,-1); }
/* 8 corners */
if (bbminx < Cutneigh && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,+1,+1); }
if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(+1,-1,+1); }
if (bbminx < Cutneigh && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,+1,-1); }
if (bbminx < Cutneigh && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,-1,-1); }
if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(-1,+1,+1); }
if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,-1,+1); }
if (bbmaxx >= (xprd-Cutneigh) && bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,+1,-1); }
if (bbmaxx >= (xprd-Cutneigh) && bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,-1,-1); }
/* 12 edges */
if (bbminx < Cutneigh && bbminz < Cutneigh) { ADDGHOST(+1,0,+1); }
if (bbminx < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(+1,0,-1); }
if (bbmaxx >= (xprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(-1,0,+1); }
if (bbmaxx >= (xprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(-1,0,-1); }
if (bbminy < Cutneigh && bbminz < Cutneigh) { ADDGHOST(0,+1,+1); }
if (bbminy < Cutneigh && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,+1,-1); }
if (bbmaxy >= (yprd-Cutneigh) && bbminz < Cutneigh) { ADDGHOST(0,-1,+1); }
if (bbmaxy >= (yprd-Cutneigh) && bbmaxz >= (zprd-Cutneigh)) { ADDGHOST(0,-1,-1); }
if (bbminy < Cutneigh && bbminx < Cutneigh) { ADDGHOST(+1,+1,0); }
if (bbminy < Cutneigh && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,+1,0); }
if (bbmaxy >= (yprd-Cutneigh) && bbminx < Cutneigh) { ADDGHOST(+1,-1,0); }
if (bbmaxy >= (yprd-Cutneigh) && bbmaxx >= (xprd-Cutneigh)) { ADDGHOST(-1,-1,0); }
}
}
if(ncj + (Nghost + 1) * jfac >= atom->Nclusters_max) {
growClusters(atom);
}
// Add dummy cluster at the end
int cj_vec_base = CJ_VECTOR_BASE_INDEX(ncj + Nghost + 1);
MD_FLOAT *cj_x = &atom->cl_x[cj_vec_base];
for(int cjj = 0; cjj < CLUSTER_N; cjj++) {
cj_x[CL_X_OFFSET + cjj] = INFINITY;
cj_x[CL_Y_OFFSET + cjj] = INFINITY;
cj_x[CL_Z_OFFSET + cjj] = INFINITY;
}
// increase by one to make it the ghost atom count
atom->dummy_cj = ncj + Nghost + 1;
atom->Nghost = Nghost_atoms;
atom->Nclusters_ghost = Nghost + 1;
atom->Nclusters = atom->Nclusters_local + Nghost + 1;
// Update created ghost clusters positions
cpuUpdatePbc(atom, param, 1);
DEBUG_MESSAGE("setupPbc end\n");
}

58
clusterpair/stats.c Normal file
View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <atom.h>
#include <parameter.h>
#include <stats.h>
#include <timers.h>
void initStats(Stats *s) {
s->calculated_forces = 0;
s->num_neighs = 0;
s->force_iters = 0;
s->atoms_within_cutoff = 0;
s->atoms_outside_cutoff = 0;
s->clusters_within_cutoff = 0;
s->clusters_outside_cutoff = 0;
}
void displayStatistics(Atom *atom, Parameter *param, Stats *stats, double *timer) {
#ifdef COMPUTE_STATS
const int MxN = CLUSTER_M * CLUSTER_N;
double avg_atoms_cluster = (double)(atom->Nlocal) / (double)(atom->Nclusters_local);
double force_useful_volume = 1e-9 * ( (double)(atom->Nlocal * (param->ntimes + 1)) * (sizeof(MD_FLOAT) * 6 + sizeof(int)) +
(double)(stats->num_neighs) * (sizeof(MD_FLOAT) * 3 + sizeof(int)) );
double avg_neigh_atom = (stats->num_neighs * CLUSTER_N) / (double)(atom->Nlocal * (param->ntimes + 1));
double avg_neigh_cluster = (double)(stats->num_neighs) / (double)(stats->calculated_forces);
double avg_simd = stats->force_iters / (double)(atom->Nlocal * (param->ntimes + 1));
#ifdef EXPLICIT_TYPES
force_useful_volume += 1e-9 * (double)((atom->Nlocal * (param->ntimes + 1)) + stats->num_neighs) * sizeof(int);
#endif
printf("Statistics:\n");
printf("\tVector width: %d, Processor frequency: %.4f GHz\n", VECTOR_WIDTH, param->proc_freq);
printf("\tAverage atoms per cluster: %.4f\n", avg_atoms_cluster);
printf("\tAverage neighbors per atom: %.4f\n", avg_neigh_atom);
printf("\tAverage neighbors per cluster: %.4f\n", avg_neigh_cluster);
printf("\tAverage SIMD iterations per atom: %.4f\n", avg_simd);
printf("\tTotal number of computed pair interactions: %lld\n", stats->num_neighs * MxN);
printf("\tTotal number of SIMD iterations: %lld\n", stats->force_iters);
printf("\tUseful read data volume for force computation: %.2fGB\n", force_useful_volume);
printf("\tCycles/SIMD iteration: %.4f\n", timer[FORCE] * param->proc_freq * 1e9 / stats->force_iters);
#ifdef USE_REFERENCE_VERSION
const double atoms_eff = (double)stats->atoms_within_cutoff / (double)(stats->atoms_within_cutoff + stats->atoms_outside_cutoff) * 100.0;
printf("\tAtoms within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->atoms_within_cutoff, stats->atoms_outside_cutoff, atoms_eff);
const double clusters_eff = (double)stats->clusters_within_cutoff / (double)(stats->clusters_within_cutoff + stats->clusters_outside_cutoff) * 100.0;
printf("\tClusters within/outside cutoff radius: %lld/%lld (%.2f%%)\n", stats->clusters_within_cutoff, stats->clusters_outside_cutoff, clusters_eff);
#endif
#endif
}

61
clusterpair/tracing.c Normal file
View File

@@ -0,0 +1,61 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <neighbor.h>
#include <parameter.h>
#include <atom.h>
#include <tracing.h>
void traceAddresses(Parameter *param, Atom *atom, Neighbor *neighbor, int timestep) {
MEM_TRACER_INIT;
INDEX_TRACER_INIT;
int Nlocal = atom->Nlocal;
int *neighs;
unsigned int *neighs_imask;
//MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
INDEX_TRACE_NATOMS(Nlocal, atom->Nghost, neighbor->maxneighs);
for(int i = 0; i < Nlocal; i++) {
neighs = &neighbor->neighbors[i * neighbor->maxneighs];
int numneighs = neighbor->numneigh[i];
MEM_TRACE(atom_x(i), 'R');
MEM_TRACE(atom_y(i), 'R');
MEM_TRACE(atom_z(i), 'R');
INDEX_TRACE_ATOM(i);
#ifdef EXPLICIT_TYPES
MEM_TRACE(atom->type[i], 'R');
#endif
DIST_TRACE_SORT(neighs, numneighs);
INDEX_TRACE(neighs, numneighs);
DIST_TRACE(neighs, numneighs);
for(int k = 0; k < numneighs; k++) {
int j = neighs[k];
MEM_TRACE(j, 'R');
MEM_TRACE(atom_x(j), 'R');
MEM_TRACE(atom_y(j), 'R');
MEM_TRACE(atom_z(j), 'R');
#ifdef EXPLICIT_TYPES
MEM_TRACE(atom->type[j], 'R');
#endif
}
/*
MEM_TRACE(fx[i], 'R');
MEM_TRACE(fx[i], 'W');
MEM_TRACE(fy[i], 'R');
MEM_TRACE(fy[i], 'W');
MEM_TRACE(fz[i], 'R');
MEM_TRACE(fz[i], 'W');
*/
}
INDEX_TRACER_END;
MEM_TRACER_END;
}

190
clusterpair/vtk.c Normal file
View File

@@ -0,0 +1,190 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdio.h>
#include <stdlib.h>
#include <atom.h>
#include <vtk.h>
void write_data_to_vtk_file(const char *filename, Atom* atom, int timestep) {
write_local_atoms_to_vtk_file(filename, atom, timestep);
write_ghost_atoms_to_vtk_file(filename, atom, timestep);
write_local_cluster_edges_to_vtk_file(filename, atom, timestep);
write_ghost_cluster_edges_to_vtk_file(filename, atom, timestep);
}
int write_local_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_local_%d.vtk", filename, timestep);
FILE* fp = fopen(timestep_filename, "wb");
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
}
}
fprintf(fp, "\n\n");
fprintf(fp, "CELLS %d %d\n", atom->Nlocal, atom->Nlocal * 2);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "1 %d\n", i);
}
fprintf(fp, "\n\n");
fprintf(fp, "CELL_TYPES %d\n", atom->Nlocal);
for(int i = 0; i < atom->Nlocal; ++i) {
fprintf(fp, "1\n");
}
fprintf(fp, "\n\n");
fprintf(fp, "POINT_DATA %d\n", atom->Nlocal);
fprintf(fp, "SCALARS mass double\n");
fprintf(fp, "LOOKUP_TABLE default\n");
for(int i = 0; i < atom->Nlocal; i++) {
fprintf(fp, "1.0\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}
int write_ghost_atoms_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_ghost_%d.vtk", filename, timestep);
FILE* fp = fopen(timestep_filename, "wb");
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET UNSTRUCTURED_GRID\n");
fprintf(fp, "POINTS %d double\n", atom->Nghost);
for(int ci = atom->Nclusters_local; ci < atom->Nclusters_local + atom->Nclusters_ghost; ++ci) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
}
}
fprintf(fp, "\n\n");
fprintf(fp, "CELLS %d %d\n", atom->Nghost, atom->Nghost * 2);
for(int i = 0; i < atom->Nghost; ++i) {
fprintf(fp, "1 %d\n", i);
}
fprintf(fp, "\n\n");
fprintf(fp, "CELL_TYPES %d\n", atom->Nghost);
for(int i = 0; i < atom->Nghost; ++i) {
fprintf(fp, "1\n");
}
fprintf(fp, "\n\n");
fprintf(fp, "POINT_DATA %d\n", atom->Nghost);
fprintf(fp, "SCALARS mass double\n");
fprintf(fp, "LOOKUP_TABLE default\n");
for(int i = 0; i < atom->Nghost; i++) {
fprintf(fp, "1.0\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}
int write_local_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_local_edges_%d.vtk", filename, timestep);
FILE* fp = fopen(timestep_filename, "wb");
int N = atom->Nclusters_local;
int tot_lines = 0;
int i = 0;
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET POLYDATA\n");
fprintf(fp, "POINTS %d double\n", atom->Nlocal);
for(int ci = 0; ci < N; ++ci) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
}
tot_lines += atom->iclusters[ci].natoms;
}
fprintf(fp, "\n\n");
fprintf(fp, "LINES %d %d\n", N, N + tot_lines);
for(int ci = 0; ci < N; ++ci) {
fprintf(fp, "%d ", atom->iclusters[ci].natoms);
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
fprintf(fp, "%d ", i++);
}
fprintf(fp, "\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}
int write_ghost_cluster_edges_to_vtk_file(const char* filename, Atom* atom, int timestep) {
char timestep_filename[128];
snprintf(timestep_filename, sizeof timestep_filename, "%s_ghost_edges_%d.vtk", filename, timestep);
FILE* fp = fopen(timestep_filename, "wb");
int N = atom->Nclusters_local + atom->Nclusters_ghost;
int tot_lines = 0;
int i = 0;
if(fp == NULL) {
fprintf(stderr, "Could not open VTK file for writing!\n");
return -1;
}
fprintf(fp, "# vtk DataFile Version 2.0\n");
fprintf(fp, "Particle data\n");
fprintf(fp, "ASCII\n");
fprintf(fp, "DATASET POLYDATA\n");
fprintf(fp, "POINTS %d double\n", atom->Nghost);
for(int ci = atom->Nclusters_local; ci < N; ++ci) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
fprintf(fp, "%.4f %.4f %.4f\n", ci_x[CL_X_OFFSET + cii], ci_x[CL_Y_OFFSET + cii], ci_x[CL_Z_OFFSET + cii]);
}
tot_lines += atom->iclusters[ci].natoms;
}
fprintf(fp, "\n\n");
fprintf(fp, "LINES %d %d\n", atom->Nclusters_ghost, atom->Nclusters_ghost + tot_lines);
for(int ci = atom->Nclusters_local; ci < N; ++ci) {
fprintf(fp, "%d ", atom->iclusters[ci].natoms);
for(int cii = 0; cii < atom->iclusters[ci].natoms; ++cii) {
fprintf(fp, "%d ", i++);
}
fprintf(fp, "\n");
}
fprintf(fp, "\n\n");
fclose(fp);
return 0;
}

56
clusterpair/xtc.c Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
* All rights reserved. This file is part of MD-Bench.
* Use of this source code is governed by a LGPL-3.0
* license that can be found in the LICENSE file.
*/
#include <stdlib.h>
//---
#include <atom.h>
#include <allocate.h>
#include <xtc.h>
#ifdef XTC_OUTPUT
#include <gromacs/fileio/xtcio.h>
static struct t_fileio *xtc_file = NULL;
static rvec *x_buf = NULL;
static rvec basis[3];
void xtc_init(const char *filename, Atom *atom, int timestep) {
basis[0][XX] = 1.0;
basis[0][YY] = 0.0;
basis[0][ZZ] = 0.0;
basis[1][XX] = 0.0;
basis[1][YY] = 1.0;
basis[1][ZZ] = 0.0;
basis[2][XX] = 0.0;
basis[2][YY] = 0.0;
basis[2][ZZ] = 1.0;
xtc_file = open_xtc(filename, "w");
x_buf = (rvec *) allocate(ALIGNMENT, sizeof(rvec) * (atom->Nlocal + 1));
xtc_write(atom, timestep, 1, 1);
}
void xtc_write(Atom *atom, int timestep, int write_pos, int write_vel) {
int i = 0;
for(int ci = 0; ci < atom->Nclusters_local; ++ci) {
int ci_vec_base = CI_VECTOR_BASE_INDEX(ci);
MD_FLOAT *ci_x = &atom->cl_x[ci_vec_base];
for(int cii = 0; cii < atom->clusters[ci].natoms; ++cii) {
x_buf[i][XX] = ci_x[CL_X_OFFSET + cii];
x_buf[i][YY] = ci_x[CL_Y_OFFSET + cii];
x_buf[i][ZZ] = ci_x[CL_Z_OFFSET + cii];
i++;
}
}
write_xtc(xtc_file, atom->Nlocal, timestep, 0.0, (const rvec *) basis, (const rvec *) x_buf, 1000);
}
void xtc_end() {
free(x_buf);
close_xtc(xtc_file);
}
#endif