From 50007216ed73323ce2532c07d7426611b85b00e7 Mon Sep 17 00:00:00 2001 From: Maximilian Gaul Date: Sat, 1 Jan 2022 16:09:21 +0100 Subject: [PATCH] Implemented atom force AoS memory layout, added performance measurements + logbook Update --- src/atom.c | 12 ++++++++---- src/force.cu | 22 ++++++---------------- src/includes/atom.h | 12 ++++++++++++ src/main.c | 14 ++++++-------- 4 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/atom.c b/src/atom.c index b1f7549..55fb103 100644 --- a/src/atom.c +++ b/src/atom.c @@ -162,16 +162,20 @@ void growAtom(Atom *atom) #ifdef AOS atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); + + atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); #else atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); - #endif - atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); - atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); - atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + #endif + + atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); + atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int)); } diff --git a/src/force.cu b/src/force.cu index 0e14356..5b9fb90 100644 --- a/src/force.cu +++ b/src/force.cu @@ -58,10 +58,6 @@ __global__ void calc_force( MD_FLOAT ytmp = atom_y(i); MD_FLOAT ztmp = atom_z(i); - MD_FLOAT *fx = atom->fx; - MD_FLOAT *fy = atom->fy; - MD_FLOAT *fz = atom->fz; - MD_FLOAT fix = 0; MD_FLOAT fiy = 0; MD_FLOAT fiz = 0; @@ -91,9 +87,9 @@ __global__ void calc_force( } } - fx[i] += fix; - fy[i] += fiy; - fz[i] += fiz; + atom_fx(i) = fix; + atom_fy(i) = fiy; + atom_fz(i) = fiz; } extern "C" { @@ -155,9 +151,7 @@ double computeForce( if(!initialized) { checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) ); - checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal) ); - checkCUDAError( "c_atom.fy malloc", cudaMalloc((void**)&(c_atom.fy), sizeof(MD_FLOAT) * Nlocal) ); - checkCUDAError( "c_atom.fz malloc", cudaMalloc((void**)&(c_atom.fz), sizeof(MD_FLOAT) * Nlocal) ); + checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) ); checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) ); checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) ); checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) ); @@ -173,9 +167,7 @@ double computeForce( checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) ); } - checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal) ); - checkCUDAError( "c_atom.fy memset", cudaMemset(c_atom.fy, 0, sizeof(MD_FLOAT) * Nlocal) ); - checkCUDAError( "c_atom.fz memset", cudaMemset(c_atom.fz, 0, sizeof(MD_FLOAT) * Nlocal) ); + checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) ); checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); @@ -197,9 +189,7 @@ double computeForce( // copy results in c_atom.fx/fy/fz to atom->fx/fy/fz - cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal, cudaMemcpyDeviceToHost); - cudaMemcpy(atom->fy, c_atom.fy, sizeof(MD_FLOAT) * Nlocal, cudaMemcpyDeviceToHost); - cudaMemcpy(atom->fz, c_atom.fz, sizeof(MD_FLOAT) * Nlocal, cudaMemcpyDeviceToHost); + cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyDeviceToHost); /* cudaFree(c_atom.x); diff --git a/src/includes/atom.h b/src/includes/atom.h index 89c01ea..5640517 100644 --- a/src/includes/atom.h +++ b/src/includes/atom.h @@ -45,14 +45,26 @@ extern void growAtom(Atom*); #ifdef AOS #define POS_DATA_LAYOUT "AoS" + #define atom_x(i) atom->x[(i) * 3 + 0] #define atom_y(i) atom->x[(i) * 3 + 1] #define atom_z(i) atom->x[(i) * 3 + 2] + +#define atom_fx(i) atom->fx[(i) * 3 + 0] +#define atom_fy(i) atom->fx[(i) * 3 + 1] +#define atom_fz(i) atom->fx[(i) * 3 + 2] + #else #define POS_DATA_LAYOUT "SoA" + #define atom_x(i) atom->x[i] #define atom_y(i) atom->y[i] #define atom_z(i) atom->z[i] + +#define atom_fx(i) atom->fx[i] +#define atom_fy(i) atom->fy[i] +#define atom_fz(i) atom->fz[i] + #endif #endif diff --git a/src/main.c b/src/main.c index 9767bd8..86b59a2 100644 --- a/src/main.c +++ b/src/main.c @@ -126,13 +126,12 @@ double reneighbour( void initialIntegrate(Parameter *param, Atom *atom) { - MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { - vx[i] += param->dtforce * fx[i]; - vy[i] += param->dtforce * fy[i]; - vz[i] += param->dtforce * fz[i]; + vx[i] += param->dtforce * atom_fx(i); + vy[i] += param->dtforce * atom_fy(i); + vz[i] += param->dtforce * atom_fz(i); atom_x(i) = atom_x(i) + param->dt * vx[i]; atom_y(i) = atom_y(i) + param->dt * vy[i]; atom_z(i) = atom_z(i) + param->dt * vz[i]; @@ -141,13 +140,12 @@ void initialIntegrate(Parameter *param, Atom *atom) void finalIntegrate(Parameter *param, Atom *atom) { - MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz; MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz; for(int i = 0; i < atom->Nlocal; i++) { - vx[i] += param->dtforce * fx[i]; - vy[i] += param->dtforce * fy[i]; - vz[i] += param->dtforce * fz[i]; + vx[i] += param->dtforce * atom_fx(i); + vy[i] += param->dtforce * atom_fy(i); + vz[i] += param->dtforce * atom_fz(i); } }