Implemented atom force AoS memory layout, added performance measurements + logbook Update
This commit is contained in:
parent
72e4599acc
commit
50007216ed
12
src/atom.c
12
src/atom.c
@ -162,16 +162,20 @@ void growAtom(Atom *atom)
|
|||||||
|
|
||||||
#ifdef AOS
|
#ifdef AOS
|
||||||
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||||
|
|
||||||
|
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
|
||||||
#else
|
#else
|
||||||
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->z = (MD_FLOAT*) reallocate(atom->z, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
#endif
|
|
||||||
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
|
||||||
atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
|
||||||
atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
|
||||||
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
|
atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
|
atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
|
||||||
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
|
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
|
||||||
}
|
}
|
||||||
|
22
src/force.cu
22
src/force.cu
@ -58,10 +58,6 @@ __global__ void calc_force(
|
|||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
MD_FLOAT ztmp = atom_z(i);
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
|
|
||||||
MD_FLOAT *fx = atom->fx;
|
|
||||||
MD_FLOAT *fy = atom->fy;
|
|
||||||
MD_FLOAT *fz = atom->fz;
|
|
||||||
|
|
||||||
MD_FLOAT fix = 0;
|
MD_FLOAT fix = 0;
|
||||||
MD_FLOAT fiy = 0;
|
MD_FLOAT fiy = 0;
|
||||||
MD_FLOAT fiz = 0;
|
MD_FLOAT fiz = 0;
|
||||||
@ -91,9 +87,9 @@ __global__ void calc_force(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fx[i] += fix;
|
atom_fx(i) = fix;
|
||||||
fy[i] += fiy;
|
atom_fy(i) = fiy;
|
||||||
fz[i] += fiz;
|
atom_fz(i) = fiz;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -155,9 +151,7 @@ double computeForce(
|
|||||||
|
|
||||||
if(!initialized) {
|
if(!initialized) {
|
||||||
checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
|
checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
|
||||||
checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal) );
|
checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
|
||||||
checkCUDAError( "c_atom.fy malloc", cudaMalloc((void**)&(c_atom.fy), sizeof(MD_FLOAT) * Nlocal) );
|
|
||||||
checkCUDAError( "c_atom.fz malloc", cudaMalloc((void**)&(c_atom.fz), sizeof(MD_FLOAT) * Nlocal) );
|
|
||||||
checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
|
checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
|
||||||
checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
||||||
checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
|
||||||
@ -173,9 +167,7 @@ double computeForce(
|
|||||||
checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
|
checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
|
||||||
}
|
}
|
||||||
|
|
||||||
checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal) );
|
checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
|
||||||
checkCUDAError( "c_atom.fy memset", cudaMemset(c_atom.fy, 0, sizeof(MD_FLOAT) * Nlocal) );
|
|
||||||
checkCUDAError( "c_atom.fz memset", cudaMemset(c_atom.fz, 0, sizeof(MD_FLOAT) * Nlocal) );
|
|
||||||
|
|
||||||
checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
||||||
|
|
||||||
@ -197,9 +189,7 @@ double computeForce(
|
|||||||
|
|
||||||
// copy results in c_atom.fx/fy/fz to atom->fx/fy/fz
|
// copy results in c_atom.fx/fy/fz to atom->fx/fy/fz
|
||||||
|
|
||||||
cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal, cudaMemcpyDeviceToHost);
|
cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyDeviceToHost);
|
||||||
cudaMemcpy(atom->fy, c_atom.fy, sizeof(MD_FLOAT) * Nlocal, cudaMemcpyDeviceToHost);
|
|
||||||
cudaMemcpy(atom->fz, c_atom.fz, sizeof(MD_FLOAT) * Nlocal, cudaMemcpyDeviceToHost);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
cudaFree(c_atom.x);
|
cudaFree(c_atom.x);
|
||||||
|
@ -45,14 +45,26 @@ extern void growAtom(Atom*);
|
|||||||
|
|
||||||
#ifdef AOS
|
#ifdef AOS
|
||||||
#define POS_DATA_LAYOUT "AoS"
|
#define POS_DATA_LAYOUT "AoS"
|
||||||
|
|
||||||
#define atom_x(i) atom->x[(i) * 3 + 0]
|
#define atom_x(i) atom->x[(i) * 3 + 0]
|
||||||
#define atom_y(i) atom->x[(i) * 3 + 1]
|
#define atom_y(i) atom->x[(i) * 3 + 1]
|
||||||
#define atom_z(i) atom->x[(i) * 3 + 2]
|
#define atom_z(i) atom->x[(i) * 3 + 2]
|
||||||
|
|
||||||
|
#define atom_fx(i) atom->fx[(i) * 3 + 0]
|
||||||
|
#define atom_fy(i) atom->fx[(i) * 3 + 1]
|
||||||
|
#define atom_fz(i) atom->fx[(i) * 3 + 2]
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define POS_DATA_LAYOUT "SoA"
|
#define POS_DATA_LAYOUT "SoA"
|
||||||
|
|
||||||
#define atom_x(i) atom->x[i]
|
#define atom_x(i) atom->x[i]
|
||||||
#define atom_y(i) atom->y[i]
|
#define atom_y(i) atom->y[i]
|
||||||
#define atom_z(i) atom->z[i]
|
#define atom_z(i) atom->z[i]
|
||||||
|
|
||||||
|
#define atom_fx(i) atom->fx[i]
|
||||||
|
#define atom_fy(i) atom->fy[i]
|
||||||
|
#define atom_fz(i) atom->fz[i]
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
14
src/main.c
14
src/main.c
@ -126,13 +126,12 @@ double reneighbour(
|
|||||||
|
|
||||||
void initialIntegrate(Parameter *param, Atom *atom)
|
void initialIntegrate(Parameter *param, Atom *atom)
|
||||||
{
|
{
|
||||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
|
||||||
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
|
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
|
||||||
|
|
||||||
for(int i = 0; i < atom->Nlocal; i++) {
|
for(int i = 0; i < atom->Nlocal; i++) {
|
||||||
vx[i] += param->dtforce * fx[i];
|
vx[i] += param->dtforce * atom_fx(i);
|
||||||
vy[i] += param->dtforce * fy[i];
|
vy[i] += param->dtforce * atom_fy(i);
|
||||||
vz[i] += param->dtforce * fz[i];
|
vz[i] += param->dtforce * atom_fz(i);
|
||||||
atom_x(i) = atom_x(i) + param->dt * vx[i];
|
atom_x(i) = atom_x(i) + param->dt * vx[i];
|
||||||
atom_y(i) = atom_y(i) + param->dt * vy[i];
|
atom_y(i) = atom_y(i) + param->dt * vy[i];
|
||||||
atom_z(i) = atom_z(i) + param->dt * vz[i];
|
atom_z(i) = atom_z(i) + param->dt * vz[i];
|
||||||
@ -141,13 +140,12 @@ void initialIntegrate(Parameter *param, Atom *atom)
|
|||||||
|
|
||||||
void finalIntegrate(Parameter *param, Atom *atom)
|
void finalIntegrate(Parameter *param, Atom *atom)
|
||||||
{
|
{
|
||||||
MD_FLOAT* fx = atom->fx; MD_FLOAT* fy = atom->fy; MD_FLOAT* fz = atom->fz;
|
|
||||||
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
|
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
|
||||||
|
|
||||||
for(int i = 0; i < atom->Nlocal; i++) {
|
for(int i = 0; i < atom->Nlocal; i++) {
|
||||||
vx[i] += param->dtforce * fx[i];
|
vx[i] += param->dtforce * atom_fx(i);
|
||||||
vy[i] += param->dtforce * fy[i];
|
vy[i] += param->dtforce * atom_fy(i);
|
||||||
vz[i] += param->dtforce * fz[i];
|
vz[i] += param->dtforce * atom_fz(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user