Porting atom velocity memory layout to AoS, porting velocity integration to CUDA, adding measurements + logbook update

This commit is contained in:
Maximilian Gaul 2022-01-01 18:18:12 +01:00
parent 50007216ed
commit dc4d5f1a9c
8 changed files with 175 additions and 72 deletions

1
.gitignore vendored
View File

@ -52,6 +52,7 @@ Mkfile.old
dkms.conf dkms.conf
# Build directories and executables # Build directories and executables
.vscode/
GCC/ GCC/
ICC/ ICC/
MDBench-GCC* MDBench-GCC*

View File

@ -78,7 +78,7 @@ void* reallocate (
size_t newBytesize, size_t newBytesize,
size_t oldBytesize) size_t oldBytesize)
{ {
void* newarray = allocate(alignment, newBytesize); void* newarray = allocate(alignment, newBytesize);
if(ptr != NULL) { if(ptr != NULL) {
memcpy(newarray, ptr, oldBytesize); memcpy(newarray, ptr, oldBytesize);

View File

@ -137,9 +137,9 @@ void createAtom(Atom *atom, Parameter *param)
atom_x(atom->Nlocal) = xtmp; atom_x(atom->Nlocal) = xtmp;
atom_y(atom->Nlocal) = ytmp; atom_y(atom->Nlocal) = ytmp;
atom_z(atom->Nlocal) = ztmp; atom_z(atom->Nlocal) = ztmp;
atom->vx[atom->Nlocal] = vxtmp; atom_vx(atom->Nlocal) = vxtmp;
atom->vy[atom->Nlocal] = vytmp; atom_vy(atom->Nlocal) = vytmp;
atom->vz[atom->Nlocal] = vztmp; atom_vz(atom->Nlocal) = vztmp;
atom->type[atom->Nlocal] = rand() % atom->ntypes; atom->type[atom->Nlocal] = rand() % atom->ntypes;
atom->Nlocal++; atom->Nlocal++;
} }
@ -164,6 +164,8 @@ void growAtom(Atom *atom)
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3); atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
#else #else
atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
@ -172,10 +174,12 @@ void growAtom(Atom *atom)
atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
#endif
atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT)); atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
#endif
atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int)); atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
} }

View File

@ -92,13 +92,115 @@ __global__ void calc_force(
atom_fz(i) = fiz; atom_fz(i) = fiz;
} }
__global__ void kernel_initial_integrate(MD_FLOAT dtforce, MD_FLOAT dt, int Nlocal, Atom a) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
Atom *atom = &a;
atom_vx(i) += dtforce * atom_fx(i);
atom_vy(i) += dtforce * atom_fy(i);
atom_vz(i) += dtforce * atom_fz(i);
atom_x(i) = atom_x(i) + dt * atom_vx(i);
atom_y(i) = atom_y(i) + dt * atom_vy(i);
atom_z(i) = atom_z(i) + dt * atom_vz(i);
}
__global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal, Atom a) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i >= Nlocal ) {
return;
}
Atom *atom = &a;
atom_vx(i) += dtforce * atom_fx(i);
atom_vy(i) += dtforce * atom_fy(i);
atom_vz(i) += dtforce * atom_fz(i);
}
extern "C" { extern "C" {
bool initialized = false;
static Atom c_atom; static Atom c_atom;
int *c_neighs; int *c_neighs;
int *c_neigh_numneigh; int *c_neigh_numneigh;
int get_num_threads() {
const char *num_threads_env = getenv("NUM_THREADS");
int num_threads = 0;
if(num_threads_env == nullptr)
num_threads = 32;
else {
num_threads = atoi(num_threads_env);
}
return num_threads;
}
void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
const int Nlocal = atom->Nlocal;
const int num_threads = get_num_threads();
const int num_threads_per_block = num_threads; // this should be multiple of 32 as operations are performed at the level of warps
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, c_atom);
checkCUDAError( "PeekAtLastError FinalIntegrate", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync FinalIntegrate", cudaDeviceSynchronize() );
if(doReneighbour) {
checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
checkCUDAError( "FinalIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
}
}
void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
const int Nlocal = atom->Nlocal;
const int num_threads = get_num_threads();
const int num_threads_per_block = num_threads; // this should be multiple of 32 as operations are performed at the level of warps
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, c_atom);
checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
checkCUDAError( "InitialIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
}
void initCudaAtom(Atom *atom, Neighbor *neighbor) {
const int Nlocal = atom->Nlocal;
checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom.vx malloc", cudaMalloc((void**)&(c_atom.vx), sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom.vx memcpy", cudaMemcpy(c_atom.vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_atom.cutforcesq malloc", cudaMalloc((void**)&(c_atom.cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_neighs malloc", cudaMalloc((void**)&c_neighs, sizeof(int) * Nlocal * neighbor->maxneighs) );
checkCUDAError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) );
checkCUDAError( "c_atom.type memcpy", cudaMemcpy(c_atom.type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.sigma6 memcpy", cudaMemcpy(c_atom.sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.epsilon memcpy", cudaMemcpy(c_atom.epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
}
double computeForce( double computeForce(
bool reneighbourHappenend, bool reneighbourHappenend,
Parameter *param, Parameter *param,
@ -113,15 +215,7 @@ double computeForce(
MD_FLOAT epsilon = param->epsilon; MD_FLOAT epsilon = param->epsilon;
#endif #endif
cudaProfilerStart(); const int num_threads = get_num_threads();
const char *num_threads_env = getenv("NUM_THREADS");
int num_threads = 0;
if(num_threads_env == nullptr)
num_threads = 32;
else {
num_threads = atoi(num_threads_env);
}
c_atom.Natoms = atom->Natoms; c_atom.Natoms = atom->Natoms;
c_atom.Nlocal = atom->Nlocal; c_atom.Nlocal = atom->Nlocal;
@ -149,25 +243,9 @@ double computeForce(
// HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
// HINT: Only works for data layout = AOS!!! // HINT: Only works for data layout = AOS!!!
if(!initialized) { // checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_atom.cutforcesq malloc", cudaMalloc((void**)&(c_atom.cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
checkCUDAError( "c_neighs malloc", cudaMalloc((void**)&c_neighs, sizeof(int) * Nlocal * neighbor->maxneighs) ); cudaProfilerStart();
checkCUDAError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) );
checkCUDAError( "c_atom.type memcpy", cudaMemcpy(c_atom.type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.sigma6 memcpy", cudaMemcpy(c_atom.sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.epsilon memcpy", cudaMemcpy(c_atom.epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
}
checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
@ -184,12 +262,12 @@ double computeForce(
calc_force <<< num_blocks, num_threads_per_block >>> (c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighs, c_neigh_numneigh); calc_force <<< num_blocks, num_threads_per_block >>> (c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighs, c_neigh_numneigh);
checkCUDAError( "PeekAtLastError", cudaPeekAtLastError() ); checkCUDAError( "PeekAtLastError ComputeForce", cudaPeekAtLastError() );
checkCUDAError( "DeviceSync", cudaDeviceSynchronize() ); checkCUDAError( "DeviceSync ComputeForce", cudaDeviceSynchronize() );
// copy results in c_atom.fx/fy/fz to atom->fx/fy/fz // copy results in c_atom.fx/fy/fz to atom->fx/fy/fz
cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyDeviceToHost); checkCUDAError( "memcpy force back", cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyDeviceToHost) );
/* /*
cudaFree(c_atom.x); cudaFree(c_atom.x);
@ -207,8 +285,6 @@ double computeForce(
LIKWID_MARKER_STOP("force"); LIKWID_MARKER_STOP("force");
double E = getTimeStamp(); double E = getTimeStamp();
initialized = true;
return E-S; return E-S;
} }
} }

View File

@ -54,6 +54,10 @@ extern void growAtom(Atom*);
#define atom_fy(i) atom->fx[(i) * 3 + 1] #define atom_fy(i) atom->fx[(i) * 3 + 1]
#define atom_fz(i) atom->fx[(i) * 3 + 2] #define atom_fz(i) atom->fx[(i) * 3 + 2]
#define atom_vx(i) atom->vx[(i) * 3 + 0]
#define atom_vy(i) atom->vx[(i) * 3 + 1]
#define atom_vz(i) atom->vx[(i) * 3 + 2]
#else #else
#define POS_DATA_LAYOUT "SoA" #define POS_DATA_LAYOUT "SoA"
@ -65,6 +69,10 @@ extern void growAtom(Atom*);
#define atom_fy(i) atom->fy[i] #define atom_fy(i) atom->fy[i]
#define atom_fz(i) atom->fz[i] #define atom_fz(i) atom->fz[i]
#define atom_vx(i) atom->vx[i]
#define atom_vy(i) atom->vy[i]
#define atom_vz(i) atom->vz[i]
#endif #endif
#endif #endif

View File

@ -45,6 +45,11 @@
#define HLINE "----------------------------------------------------------------------------\n" #define HLINE "----------------------------------------------------------------------------\n"
extern void initCudaAtom(Atom *atom, Neighbor *neighbor);
extern void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom);
extern void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom);
extern double computeForce(bool, Parameter*, Atom*, Neighbor*); extern double computeForce(bool, Parameter*, Atom*, Neighbor*);
extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int); extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep); extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
@ -101,6 +106,8 @@ double setup(
buildNeighbor(atom, neighbor); buildNeighbor(atom, neighbor);
E = getTimeStamp(); E = getTimeStamp();
initCudaAtom(atom, neighbor);
return E-S; return E-S;
} }
@ -126,26 +133,22 @@ double reneighbour(
void initialIntegrate(Parameter *param, Atom *atom) void initialIntegrate(Parameter *param, Atom *atom)
{ {
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
vx[i] += param->dtforce * atom_fx(i); atom_vx(i) += param->dtforce * atom_fx(i);
vy[i] += param->dtforce * atom_fy(i); atom_vy(i) += param->dtforce * atom_fy(i);
vz[i] += param->dtforce * atom_fz(i); atom_vz(i) += param->dtforce * atom_fz(i);
atom_x(i) = atom_x(i) + param->dt * vx[i]; atom_x(i) = atom_x(i) + param->dt * atom_vx(i);
atom_y(i) = atom_y(i) + param->dt * vy[i]; atom_y(i) = atom_y(i) + param->dt * atom_vy(i);
atom_z(i) = atom_z(i) + param->dt * vz[i]; atom_z(i) = atom_z(i) + param->dt * atom_vz(i);
} }
} }
void finalIntegrate(Parameter *param, Atom *atom) void finalIntegrate(Parameter *param, Atom *atom)
{ {
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
vx[i] += param->dtforce * atom_fx(i); atom_vx(i) += param->dtforce * atom_fx(i);
vy[i] += param->dtforce * atom_fy(i); atom_vy(i) += param->dtforce * atom_fy(i);
vz[i] += param->dtforce * atom_fz(i); atom_vz(i) += param->dtforce * atom_fz(i);
} }
} }
@ -274,10 +277,11 @@ int main(int argc, char** argv)
} }
for(int n = 0; n < param.ntimes; n++) { for(int n = 0; n < param.ntimes; n++) {
initialIntegrate(&param, &atom);
const bool doReneighbour = (n + 1) % param.every == 0; const bool doReneighbour = (n + 1) % param.every == 0;
cuda_initial_integrate(doReneighbour, &param, &atom); // initialIntegrate(&param, &atom);
if(doReneighbour) { if(doReneighbour) {
timer[NEIGH] += reneighbour(&param, &atom, &neighbor); timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
} else { } else {
@ -293,7 +297,7 @@ int main(int argc, char** argv)
timer[FORCE] += computeForce(doReneighbour, &param, &atom, &neighbor); timer[FORCE] += computeForce(doReneighbour, &param, &atom, &neighbor);
#endif #endif
} }
finalIntegrate(&param, &atom); cuda_final_integrate(doReneighbour, &param, &atom); // finalIntegrate(&param, &atom);
if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) { if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
computeThermo(n + 1, &param, &atom); computeThermo(n + 1, &param, &atom);

View File

@ -359,14 +359,18 @@ void sortAtom(Atom* atom) {
#ifdef AOS #ifdef AOS
double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3); double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
#else #else
double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT)); double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT));
double* new_y = (double*) malloc(Nmax * sizeof(MD_FLOAT)); double* new_y = (double*) malloc(Nmax * sizeof(MD_FLOAT));
double* new_z = (double*) malloc(Nmax * sizeof(MD_FLOAT)); double* new_z = (double*) malloc(Nmax * sizeof(MD_FLOAT));
#endif
double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT)); double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT));
double* new_vy = (double*) malloc(Nmax * sizeof(MD_FLOAT)); double* new_vy = (double*) malloc(Nmax * sizeof(MD_FLOAT));
double* new_vz = (double*) malloc(Nmax * sizeof(MD_FLOAT)); double* new_vz = (double*) malloc(Nmax * sizeof(MD_FLOAT));
#endif
double* old_x = atom->x; double* old_y = atom->y; double* old_z = atom->z; double* old_x = atom->x; double* old_y = atom->y; double* old_z = atom->z;
double* old_vx = atom->vx; double* old_vy = atom->vy; double* old_vz = atom->vz; double* old_vx = atom->vx; double* old_vy = atom->vy; double* old_vz = atom->vz;
@ -380,24 +384,34 @@ void sortAtom(Atom* atom) {
new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0]; new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1]; new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2]; new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
new_vx[new_i * 3 + 1] = old_vy[old_i * 3 + 1];
new_vx[new_i * 3 + 2] = old_vz[old_i * 3 + 2];
#else #else
new_x[new_i] = old_x[old_i]; new_x[new_i] = old_x[old_i];
new_y[new_i] = old_y[old_i]; new_y[new_i] = old_y[old_i];
new_z[new_i] = old_z[old_i]; new_z[new_i] = old_z[old_i];
#endif
new_vx[new_i] = old_vx[old_i]; new_vx[new_i] = old_vx[old_i];
new_vy[new_i] = old_vy[old_i]; new_vy[new_i] = old_vy[old_i];
new_vz[new_i] = old_vz[old_i]; new_vz[new_i] = old_vz[old_i];
#endif
} }
} }
free(atom->x); free(atom->x);
atom->x = new_x; atom->x = new_x;
free(atom->vx);
atom->vx = new_vx;
#ifndef AOS #ifndef AOS
free(atom->y); free(atom->y);
free(atom->z); free(atom->z);
atom->y = new_y; atom->z = new_z; atom->y = new_y; atom->z = new_z;
free(atom->vy); free(atom->vz);
atom->vy = new_vy; atom->vz = new_vz;
#endif #endif
free(atom->vx); free(atom->vy); free(atom->vz);
atom->vx = new_vx; atom->vy = new_vy; atom->vz = new_vz;
} }

View File

@ -71,12 +71,9 @@ void setupThermo(Parameter *param, int natoms)
void computeThermo(int iflag, Parameter *param, Atom *atom) void computeThermo(int iflag, Parameter *param, Atom *atom)
{ {
MD_FLOAT t = 0.0, p; MD_FLOAT t = 0.0, p;
MD_FLOAT* vx = atom->vx;
MD_FLOAT* vy = atom->vy;
MD_FLOAT* vz = atom->vz;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
} }
t = t * t_scale; t = t * t_scale;
@ -101,12 +98,11 @@ void adjustThermo(Parameter *param, Atom *atom)
{ {
/* zero center-of-mass motion */ /* zero center-of-mass motion */
MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0; MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
vxtot += vx[i]; vxtot += atom_vx(i);
vytot += vy[i]; vytot += atom_vy(i);
vztot += vz[i]; vztot += atom_vz(i);
} }
vxtot = vxtot / atom->Natoms; vxtot = vxtot / atom->Natoms;
@ -114,24 +110,24 @@ void adjustThermo(Parameter *param, Atom *atom)
vztot = vztot / atom->Natoms; vztot = vztot / atom->Natoms;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
vx[i] -= vxtot; atom_vx(i) -= vxtot;
vy[i] -= vytot; atom_vy(i) -= vytot;
vz[i] -= vztot; atom_vz(i) -= vztot;
} }
t_act = 0; t_act = 0;
MD_FLOAT t = 0.0; MD_FLOAT t = 0.0;
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass; t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
} }
t *= t_scale; t *= t_scale;
MD_FLOAT factor = sqrt(param->temp / t); MD_FLOAT factor = sqrt(param->temp / t);
for(int i = 0; i < atom->Nlocal; i++) { for(int i = 0; i < atom->Nlocal; i++) {
vx[i] *= factor; atom_vx(i) *= factor;
vy[i] *= factor; atom_vy(i) *= factor;
vz[i] *= factor; atom_vz(i) *= factor;
} }
} }