diff --git a/.gitignore b/.gitignore
index 52c86c7..50d6fbe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,7 @@ Mkfile.old
 dkms.conf
 
 # Build directories and executables
+.vscode/
 GCC/
 ICC/
 MDBench-GCC*
diff --git a/src/allocate.c b/src/allocate.c
index 39199c7..2593291 100644
--- a/src/allocate.c
+++ b/src/allocate.c
@@ -78,7 +78,7 @@ void* reallocate (
     size_t newBytesize,
     size_t oldBytesize)
 {
-    void* newarray = allocate(alignment, newBytesize);
+    void* newarray = allocate(alignment, newBytesize);
 
     if(ptr != NULL) {
         memcpy(newarray, ptr, oldBytesize);
diff --git a/src/atom.c b/src/atom.c
index 55fb103..1b3d4c0 100644
--- a/src/atom.c
+++ b/src/atom.c
@@ -137,9 +137,9 @@ void createAtom(Atom *atom, Parameter *param)
                 atom_x(atom->Nlocal) = xtmp;
                 atom_y(atom->Nlocal) = ytmp;
                 atom_z(atom->Nlocal) = ztmp;
-                atom->vx[atom->Nlocal] = vxtmp;
-                atom->vy[atom->Nlocal] = vytmp;
-                atom->vz[atom->Nlocal] = vztmp;
+                atom_vx(atom->Nlocal) = vxtmp;
+                atom_vy(atom->Nlocal) = vytmp;
+                atom_vz(atom->Nlocal) = vztmp;
                 atom->type[atom->Nlocal] = rand() % atom->ntypes;
                 atom->Nlocal++;
             }
@@ -164,6 +164,8 @@ void growAtom(Atom *atom)
     atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
     atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
+
+    atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT) * 3, nold * sizeof(MD_FLOAT) * 3);
 #else
     atom->x = (MD_FLOAT*) reallocate(atom->x, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
     atom->y = (MD_FLOAT*) reallocate(atom->y, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
@@ -172,10 +174,12 @@ void growAtom(Atom *atom)
     atom->fx = (MD_FLOAT*) reallocate(atom->fx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
     atom->fy = (MD_FLOAT*) reallocate(atom->fy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
     atom->fz = (MD_FLOAT*) reallocate(atom->fz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
-#endif
     atom->vx = (MD_FLOAT*) reallocate(atom->vx, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
     atom->vy = (MD_FLOAT*) reallocate(atom->vy, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
     atom->vz = (MD_FLOAT*) reallocate(atom->vz, ALIGNMENT, atom->Nmax * sizeof(MD_FLOAT), nold * sizeof(MD_FLOAT));
+
+#endif
+
     atom->type = (int *) reallocate(atom->type, ALIGNMENT, atom->Nmax * sizeof(int), nold * sizeof(int));
 }
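Note on growAtom() above: every per-atom array is grown through reallocate() from src/allocate.c, which preserves the old contents across the resize (the start of that function is visible in the allocate.c hunk). A minimal stand-alone sketch of the pattern, assuming C11 aligned_alloc() in place of the project's allocate() wrapper:

    #include <stdlib.h>
    #include <string.h>

    /* Sketch only: grow an aligned buffer while keeping its payload.
     * aligned_alloc() additionally requires newBytesize to be a multiple
     * of the alignment, a restriction the project's allocate() may not have. */
    static void* reallocate_sketch(void *ptr, size_t alignment,
                                   size_t newBytesize, size_t oldBytesize) {
        void *newarray = aligned_alloc(alignment, newBytesize);
        if (ptr != NULL) {
            memcpy(newarray, ptr, oldBytesize); /* copy old contents over */
            free(ptr);
        }
        return newarray;
    }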
diff --git a/src/force.cu b/src/force.cu
index 5b9fb90..e044585 100644
--- a/src/force.cu
+++ b/src/force.cu
@@ -92,13 +92,115 @@ __global__ void calc_force(
     atom_fz(i) = fiz;
 }
 
+__global__ void kernel_initial_integrate(MD_FLOAT dtforce, MD_FLOAT dt, int Nlocal, Atom a) {
+
+    const int i = blockIdx.x * blockDim.x + threadIdx.x;
+    if( i >= Nlocal ) {
+        return;
+    }
+
+    Atom *atom = &a;
+
+    atom_vx(i) += dtforce * atom_fx(i);
+    atom_vy(i) += dtforce * atom_fy(i);
+    atom_vz(i) += dtforce * atom_fz(i);
+    atom_x(i) = atom_x(i) + dt * atom_vx(i);
+    atom_y(i) = atom_y(i) + dt * atom_vy(i);
+    atom_z(i) = atom_z(i) + dt * atom_vz(i);
+}
+
+__global__ void kernel_final_integrate(MD_FLOAT dtforce, int Nlocal, Atom a) {
+
+    const int i = blockIdx.x * blockDim.x + threadIdx.x;
+    if( i >= Nlocal ) {
+        return;
+    }
+
+    Atom *atom = &a;
+
+    atom_vx(i) += dtforce * atom_fx(i);
+    atom_vy(i) += dtforce * atom_fy(i);
+    atom_vz(i) += dtforce * atom_fz(i);
+}
+
 extern "C" {
 
-bool initialized = false;
 static Atom c_atom;
 int *c_neighs;
 int *c_neigh_numneigh;
 
+int get_num_threads() {
+
+    const char *num_threads_env = getenv("NUM_THREADS");
+    int num_threads = 0;
+    if(num_threads_env == nullptr)
+        num_threads = 32;
+    else {
+        num_threads = atoi(num_threads_env);
+    }
+
+    return num_threads;
+}
+
+void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
+
+    const int Nlocal = atom->Nlocal;
+    const int num_threads = get_num_threads();
+
+    const int num_threads_per_block = num_threads; // this should be a multiple of 32, as operations are performed at the level of warps
+    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
+
+    kernel_final_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, Nlocal, c_atom);
+
+    checkCUDAError( "PeekAtLastError FinalIntegrate", cudaPeekAtLastError() );
+    checkCUDAError( "DeviceSync FinalIntegrate", cudaDeviceSynchronize() );
+
+    if(doReneighbour) {
+        checkCUDAError( "FinalIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
+        checkCUDAError( "FinalIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
+    }
+}
+
+void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom) {
+
+    const int Nlocal = atom->Nlocal;
+    const int num_threads = get_num_threads();
+
+    const int num_threads_per_block = num_threads; // this should be a multiple of 32, as operations are performed at the level of warps
+    const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
+
+    kernel_initial_integrate <<< num_blocks, num_threads_per_block >>> (param->dtforce, param->dt, Nlocal, c_atom);
+
+    checkCUDAError( "PeekAtLastError InitialIntegrate", cudaPeekAtLastError() );
+    checkCUDAError( "DeviceSync InitialIntegrate", cudaDeviceSynchronize() );
+
+    checkCUDAError( "InitialIntegrate: velocity memcpy", cudaMemcpy(atom->vx, c_atom.vx, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
+    checkCUDAError( "InitialIntegrate: position memcpy", cudaMemcpy(atom->x, c_atom.x, sizeof(MD_FLOAT) * atom->Nlocal * 3, cudaMemcpyDeviceToHost) );
+}
+
+void initCudaAtom(Atom *atom, Neighbor *neighbor) {
+
+    const int Nlocal = atom->Nlocal;
+
+    checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
+    checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
+    checkCUDAError( "c_atom.vx malloc", cudaMalloc((void**)&(c_atom.vx), sizeof(MD_FLOAT) * Nlocal * 3) );
+    checkCUDAError( "c_atom.vx memcpy", cudaMemcpy(c_atom.vx, atom->vx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyHostToDevice) );
+    checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
+    checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
+    checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
+    checkCUDAError( "c_atom.cutforcesq malloc", cudaMalloc((void**)&(c_atom.cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
+
+    checkCUDAError( "c_neighs malloc", cudaMalloc((void**)&c_neighs, sizeof(int) * Nlocal * neighbor->maxneighs) );
+    checkCUDAError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) );
+
+    checkCUDAError( "c_atom.type memcpy", cudaMemcpy(c_atom.type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
+    checkCUDAError( "c_atom.sigma6 memcpy", cudaMemcpy(c_atom.sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
+    checkCUDAError( "c_atom.epsilon memcpy", cudaMemcpy(c_atom.epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
+
+    checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
+}
+
 double computeForce(
     bool reneighbourHappenend,
     Parameter *param,
@@ -113,15 +215,7 @@ double computeForce(
     MD_FLOAT epsilon = param->epsilon;
 #endif
 
-    cudaProfilerStart();
-
-    const char *num_threads_env = getenv("NUM_THREADS");
-    int num_threads = 0;
-    if(num_threads_env == nullptr)
-        num_threads = 32;
-    else {
-        num_threads = atoi(num_threads_env);
-    }
+    const int num_threads = get_num_threads();
 
     c_atom.Natoms = atom->Natoms;
     c_atom.Nlocal = atom->Nlocal;
@@ -149,25 +243,9 @@ double computeForce(
     // HINT: Run with cuda-memcheck ./MDBench-NVCC in case of error
     // HINT: Only works for data layout = AOS!!!
 
-    if(!initialized) {
-        checkCUDAError( "c_atom.x malloc", cudaMalloc((void**)&(c_atom.x), sizeof(MD_FLOAT) * atom->Nmax * 3) );
-        checkCUDAError( "c_atom.fx malloc", cudaMalloc((void**)&(c_atom.fx), sizeof(MD_FLOAT) * Nlocal * 3) );
-        checkCUDAError( "c_atom.type malloc", cudaMalloc((void**)&(c_atom.type), sizeof(int) * atom->Nmax) );
-        checkCUDAError( "c_atom.epsilon malloc", cudaMalloc((void**)&(c_atom.epsilon), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
-        checkCUDAError( "c_atom.sigma6 malloc", cudaMalloc((void**)&(c_atom.sigma6), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
-        checkCUDAError( "c_atom.cutforcesq malloc", cudaMalloc((void**)&(c_atom.cutforcesq), sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes) );
+    // checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
 
-        checkCUDAError( "c_neighs malloc", cudaMalloc((void**)&c_neighs, sizeof(int) * Nlocal * neighbor->maxneighs) );
-        checkCUDAError( "c_neigh_numneigh malloc", cudaMalloc((void**)&c_neigh_numneigh, sizeof(int) * Nlocal) );
-
-        checkCUDAError( "c_atom.type memcpy", cudaMemcpy(c_atom.type, atom->type, sizeof(int) * atom->Nmax, cudaMemcpyHostToDevice) );
-        checkCUDAError( "c_atom.sigma6 memcpy", cudaMemcpy(c_atom.sigma6, atom->sigma6, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
-        checkCUDAError( "c_atom.epsilon memcpy", cudaMemcpy(c_atom.epsilon, atom->epsilon, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
-
-        checkCUDAError( "c_atom.cutforcesq memcpy", cudaMemcpy(c_atom.cutforcesq, atom->cutforcesq, sizeof(MD_FLOAT) * atom->ntypes * atom->ntypes, cudaMemcpyHostToDevice) );
-    }
-
-    checkCUDAError( "c_atom.fx memset", cudaMemset(c_atom.fx, 0, sizeof(MD_FLOAT) * Nlocal * 3) );
+    cudaProfilerStart();
 
     checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
 
@@ -184,12 +262,12 @@ double computeForce(
 
     calc_force <<< num_blocks, num_threads_per_block >>> (c_atom, cutforcesq, sigma6, epsilon, Nlocal, neighbor->maxneighs, c_neighs, c_neigh_numneigh);
 
-    checkCUDAError( "PeekAtLastError", cudaPeekAtLastError() );
-    checkCUDAError( "DeviceSync", cudaDeviceSynchronize() );
+    checkCUDAError( "PeekAtLastError ComputeForce", cudaPeekAtLastError() );
+    checkCUDAError( "DeviceSync ComputeForce", cudaDeviceSynchronize() );
 
     // copy results in c_atom.fx/fy/fz to atom->fx/fy/fz
-    cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyDeviceToHost);
+    checkCUDAError( "memcpy force back", cudaMemcpy(atom->fx, c_atom.fx, sizeof(MD_FLOAT) * Nlocal * 3, cudaMemcpyDeviceToHost) );
 
     /*
     cudaFree(c_atom.x);
@@ -207,8 +285,6 @@ double computeForce(
     LIKWID_MARKER_STOP("force");
     double E = getTimeStamp();
 
-    initialized = true;
-
     return E-S;
 }
 }
\ No newline at end of file
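Note on the launch configuration shared by all three kernels above: the grid is sized as ceil(Nlocal / threads-per-block) and every thread guards against i >= Nlocal, so the rounded-up grid is safe. A sketch of the same computation in pure integer arithmetic, which avoids the float cast and any rounding concerns for large Nlocal (names taken from force.cu above):

    // One thread per atom; the block size should stay a multiple of the
    // warp size (32). Integer ceiling division replaces ceil((float)...).
    const int num_threads_per_block = 32;
    const int num_blocks = (Nlocal + num_threads_per_block - 1)
                         / num_threads_per_block;

    kernel_final_integrate<<<num_blocks, num_threads_per_block>>>(
        param->dtforce, Nlocal, c_atom);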
diff --git a/src/includes/atom.h b/src/includes/atom.h
index 5640517..a91e02a 100644
--- a/src/includes/atom.h
+++ b/src/includes/atom.h
@@ -54,6 +54,10 @@ extern void growAtom(Atom*);
 #define atom_fy(i) atom->fx[(i) * 3 + 1]
 #define atom_fz(i) atom->fx[(i) * 3 + 2]
 
+#define atom_vx(i) atom->vx[(i) * 3 + 0]
+#define atom_vy(i) atom->vx[(i) * 3 + 1]
+#define atom_vz(i) atom->vx[(i) * 3 + 2]
+
 #else
 
 #define POS_DATA_LAYOUT "SoA"
@@ -65,6 +69,10 @@ extern void growAtom(Atom*);
 #define atom_fy(i) atom->fy[i]
 #define atom_fz(i) atom->fz[i]
 
+#define atom_vx(i) atom->vx[i]
+#define atom_vy(i) atom->vy[i]
+#define atom_vz(i) atom->vz[i]
+
 #endif
 
 #endif
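Note on the new AOS macros: all three velocity components index the single interleaved vx buffer, i.e. velocities are stored as [vx0 vy0 vz0 vx1 vy1 vz1 ...], so atom->vy and atom->vz stay unused in the AOS layout (growAtom() accordingly grows only vx there). Addressing sketch:

    /* Under AOS, component c of atom i lives at vx[3 * i + c]. */
    MD_FLOAT vy_of_7 = atom->vx[7 * 3 + 1]; /* same element as atom_vy(7) */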
diff --git a/src/main.c b/src/main.c
index 86b59a2..28898b0 100644
--- a/src/main.c
+++ b/src/main.c
@@ -45,6 +45,11 @@
 
 #define HLINE "----------------------------------------------------------------------------\n"
 
+extern void initCudaAtom(Atom *atom, Neighbor *neighbor);
+
+extern void cuda_final_integrate(bool doReneighbour, Parameter *param, Atom *atom);
+extern void cuda_initial_integrate(bool doReneighbour, Parameter *param, Atom *atom);
+
 extern double computeForce(bool, Parameter*, Atom*, Neighbor*);
 extern double computeForceTracing(Parameter*, Atom*, Neighbor*, Stats*, int, int);
 extern double computeForceEam(Eam* eam, Parameter*, Atom *atom, Neighbor *neighbor, Stats *stats, int first_exec, int timestep);
@@ -101,6 +106,8 @@ double setup(
     buildNeighbor(atom, neighbor);
     E = getTimeStamp();
 
+    initCudaAtom(atom, neighbor);
+
     return E-S;
 }
 
@@ -126,26 +133,22 @@ double reneighbour(
 
 void initialIntegrate(Parameter *param, Atom *atom)
 {
-    MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
-
     for(int i = 0; i < atom->Nlocal; i++) {
-        vx[i] += param->dtforce * atom_fx(i);
-        vy[i] += param->dtforce * atom_fy(i);
-        vz[i] += param->dtforce * atom_fz(i);
-        atom_x(i) = atom_x(i) + param->dt * vx[i];
-        atom_y(i) = atom_y(i) + param->dt * vy[i];
-        atom_z(i) = atom_z(i) + param->dt * vz[i];
+        atom_vx(i) += param->dtforce * atom_fx(i);
+        atom_vy(i) += param->dtforce * atom_fy(i);
+        atom_vz(i) += param->dtforce * atom_fz(i);
+        atom_x(i) = atom_x(i) + param->dt * atom_vx(i);
+        atom_y(i) = atom_y(i) + param->dt * atom_vy(i);
+        atom_z(i) = atom_z(i) + param->dt * atom_vz(i);
     }
 }
 
 void finalIntegrate(Parameter *param, Atom *atom)
 {
-    MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
-
     for(int i = 0; i < atom->Nlocal; i++) {
-        vx[i] += param->dtforce * atom_fx(i);
-        vy[i] += param->dtforce * atom_fy(i);
-        vz[i] += param->dtforce * atom_fz(i);
+        atom_vx(i) += param->dtforce * atom_fx(i);
+        atom_vy(i) += param->dtforce * atom_fy(i);
+        atom_vz(i) += param->dtforce * atom_fz(i);
     }
 }
 
@@ -274,10 +277,11 @@ int main(int argc, char** argv)
     }
 
     for(int n = 0; n < param.ntimes; n++) {
-        initialIntegrate(&param, &atom);
 
         const bool doReneighbour = (n + 1) % param.every == 0;
 
+        cuda_initial_integrate(doReneighbour, &param, &atom); // initialIntegrate(&param, &atom);
+
         if(doReneighbour) {
             timer[NEIGH] += reneighbour(&param, &atom, &neighbor);
         } else {
@@ -293,7 +297,7 @@ int main(int argc, char** argv)
             timer[FORCE] += computeForce(doReneighbour, &param, &atom, &neighbor);
 #endif
         }
-        finalIntegrate(&param, &atom);
+        cuda_final_integrate(doReneighbour, &param, &atom); // finalIntegrate(&param, &atom);
 
         if(!((n + 1) % param.nstat) && (n+1) < param.ntimes) {
             computeThermo(n + 1, &param, &atom);
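Note on the integration split in the main loop above: cuda_initial_integrate performs the first velocity half-kick plus the position drift, computeForce produces fresh forces, and cuda_final_integrate performs the second half-kick; together that is the standard velocity-Verlet scheme. A one-dimensional host sketch, assuming dtforce carries the dt/(2*mass) factor set up by the parameter handling:

    /* One velocity-Verlet step; *f holds the force from the previous
     * step on entry, as in the timestep loop above. */
    void verlet_step_sketch(double *x, double *v, double *f,
                            double dt, double dtforce,
                            double (*force)(double)) {
        *v += dtforce * (*f); /* first half-kick  (cuda_initial_integrate) */
        *x += dt * (*v);      /* drift                                     */
        *f = force(*x);       /* new force        (computeForce)           */
        *v += dtforce * (*f); /* second half-kick (cuda_final_integrate)   */
    }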
diff --git a/src/neighbor.c b/src/neighbor.c
index c9f9634..cce3756 100644
--- a/src/neighbor.c
+++ b/src/neighbor.c
@@ -359,14 +359,18 @@ void sortAtom(Atom* atom) {
 
 #ifdef AOS
     double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
+
+    double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT) * 3);
 #else
     double* new_x = (double*) malloc(Nmax * sizeof(MD_FLOAT));
     double* new_y = (double*) malloc(Nmax * sizeof(MD_FLOAT));
     double* new_z = (double*) malloc(Nmax * sizeof(MD_FLOAT));
-#endif
+
     double* new_vx = (double*) malloc(Nmax * sizeof(MD_FLOAT));
     double* new_vy = (double*) malloc(Nmax * sizeof(MD_FLOAT));
     double* new_vz = (double*) malloc(Nmax * sizeof(MD_FLOAT));
+#endif
+
     double* old_x = atom->x; double* old_y = atom->y; double* old_z = atom->z;
     double* old_vx = atom->vx; double* old_vy = atom->vy; double* old_vz = atom->vz;
@@ -380,24 +384,34 @@ void sortAtom(Atom* atom) {
             new_x[new_i * 3 + 0] = old_x[old_i * 3 + 0];
             new_x[new_i * 3 + 1] = old_x[old_i * 3 + 1];
             new_x[new_i * 3 + 2] = old_x[old_i * 3 + 2];
+
+            new_vx[new_i * 3 + 0] = old_vx[old_i * 3 + 0];
+            new_vx[new_i * 3 + 1] = old_vx[old_i * 3 + 1];
+            new_vx[new_i * 3 + 2] = old_vx[old_i * 3 + 2];
 #else
             new_x[new_i] = old_x[old_i];
             new_y[new_i] = old_y[old_i];
             new_z[new_i] = old_z[old_i];
-#endif
+
             new_vx[new_i] = old_vx[old_i];
             new_vy[new_i] = old_vy[old_i];
             new_vz[new_i] = old_vz[old_i];
+#endif
+
         }
     }
 
     free(atom->x); atom->x = new_x;
+
+    free(atom->vx);
+    atom->vx = new_vx;
 #ifndef AOS
     free(atom->y); free(atom->z);
     atom->y = new_y; atom->z = new_z;
+
+    free(atom->vy); free(atom->vz);
+    atom->vy = new_vy; atom->vz = new_vz;
 #endif
-    free(atom->vx); free(atom->vy); free(atom->vz);
-    atom->vx = new_vx; atom->vy = new_vy; atom->vz = new_vz;
 }
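Note on the AOS branch of sortAtom() above: under AOS every velocity component lives in the interleaved vx buffer and atom->vy/atom->vz are never allocated by growAtom(), so all three gathered components must be read from old_vx (the hunk reads old_vx for all of them for exactly that reason). Loop form of the same gather:

    /* Reorder one atom's velocity in the AOS layout: all three
     * components come from the single interleaved vx array. */
    for (int c = 0; c < 3; c++) {
        new_vx[new_i * 3 + c] = old_vx[old_i * 3 + c];
    }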
diff --git a/src/thermo.c b/src/thermo.c
index 70b897d..6161bf4 100644
--- a/src/thermo.c
+++ b/src/thermo.c
@@ -71,12 +71,9 @@ void setupThermo(Parameter *param, int natoms)
 void computeThermo(int iflag, Parameter *param, Atom *atom)
 {
     MD_FLOAT t = 0.0, p;
-    MD_FLOAT* vx = atom->vx;
-    MD_FLOAT* vy = atom->vy;
-    MD_FLOAT* vz = atom->vz;
 
     for(int i = 0; i < atom->Nlocal; i++) {
-        t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass;
+        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
     }
 
     t = t * t_scale;
@@ -101,12 +98,11 @@ void adjustThermo(Parameter *param, Atom *atom)
 {
     /* zero center-of-mass motion */
     MD_FLOAT vxtot = 0.0; MD_FLOAT vytot = 0.0; MD_FLOAT vztot = 0.0;
-    MD_FLOAT* vx = atom->vx; MD_FLOAT* vy = atom->vy; MD_FLOAT* vz = atom->vz;
 
     for(int i = 0; i < atom->Nlocal; i++) {
-        vxtot += vx[i];
-        vytot += vy[i];
-        vztot += vz[i];
+        vxtot += atom_vx(i);
+        vytot += atom_vy(i);
+        vztot += atom_vz(i);
     }
 
     vxtot = vxtot / atom->Natoms;
@@ -114,24 +110,24 @@ void adjustThermo(Parameter *param, Atom *atom)
     vztot = vztot / atom->Natoms;
 
     for(int i = 0; i < atom->Nlocal; i++) {
-        vx[i] -= vxtot;
-        vy[i] -= vytot;
-        vz[i] -= vztot;
+        atom_vx(i) -= vxtot;
+        atom_vy(i) -= vytot;
+        atom_vz(i) -= vztot;
     }
 
     t_act = 0;
     MD_FLOAT t = 0.0;
 
     for(int i = 0; i < atom->Nlocal; i++) {
-        t += (vx[i] * vx[i] + vy[i] * vy[i] + vz[i] * vz[i]) * param->mass;
+        t += (atom_vx(i) * atom_vx(i) + atom_vy(i) * atom_vy(i) + atom_vz(i) * atom_vz(i)) * param->mass;
     }
 
     t *= t_scale;
     MD_FLOAT factor = sqrt(param->temp / t);
 
     for(int i = 0; i < atom->Nlocal; i++) {
-        vx[i] *= factor;
-        vy[i] *= factor;
-        vz[i] *= factor;
+        atom_vx(i) *= factor;
+        atom_vy(i) *= factor;
+        atom_vz(i) *= factor;
     }
 }
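Note on the rescale at the end of adjustThermo(): the kinetic temperature t is proportional to the sum of squared velocities, so multiplying every component by a common factor s scales t by s^2; s = sqrt(param->temp / t) therefore lands exactly on the target temperature. A quick numeric self-check with hypothetical values:

    #include <assert.h>
    #include <math.h>

    int main(void) {
        double t = 2.4, t_target = 1.44;  /* measured and target temperature */
        double s = sqrt(t_target / t);    /* the factor used in adjustThermo */
        assert(fabs(t * s * s - t_target) < 1e-12); /* t scales with s^2 */
        return 0;
    }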