Fixed various compiler errors; a memory leak probably still remains

This commit is contained in:
Martin Bauernfeind 2022-06-26 18:37:09 +02:00
parent 45f83c7607
commit 60ed524dd8
3 changed files with 16 additions and 12 deletions

View File

@ -201,7 +201,7 @@ double computeForce(
if(!reneighbourHappenend) { if(!reneighbourHappenend) {
checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); checkCUDAError( "computeForce c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
} }
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block); const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
@ -221,4 +221,4 @@ double computeForce(
return E-S; return E-S;
} }
} }

View File

@ -136,10 +136,10 @@ double setup(
adjustThermo(param, atom); adjustThermo(param, atom);
setupPbc(atom, param); setupPbc(atom, param);
updatePbc(atom, param); updatePbc(atom, param);
initCudaAtom(atom, neighbor, c_atom, c_neighbor);
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
E = getTimeStamp(); E = getTimeStamp();
initCudaAtom(atom, neighbor, c_atom, c_neighbor);
return E-S; return E-S;
} }
@ -160,7 +160,7 @@ double reneighbour(
setupPbc(atom, param); setupPbc(atom, param);
updatePbc(atom, param); updatePbc(atom, param);
//sortAtom(atom); //sortAtom(atom);
buildNeighbor(atom, neighbor, c_atom, c_neighbor, num_threads_per_block); buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
LIKWID_MARKER_STOP("reneighbour"); LIKWID_MARKER_STOP("reneighbour");
E = getTimeStamp(); E = getTimeStamp();
@ -218,7 +218,7 @@ int get_num_threads() {
const char *num_threads_env = getenv("NUM_THREADS"); const char *num_threads_env = getenv("NUM_THREADS");
int num_threads = 0; int num_threads = 0;
if(num_threads_env == nullptr) if(num_threads_env == 0)
num_threads = 32; num_threads = 32;
else { else {
num_threads = atoi(num_threads_env); num_threads = atoi(num_threads_env);

View File

@ -23,6 +23,9 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include <cuda_profiler_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
extern "C" { extern "C" {
@ -68,7 +71,7 @@ __device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin,
} }
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil, __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs){ int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
const int i = blockIdx.x * blockDim.x + threadIdx.x; const int i = blockIdx.x * blockDim.x + threadIdx.x;
const int Nlocal = a.Nlocal; const int Nlocal = a.Nlocal;
if( i >= Nlocal ) { if( i >= Nlocal ) {
@ -83,7 +86,7 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np,
MD_FLOAT xtmp = atom_x(i); MD_FLOAT xtmp = atom_x(i);
MD_FLOAT ytmp = atom_y(i); MD_FLOAT ytmp = atom_y(i);
MD_FLOAT ztmp = atom_z(i); MD_FLOAT ztmp = atom_z(i);
int ibin = coord2bin_device(xtmp, ytmp, ztmp, Neighbor_params np); int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
#ifdef EXPLICIT_TYPES #ifdef EXPLICIT_TYPES
int type_i = atom->type[i]; int type_i = atom->type[i];
#endif #endif
@ -541,7 +544,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
cudaProfilerStart(); cudaProfilerStart();
checkCUDAError( "c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) ); checkCUDAError( "buildNeighbor c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
/* upload stencil */ /* upload stencil */
int* c_stencil; int* c_stencil;
@ -589,10 +592,11 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
/*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil, /*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs) int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs)
* */ * */
compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_Atom, *c_neighbor, compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_atom, *c_neighbor,
np, nstencil, c_stencil, np, nstencil, c_stencil,
c_bins, atoms_per_bin, c_bincount, c_bins, atoms_per_bin, c_bincount,
c_new_maxneighs); c_new_maxneighs,
cutneighsq);
// TODO copy the value of c_new_maxneighs back to host and check if it has been modified // TODO copy the value of c_new_maxneighs back to host and check if it has been modified
int new_maxneighs; int new_maxneighs;
@ -616,8 +620,8 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
cudaProfilerStop(); cudaProfilerStop();
cudaFree(c_new_maxneighs); cudaFree(c_new_maxneighs);
cudaFree(c_n_stencil); cudaFree(c_stencil);
cudaFree(c_bincount); cudaFree(c_bincount);
cudaFree(c_bins); cudaFree(c_bins);
} }
} }