Fixed various compiler errors; a memory leak probably still remains
This commit is contained in:
parent
45f83c7607
commit
60ed524dd8
@ -201,7 +201,7 @@ double computeForce(
|
|||||||
|
|
||||||
|
|
||||||
if(!reneighbourHappenend) {
|
if(!reneighbourHappenend) {
|
||||||
checkCUDAError( "c_atom.x memcpy", cudaMemcpy(c_atom.x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
checkCUDAError( "computeForce c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
||||||
}
|
}
|
||||||
|
|
||||||
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
const int num_blocks = ceil((float)Nlocal / (float)num_threads_per_block);
|
||||||
@ -221,4 +221,4 @@ double computeForce(
|
|||||||
|
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -136,10 +136,10 @@ double setup(
|
|||||||
adjustThermo(param, atom);
|
adjustThermo(param, atom);
|
||||||
setupPbc(atom, param);
|
setupPbc(atom, param);
|
||||||
updatePbc(atom, param);
|
updatePbc(atom, param);
|
||||||
|
initCudaAtom(atom, neighbor, c_atom, c_neighbor);
|
||||||
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
|
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
|
||||||
E = getTimeStamp();
|
E = getTimeStamp();
|
||||||
|
|
||||||
initCudaAtom(atom, neighbor, c_atom, c_neighbor);
|
|
||||||
|
|
||||||
return E-S;
|
return E-S;
|
||||||
}
|
}
|
||||||
@ -160,7 +160,7 @@ double reneighbour(
|
|||||||
setupPbc(atom, param);
|
setupPbc(atom, param);
|
||||||
updatePbc(atom, param);
|
updatePbc(atom, param);
|
||||||
//sortAtom(atom);
|
//sortAtom(atom);
|
||||||
buildNeighbor(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
|
buildNeighbor_cuda(atom, neighbor, c_atom, c_neighbor, num_threads_per_block);
|
||||||
LIKWID_MARKER_STOP("reneighbour");
|
LIKWID_MARKER_STOP("reneighbour");
|
||||||
E = getTimeStamp();
|
E = getTimeStamp();
|
||||||
|
|
||||||
@ -218,7 +218,7 @@ int get_num_threads() {
|
|||||||
|
|
||||||
const char *num_threads_env = getenv("NUM_THREADS");
|
const char *num_threads_env = getenv("NUM_THREADS");
|
||||||
int num_threads = 0;
|
int num_threads = 0;
|
||||||
if(num_threads_env == nullptr)
|
if(num_threads_env == 0)
|
||||||
num_threads = 32;
|
num_threads = 32;
|
||||||
else {
|
else {
|
||||||
num_threads = atoi(num_threads_env);
|
num_threads = atoi(num_threads_env);
|
||||||
|
@ -23,6 +23,9 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <cuda_profiler_api.h>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <device_launch_parameters.h>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
@ -68,7 +71,7 @@ __device__ int coord2bin_device(MD_FLOAT xin, MD_FLOAT yin, MD_FLOAT zin,
|
|||||||
}
|
}
|
||||||
|
|
||||||
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
__global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
||||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs){
|
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs, MD_FLOAT cutneighsq){
|
||||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int Nlocal = a.Nlocal;
|
const int Nlocal = a.Nlocal;
|
||||||
if( i >= Nlocal ) {
|
if( i >= Nlocal ) {
|
||||||
@ -83,7 +86,7 @@ __global__ void compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np,
|
|||||||
MD_FLOAT xtmp = atom_x(i);
|
MD_FLOAT xtmp = atom_x(i);
|
||||||
MD_FLOAT ytmp = atom_y(i);
|
MD_FLOAT ytmp = atom_y(i);
|
||||||
MD_FLOAT ztmp = atom_z(i);
|
MD_FLOAT ztmp = atom_z(i);
|
||||||
int ibin = coord2bin_device(xtmp, ytmp, ztmp, Neighbor_params np);
|
int ibin = coord2bin_device(xtmp, ytmp, ztmp, np);
|
||||||
#ifdef EXPLICIT_TYPES
|
#ifdef EXPLICIT_TYPES
|
||||||
int type_i = atom->type[i];
|
int type_i = atom->type[i];
|
||||||
#endif
|
#endif
|
||||||
@ -541,7 +544,7 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
|
|||||||
|
|
||||||
cudaProfilerStart();
|
cudaProfilerStart();
|
||||||
|
|
||||||
checkCUDAError( "c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
checkCUDAError( "buildNeighbor c_atom->x memcpy", cudaMemcpy(c_atom->x, atom->x, sizeof(MD_FLOAT) * atom->Nmax * 3, cudaMemcpyHostToDevice) );
|
||||||
|
|
||||||
/* upload stencil */
|
/* upload stencil */
|
||||||
int* c_stencil;
|
int* c_stencil;
|
||||||
@ -589,10 +592,11 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
|
|||||||
/*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
/*compute_neighborhood(Atom a, Neighbor neigh, Neighbor_params np, int nstencil, int* stencil,
|
||||||
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs)
|
int* bins, int atoms_per_bin, int *bincount, int *new_maxneighs)
|
||||||
* */
|
* */
|
||||||
compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_Atom, *c_neighbor,
|
compute_neighborhood<<<num_blocks, num_threads_per_block>>>(*c_atom, *c_neighbor,
|
||||||
np, nstencil, c_stencil,
|
np, nstencil, c_stencil,
|
||||||
c_bins, atoms_per_bin, c_bincount,
|
c_bins, atoms_per_bin, c_bincount,
|
||||||
c_new_maxneighs);
|
c_new_maxneighs,
|
||||||
|
cutneighsq);
|
||||||
|
|
||||||
// TODO copy the value of c_new_maxneighs back to host and check if it has been modified
|
// TODO copy the value of c_new_maxneighs back to host and check if it has been modified
|
||||||
int new_maxneighs;
|
int new_maxneighs;
|
||||||
@ -616,8 +620,8 @@ void buildNeighbor_cuda(Atom *atom, Neighbor *neighbor, Atom *c_atom, Neighbor *
|
|||||||
cudaProfilerStop();
|
cudaProfilerStop();
|
||||||
|
|
||||||
cudaFree(c_new_maxneighs);
|
cudaFree(c_new_maxneighs);
|
||||||
cudaFree(c_n_stencil);
|
cudaFree(c_stencil);
|
||||||
cudaFree(c_bincount);
|
cudaFree(c_bincount);
|
||||||
cudaFree(c_bins);
|
cudaFree(c_bins);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user