diff --git a/evaluate_gpu_perf_per_thread.sh b/evaluate_gpu_perf_per_thread.sh
index 135a03b..af68422 100644
--- a/evaluate_gpu_perf_per_thread.sh
+++ b/evaluate_gpu_perf_per_thread.sh
@@ -1,5 +1,6 @@
 END=32
 for ((i=1;i<=END;i++)); do
     output=$(eval "NUM_THREADS=$i ./MDBench-NVCC -n 50")
-    echo "$output" | grep 'atom updates per second' | sed 's/[^0-9.]//g' | awk '{print $1"e6"}'
+    echo -n "$i,"
+    echo "$output" | grep 'atom updates per second' | sed 's/[^0-9.,]//g' | awk '{print $1"e6"}'
 done
diff --git a/src/neighbor.c b/src/neighbor.c
index 4fa3416..fbf582d 100644
--- a/src/neighbor.c
+++ b/src/neighbor.c
@@ -26,6 +26,7 @@
 
 #include
 #include
+#include
 #include
 
 #define SMALL 1.0e-6
@@ -174,10 +175,12 @@ void buildNeighbor(Atom *atom, Neighbor *neighbor)
     /* extend atom arrays if necessary */
     if(nall > nmax) {
         nmax = nall;
-        if(neighbor->numneigh) free(neighbor->numneigh);
-        if(neighbor->neighbors) free(neighbor->neighbors);
-        neighbor->numneigh = (int*) malloc(nmax * sizeof(int));
-        neighbor->neighbors = (int*) malloc(nmax * neighbor->maxneighs * sizeof(int*));
+        /* Buffers are pinned with cudaMallocHost below, so release them with cudaFreeHost (checked like the allocations). */
+        if(neighbor->numneigh) checkCUDAError( "buildNeighbor free numneigh", cudaFreeHost(neighbor->numneigh) );
+        if(neighbor->neighbors) checkCUDAError( "buildNeighbor free neighbors", cudaFreeHost(neighbor->neighbors) );
+        checkCUDAError( "buildNeighbor numneigh", cudaMallocHost((void**)&(neighbor->numneigh), nmax * sizeof(int)) );
+        /* neighbors stores int entries: size by sizeof(int), not sizeof(int*), to avoid pinning twice the memory on 64-bit. */
+        checkCUDAError( "buildNeighbor neighbors", cudaMallocHost((void**)&(neighbor->neighbors), nmax * neighbor->maxneighs * sizeof(int)) );
     }
 
     /* bin local & ghost atoms */