Adjust NVCC flags to avoid issues with atomicAdd with doubles
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
c70ebce4c1
commit
93188d1383
@ -196,10 +196,10 @@ __global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_
|
|||||||
int cond;
|
int cond;
|
||||||
#if CLUSTER_M == CLUSTER_N
|
#if CLUSTER_M == CLUSTER_N
|
||||||
cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) :
|
cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) :
|
||||||
(ci_cj0 != cj || cii_pos != cjj_pos);
|
(ci_cj0 != cj || cii_pos != cjj_pos);
|
||||||
#elif CLUSTER_M < CLUSTER_N
|
#elif CLUSTER_M < CLUSTER_N
|
||||||
cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) :
|
cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) :
|
||||||
(ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos);
|
(ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos);
|
||||||
#endif
|
#endif
|
||||||
if(cond) {
|
if(cond) {
|
||||||
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj_pos];
|
MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj_pos];
|
||||||
|
@ -6,10 +6,15 @@ ANSI_CFLAGS += -std=c99
|
|||||||
ANSI_CFLAGS += -pedantic
|
ANSI_CFLAGS += -pedantic
|
||||||
ANSI_CFLAGS += -Wextra
|
ANSI_CFLAGS += -Wextra
|
||||||
|
|
||||||
CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
#
|
||||||
|
# A100 + Native
|
||||||
|
CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||||
|
# A40 + Native
|
||||||
|
#CFLAGS = -O3 -arch=sm_86 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||||
|
# Cascade Lake
|
||||||
#CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
#CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||||
#CFLAGS = -O3 -g # -fopenmp
|
# For GROMACS kernels, we need at least sm_60 (compute capability 6.0) due to atomicAdd with doubles
|
||||||
#CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
# TODO: Check if this is required for full neighbor-lists and just compile kernel for that case if not
|
||||||
#CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
|
#CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
|
||||||
ASFLAGS = -masm=intel
|
ASFLAGS = -masm=intel
|
||||||
LFLAGS =
|
LFLAGS =
|
||||||
|
Loading…
Reference in New Issue
Block a user