From 93188d138358bbe6e858a8ce8137a6f7cea601aa Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Mon, 14 Nov 2022 18:01:46 +0100 Subject: [PATCH] Adjust NVCC flags to avoid issues with atomicAdd with doubles Signed-off-by: Rafael Ravedutti --- gromacs/cuda/force_lj.cu | 4 ++-- include_NVCC.mk | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/gromacs/cuda/force_lj.cu b/gromacs/cuda/force_lj.cu index 96d3e26..75d6929 100644 --- a/gromacs/cuda/force_lj.cu +++ b/gromacs/cuda/force_lj.cu @@ -196,10 +196,10 @@ __global__ void computeForceLJ_cuda_warp(MD_FLOAT *cuda_cl_x, MD_FLOAT *cuda_cl_ int cond; #if CLUSTER_M == CLUSTER_N cond = half_neigh ? (ci_cj0 != cj || cii_pos < cjj_pos) : - (ci_cj0 != cj || cii_pos != cjj_pos); + (ci_cj0 != cj || cii_pos != cjj_pos); #elif CLUSTER_M < CLUSTER_N cond = half_neigh ? (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) < cjj_pos) : - (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos); + (ci_cj0 != cj || cii_pos + CLUSTER_M * (ci_pos & 0x1) != cjj_pos); #endif if(cond) { MD_FLOAT delx = xtmp - cj_x[CL_X_OFFSET + cjj_pos]; diff --git a/include_NVCC.mk b/include_NVCC.mk index 3201e64..8a7fed7 100644 --- a/include_NVCC.mk +++ b/include_NVCC.mk @@ -6,10 +6,15 @@ ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp +# +# A100 + Native +CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp +# A40 + Native +#CFLAGS = -O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp +# Cascade Lake #CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp -#CFLAGS = -O3 -g # -fopenmp -#CFLAGS = -O0 -g -std=c99 -fargument-noalias +# For GROMACS kernels, we need at least sm_61 due to atomicAdd with doubles +# TODO: Check if this is 
required for full neighbor-lists and, if it is not, compile the kernel only for that case #CFLAGS = -O3 -g -arch=sm_61 # -fopenmp ASFLAGS = -masm=intel LFLAGS =