Adjust NVCC flags

Signed-off-by: Rafael Ravedutti <>
This commit is contained in:
Rafael Ravedutti 2022-11-07 20:37:01 +01:00
parent c4304e3619
commit 437b380229
3 changed files with 19 additions and 3 deletions

View File

@ -102,7 +102,8 @@ static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int
// Returns an integer vector in which every 32-bit lane holds `scalar`.
static inline MD_SIMD_INT simd_int_broadcast(int scalar) {
    const MD_SIMD_INT splat = _mm256_set1_epi32(scalar);
    return splat;
}
// Returns an integer vector with all bits cleared.
// Declared with (void): in C before C23 an empty parameter list leaves the
// parameters unspecified, so calls with stray arguments would go unchecked.
static inline MD_SIMD_INT simd_int_zero(void) { return _mm256_setzero_si256(); }
// Returns the integer vector {0, 1, 2, 3, 4, 5, 6, 7}, i.e. lane i holds i.
// _mm256_set_epi32 takes its arguments from the highest lane down to the
// lowest, which is why the literals are written in descending order.
// Declared with (void): in C before C23 an empty parameter list leaves the
// parameters unspecified, so calls with stray arguments would go unchecked.
static inline MD_SIMD_INT simd_int_seq(void) { return _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); }
// Loads 256 bits of packed 32-bit integers from `m` via _mm256_load_epi32.
static inline MD_SIMD_INT simd_int_load(const int *m) {
    const MD_SIMD_INT loaded = _mm256_load_epi32(m);
    return loaded;
}
// Loads 256 bits of integer data from `m` with _mm256_load_si256.
// This is the aligned load form, so `m` is expected to be 32-byte aligned.
static inline MD_SIMD_INT simd_int_load(const int *m) {
    const MD_SIMD_INT *src = (const MD_SIMD_INT *) m;
    return _mm256_load_si256(src);
}
//static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); }
// Lane-wise sum of two integer vectors, treating each as packed 32-bit integers.
static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) {
    const MD_SIMD_INT sum = _mm256_add_epi32(a, b);
    return sum;
}
// Lane-wise product of two integer vectors, treating each as packed 32-bit
// integers; each lane keeps the low 32 bits of its product.
// _mm256_mullo_epi32 is used rather than _mm256_mul_epi32: the latter only
// multiplies the low 32-bit half of every 64-bit element and returns four
// 64-bit products, which does not match the eight-lane 32-bit layout used by
// the other integer helpers in this file (broadcast, seq, add).
static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_mullo_epi32(a, b); }
// Masked load of packed 32-bit integers from `m`.
// Lanes whose bit in `k` is set are read from memory; the remaining lanes take
// their value from the all-zero vector passed as the pass-through source.
static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) {
    const MD_SIMD_INT fallback = simd_int_zero();
    return _mm256_mask_load_epi32(fallback, k, m);
}

View File

@ -6,9 +6,11 @@ ANSI_CFLAGS += -std=c99
# Extra diagnostics appended to ANSI_CFLAGS.
ANSI_CFLAGS += -pedantic
ANSI_CFLAGS += -Wextra
# CFLAGS = -O0 -g -std=c99 -fargument-noalias
# NOTE(review): CFLAGS is assigned with `=` again further down; in make the
# later assignment replaces this one, so only the last active line takes effect.
# Confirm which flag set is intended to be live.
CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
# Alternative flag sets, all currently disabled.
#CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
#CFLAGS = -O3 -g # -fopenmp
#CFLAGS = -O0 -g -std=c99 -fargument-noalias
#CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
# Optimized build with debug info; OpenMP is left commented out.
CFLAGS = -O3 -g # -fopenmp
# Emit/accept Intel-syntax assembly.
ASFLAGS = -masm=intel

View File

@ -116,6 +116,17 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
void writeInput(Parameter *param, Atom *atom) {
FILE *fpin = fopen("", "w");
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
for(int i = 0; i < atom->Nlocal; i++) {
fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i));
int main(int argc, char** argv) {
double timer[NUMTIMER];
Eam eam;
@ -218,6 +229,8 @@ int main(int argc, char** argv) {
traceAddresses(&param, &atom, &neighbor, n + 1);
//writeInput(&param, &atom);
timer[FORCE] = computeForce(&eam, &param, &atom, &neighbor, &stats);
timer[NEIGH] = 0.0;
timer[TOTAL] = getTimeStamp();