Adjust NVCC flags
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
c4304e3619
commit
437b380229
@ -102,7 +102,8 @@ static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int
|
||||
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm256_set1_epi32(scalar); }
|
||||
static inline MD_SIMD_INT simd_int_zero() { return _mm256_setzero_si256(); }
|
||||
static inline MD_SIMD_INT simd_int_seq() { return _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); }
|
||||
// Load an integer vector from `m` through _mm256_load_epi32.
// NOTE(review): a second simd_int_load with the same signature (using
// _mm256_load_si256) is defined just below; presumably this line is the
// pre-change version shown by the diff -- confirm only one definition is kept.
static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); }
|
||||
static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_si256((const MD_SIMD_INT *) m); }
|
||||
//static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); }
|
||||
/* Add the 32-bit lanes of `a` and `b` pairwise and return the sums. */
static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) {
    const MD_SIMD_INT sum = _mm256_add_epi32(a, b);
    return sum;
}
|
||||
/*
 * Multiply the 32-bit lanes of `a` and `b` pairwise, keeping the low 32 bits
 * of each product.
 *
 * _mm256_mullo_epi32 is used rather than _mm256_mul_epi32: the latter only
 * multiplies the low 32-bit half of every 64-bit element and returns four
 * 64-bit products, which does not match the eight-lane layout used by the
 * other simd_int_* helpers in this header.
 */
static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) {
    return _mm256_mullo_epi32(a, b);
}
|
||||
/*
 * Masked load of 32-bit integers from `m` under mask `k`.
 * An all-zero vector from simd_int_zero() is passed as the source operand
 * of _mm256_mask_load_epi32.
 */
static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) {
    const MD_SIMD_INT fallback = simd_int_zero();
    return _mm256_mask_load_epi32(fallback, k, m);
}
|
||||
|
@ -6,9 +6,11 @@ ANSI_CFLAGS += -std=c99
|
||||
ANSI_CFLAGS += -pedantic
|
||||
ANSI_CFLAGS += -Wextra
|
||||
|
||||
# CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
||||
CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||
#CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||
#CFLAGS = -O3 -g # -fopenmp
|
||||
#CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
||||
#CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
|
||||
CFLAGS = -O3 -g # -fopenmp
|
||||
ASFLAGS = -masm=intel
|
||||
LFLAGS =
|
||||
DEFINES = -D_GNU_SOURCE -DCUDA_TARGET -DNO_ZMM_INTRIN #-DLIKWID_PERFMON
|
||||
|
@ -116,6 +116,17 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Write the current setup to the text file "input.in" in the working
 * directory (the file is created or truncated).
 *
 * Line 1:      "0,<xprd>,0,<yprd>,0,<zprd>" taken from `param`.
 * Lines 2..:   one line per index i in [0, atom->Nlocal), starting with "1,"
 *              followed by atom_x/y/z(i) and atom_vx/vy/vz(i).
 *
 * If the file cannot be opened, an error is reported on stderr and nothing
 * is written; previously a failed fopen() led to fprintf() on a NULL stream.
 * A failing fclose() (e.g. buffered data could not be flushed) is reported
 * as well.
 */
void writeInput(Parameter *param, Atom *atom) {
    FILE *fpin = fopen("input.in", "w");
    if(fpin == NULL) {
        perror("writeInput: cannot open input.in");
        return;
    }

    fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);

    /* NOTE: the atom_* accessors are defined elsewhere; the parameter must keep the name `atom` in case they refer to it. */
    for(int i = 0; i < atom->Nlocal; i++) {
        fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i));
    }

    if(fclose(fpin) != 0) {
        perror("writeInput: error while closing input.in");
    }
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
double timer[NUMTIMER];
|
||||
Eam eam;
|
||||
@ -218,6 +229,8 @@ int main(int argc, char** argv) {
|
||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||
#endif
|
||||
|
||||
//writeInput(¶m, &atom);
|
||||
|
||||
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||
timer[NEIGH] = 0.0;
|
||||
timer[TOTAL] = getTimeStamp();
|
||||
|
Loading…
Reference in New Issue
Block a user