Adjust NVCC flags
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
parent
c4304e3619
commit
437b380229
@ -102,7 +102,8 @@ static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int
|
|||||||
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm256_set1_epi32(scalar); }
|
static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm256_set1_epi32(scalar); }
|
||||||
static inline MD_SIMD_INT simd_int_zero() { return _mm256_setzero_si256(); }
|
static inline MD_SIMD_INT simd_int_zero() { return _mm256_setzero_si256(); }
|
||||||
static inline MD_SIMD_INT simd_int_seq() { return _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); }
|
static inline MD_SIMD_INT simd_int_seq() { return _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); }
|
||||||
static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); }
|
static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_si256((const MD_SIMD_INT *) m); }
|
||||||
|
//static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); }
|
||||||
static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_add_epi32(a, b); }
|
static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_add_epi32(a, b); }
|
||||||
static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_mul_epi32(a, b); }
|
static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_mul_epi32(a, b); }
|
||||||
static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) { return _mm256_mask_load_epi32(simd_int_zero(), k, m); }
|
static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) { return _mm256_mask_load_epi32(simd_int_zero(), k, m); }
|
||||||
|
@ -6,9 +6,11 @@ ANSI_CFLAGS += -std=c99
|
|||||||
ANSI_CFLAGS += -pedantic
|
ANSI_CFLAGS += -pedantic
|
||||||
ANSI_CFLAGS += -Wextra
|
ANSI_CFLAGS += -Wextra
|
||||||
|
|
||||||
|
CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||||
|
#CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp
|
||||||
|
#CFLAGS = -O3 -g # -fopenmp
|
||||||
#CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
#CFLAGS = -O0 -g -std=c99 -fargument-noalias
|
||||||
#CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
|
#CFLAGS = -O3 -g -arch=sm_61 # -fopenmp
|
||||||
CFLAGS = -O3 -g # -fopenmp
|
|
||||||
ASFLAGS = -masm=intel
|
ASFLAGS = -masm=intel
|
||||||
LFLAGS =
|
LFLAGS =
|
||||||
DEFINES = -D_GNU_SOURCE -DCUDA_TARGET -DNO_ZMM_INTRIN #-DLIKWID_PERFMON
|
DEFINES = -D_GNU_SOURCE -DCUDA_TARGET -DNO_ZMM_INTRIN #-DLIKWID_PERFMON
|
||||||
|
@ -116,6 +116,17 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void writeInput(Parameter *param, Atom *atom) {
|
||||||
|
FILE *fpin = fopen("input.in", "w");
|
||||||
|
fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd);
|
||||||
|
|
||||||
|
for(int i = 0; i < atom->Nlocal; i++) {
|
||||||
|
fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(fpin);
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
double timer[NUMTIMER];
|
double timer[NUMTIMER];
|
||||||
Eam eam;
|
Eam eam;
|
||||||
@ -218,6 +229,8 @@ int main(int argc, char** argv) {
|
|||||||
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
traceAddresses(¶m, &atom, &neighbor, n + 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//writeInput(¶m, &atom);
|
||||||
|
|
||||||
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
timer[FORCE] = computeForce(&eam, ¶m, &atom, &neighbor, &stats);
|
||||||
timer[NEIGH] = 0.0;
|
timer[NEIGH] = 0.0;
|
||||||
timer[TOTAL] = getTimeStamp();
|
timer[TOTAL] = getTimeStamp();
|
||||||
|
Loading…
Reference in New Issue
Block a user