Adjust NVCC flags
Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com>
This commit is contained in:
		| @@ -102,7 +102,8 @@ static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int | ||||
| static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm256_set1_epi32(scalar); } | ||||
| static inline MD_SIMD_INT simd_int_zero() { return _mm256_setzero_si256(); } | ||||
| static inline MD_SIMD_INT simd_int_seq() { return _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); } | ||||
| static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); } | ||||
| static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_si256((const MD_SIMD_INT *) m); } | ||||
| //static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm256_load_epi32(m); } | ||||
| static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_add_epi32(a, b); } | ||||
| static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_mul_epi32(a, b); } | ||||
| static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) { return _mm256_mask_load_epi32(simd_int_zero(), k, m); } | ||||
|   | ||||
| @@ -6,9 +6,11 @@ ANSI_CFLAGS += -std=c99 | ||||
| ANSI_CFLAGS += -pedantic | ||||
| ANSI_CFLAGS += -Wextra | ||||
|  | ||||
| # CFLAGS   = -O0 -g  -std=c99 -fargument-noalias | ||||
| CFLAGS   = -O3 -march=native  -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp | ||||
| #CFLAGS   = -O3 -march=cascadelake  -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp | ||||
| #CFLAGS   = -O3 -g # -fopenmp | ||||
| #CFLAGS   = -O0 -g  -std=c99 -fargument-noalias | ||||
| #CFLAGS   = -O3 -g -arch=sm_61 # -fopenmp | ||||
| CFLAGS   = -O3 -g # -fopenmp | ||||
| ASFLAGS  =  -masm=intel | ||||
| LFLAGS   = | ||||
| DEFINES  = -D_GNU_SOURCE -DCUDA_TARGET -DNO_ZMM_INTRIN #-DLIKWID_PERFMON | ||||
|   | ||||
| @@ -116,6 +116,17 @@ double computeForce(Eam *eam, Parameter *param, Atom *atom, Neighbor *neighbor, | ||||
|     #endif | ||||
| } | ||||
|  | ||||
| void writeInput(Parameter *param, Atom *atom) { | ||||
|     FILE *fpin = fopen("input.in", "w"); | ||||
|     fprintf(fpin, "0,%f,0,%f,0,%f\n", param->xprd, param->yprd, param->zprd); | ||||
|  | ||||
|     for(int i = 0; i < atom->Nlocal; i++) { | ||||
|         fprintf(fpin, "1,%f,%f,%f,%f,%f,%f\n", atom_x(i), atom_y(i), atom_z(i), atom_vx(i), atom_vy(i), atom_vz(i)); | ||||
|     } | ||||
|  | ||||
|     fclose(fpin); | ||||
| } | ||||
|  | ||||
| int main(int argc, char** argv) { | ||||
|     double timer[NUMTIMER]; | ||||
|     Eam eam; | ||||
| @@ -218,6 +229,8 @@ int main(int argc, char** argv) { | ||||
|     traceAddresses(&param, &atom, &neighbor, n + 1); | ||||
|     #endif | ||||
|  | ||||
|     //writeInput(&param, &atom); | ||||
|  | ||||
|     timer[FORCE] = computeForce(&eam, &param, &atom, &neighbor, &stats); | ||||
|     timer[NEIGH] = 0.0; | ||||
|     timer[TOTAL] = getTimeStamp(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user