From 493915fe959b892f425d9737bfd04474b24637d5 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti Date: Tue, 8 Nov 2022 15:30:37 +0100 Subject: [PATCH] Fix code for AVX and remove warnings Signed-off-by: Rafael Ravedutti --- Makefile | 4 ++++ common/eam_utils.c | 8 +++---- common/includes/simd/avx_avx2_double.h | 7 +++--- common/includes/util.h | 1 + common/parameter.c | 2 +- common/util.c | 8 +++++++ gromacs/atom.c | 24 ++++++++++----------- include_ISA.mk | 1 + include_NVCC.mk | 2 +- lammps/atom.c | 30 +++++++++++++------------- 10 files changed, 51 insertions(+), 36 deletions(-) diff --git a/Makefile b/Makefile index f275cce..d348982 100644 --- a/Makefile +++ b/Makefile @@ -73,6 +73,10 @@ ifeq ($(strip $(SIMD_KERNEL_AVAILABLE)),true) DEFINES += -DSIMD_KERNEL_AVAILABLE endif +ifeq ($(strip $(NO_AVX2)),true) + DEFINES += -DNO_AVX2 +endif + ifeq ($(strip $(AVX512)),true) DEFINES += -DAVX512 endif diff --git a/common/eam_utils.c b/common/eam_utils.c index 7bb8a60..74fd0c1 100644 --- a/common/eam_utils.c +++ b/common/eam_utils.c @@ -54,10 +54,10 @@ void read_eam_file(Funcfl* file, const char* filename) { } int tmp; - fgets(line, MAXLINE, fptr); - fgets(line, MAXLINE, fptr); + readline(line, fptr); + readline(line, fptr); sscanf(line, "%d %lg", &tmp, &(file->mass)); - fgets(line, MAXLINE, fptr); + readline(line, fptr); sscanf(line, "%d %lg %d %lg %lg", &file->nrho, &file->drho, &file->nr, &file->dr, &file->cut); //printf("Read: %lf %i %lf %i %lf %lf\n",file->mass,file->nrho,file->drho,file->nr,file->dr,file->cut); @@ -261,7 +261,7 @@ void grab(FILE* fptr, int n, MD_FLOAT* list) { int i = 0; while(i < n) { - fgets(line, MAXLINE, fptr); + readline(line, fptr); ptr = strtok(line, " \t\n\r\f"); list[i++] = atof(ptr); while(ptr = strtok(NULL, " \t\n\r\f")) list[i++] = atof(ptr); diff --git a/common/includes/simd/avx_avx2_double.h b/common/includes/simd/avx_avx2_double.h index 4426d17..bde34ee 100644 --- a/common/includes/simd/avx_avx2_double.h +++ b/common/includes/simd/avx_avx2_double.h @@ -71,6 +71,7 @@ static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_cvt static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return simd_add(simd_mul(a, b), c); } static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return simd_add(a, _mm256_and_pd(b, m)); } static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd(a, b, _CMP_LT_OQ); } +static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cvtepi32_pd(_mm_cmplt_epi32(a, b)); } static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _mm256_and_pd(a, b); } // TODO: Initialize all diagonal cases and just select the proper one (all bits set or diagonal) based on cond0 static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { @@ -96,6 +97,7 @@ static inline MD_SIMD_FLOAT simd_reciprocal(MD_SIMD_FLOAT a) { return _mm256_rcp static inline MD_SIMD_FLOAT simd_fma(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_FLOAT c) { return _mm256_fmadd_pd(a, b, c); } static inline MD_SIMD_FLOAT simd_masked_add(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b, MD_SIMD_MASK m) { return _mm256_mask_add_pd(a, m, a, b); } static inline MD_SIMD_MASK simd_mask_cond_lt(MD_SIMD_FLOAT a, MD_SIMD_FLOAT b) { return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ); } +static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm_cmplt_epi32_mask(a, b); } static inline MD_SIMD_MASK simd_mask_and(MD_SIMD_MASK a, MD_SIMD_MASK b) { return _kand_mask8(a, b); } static inline MD_SIMD_MASK simd_mask_from_u32(unsigned int a) { return _cvtu32_mask8(a); } static inline unsigned int simd_mask_to_u32(MD_SIMD_MASK a) { return _cvtmask8_u32(a); } @@ -109,13 +111,13 @@ static inline MD_FLOAT simd_h_reduce_sum(MD_SIMD_FLOAT a) { return *((MD_FLOAT *) &a0); } +#endif + static inline void simd_h_decr3(MD_FLOAT *m, MD_SIMD_FLOAT a0, MD_SIMD_FLOAT a1, MD_SIMD_FLOAT a2) { fprintf(stderr, "simd_h_decr3(): Not implemented for AVX/AVX2 with double precision!"); exit(-1); } -#endif - // Functions used in LAMMPS kernel static inline MD_SIMD_FLOAT simd_gather(MD_SIMD_INT vidx, const MD_FLOAT *m, int s) { return _mm256_i32gather_pd(m, vidx, s); } static inline MD_SIMD_INT simd_int_broadcast(int scalar) { return _mm_set1_epi32(scalar); } @@ -125,4 +127,3 @@ static inline MD_SIMD_INT simd_int_load(const int *m) { return _mm_load_si128((_ static inline MD_SIMD_INT simd_int_add(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm_add_epi32(a, b); } static inline MD_SIMD_INT simd_int_mul(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm_mul_epi32(a, b); } static inline MD_SIMD_INT simd_int_mask_load(const int *m, MD_SIMD_MASK k) { return simd_int_load(m) & _mm256_cvtpd_epi32(k); } -static inline MD_SIMD_MASK simd_mask_int_cond_lt(MD_SIMD_INT a, MD_SIMD_INT b) { return _mm256_cvtepi32_pd(_mm_cmplt_epi32(a, b)); } diff --git a/common/includes/util.h b/common/includes/util.h index 07aae92..7efd000 100644 --- a/common/includes/util.h +++ b/common/includes/util.h @@ -44,5 +44,6 @@ extern void random_reset(int *seed, int ibase, double *coord); extern int str2ff(const char *string); extern const char* ff2str(int ff); extern int get_num_threads(); +extern void readline(char *line, FILE *fp); #endif diff --git a/common/parameter.c b/common/parameter.c index 50039fc..421e570 100644 --- a/common/parameter.c +++ b/common/parameter.c @@ -67,7 +67,7 @@ void readParameter(Parameter *param, const char *filename) { while(!feof(fp)) { line[0] = '\0'; - fgets(line, MAXLINE, fp); + readline(line, fp); for(i = 0; line[i] != '\0' && line[i] != '#'; i++); line[i] = '\0'; diff --git a/common/util.c b/common/util.c index 9f5ec27..0641dc7 100644 --- a/common/util.c +++ b/common/util.c @@ -4,6 +4,7 @@ * Use of this source code is governed by a LGPL-3.0 * license that can be found in the LICENSE file. */ +#include #include #include #include @@ -80,3 +81,10 @@ int get_num_threads() { const char *num_threads_env = getenv("NUM_THREADS"); return (num_threads_env == NULL) ? 32 : atoi(num_threads_env); } + +void readline(char *line, FILE *fp) { + if(fgets(line, MAXLINE, fp) == NULL) { + fprintf(stderr, "readline(): Error: could not read line!\n"); + exit(-1); + } +} diff --git a/gromacs/atom.c b/gromacs/atom.c index a34cccd..3f45116 100644 --- a/gromacs/atom.c +++ b/gromacs/atom.c @@ -147,7 +147,7 @@ int readAtom_pdb(Atom* atom, Parameter* param) { } while(!feof(fp)) { - fgets(line, MAXLINE, fp); + readline(line, fp); char *item = strtok(line, " "); if(strncmp(item, "CRYST1", 6) == 0) { param->xlo = 0.0; @@ -234,15 +234,15 @@ int readAtom_gro(Atom* atom, Parameter* param) { return -1; } - fgets(desc, MAXLINE, fp); + readline(desc, fp); for(i = 0; desc[i] != '\n'; i++); desc[i] = '\0'; - fgets(line, MAXLINE, fp); + readline(line, fp); atoms_to_read = atoi(strtok(line, " ")); fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read); while(!feof(fp) && read_atoms < atoms_to_read) { - fgets(line, MAXLINE, fp); + readline(line, fp); char *label = strtok(line, " "); int type = type_str2int(strtok(NULL, " ")); int atom_id = atoi(strtok(NULL, " ")) - 1; @@ -265,7 +265,7 @@ int readAtom_gro(Atom* atom, Parameter* param) { } if(!feof(fp)) { - fgets(line, MAXLINE, fp); + readline(line, fp); param->xlo = 0.0; param->xhi = atof(strtok(line, " ")); param->ylo = 0.0; @@ -314,15 +314,15 @@ int readAtom_dmp(Atom* atom, Parameter* param) { } while(!feof(fp) && ts < 1 && !read_atoms) { - fgets(line, MAXLINE, fp); + readline(line, fp); if(strncmp(line, "ITEM: ", 6) == 0) { char *item = &line[6]; if(strncmp(item, "TIMESTEP", 8) == 0) { - fgets(line, MAXLINE, fp); + readline(line, fp); ts = atoi(line); } else if(strncmp(item, "NUMBER OF ATOMS", 15) == 0) { - fgets(line, MAXLINE, fp); + readline(line, fp); natoms = atoi(line); atom->Natoms = natoms; atom->Nlocal = natoms; @@ -330,23 +330,23 @@ int readAtom_dmp(Atom* atom, Parameter* param) { growAtom(atom); } } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) { - fgets(line, MAXLINE, fp); + readline(line, fp); param->xlo = atof(strtok(line, " ")); param->xhi = atof(strtok(NULL, " ")); param->xprd = param->xhi - param->xlo; - fgets(line, MAXLINE, fp); + readline(line, fp); param->ylo = atof(strtok(line, " ")); param->yhi = atof(strtok(NULL, " ")); param->yprd = param->yhi - param->ylo; - fgets(line, MAXLINE, fp); + readline(line, fp); param->zlo = atof(strtok(line, " ")); param->zhi = atof(strtok(NULL, " ")); param->zprd = param->zhi - param->zlo; } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) { for(int i = 0; i < natoms; i++) { - fgets(line, MAXLINE, fp); + readline(line, fp); atom_id = atoi(strtok(line, " ")) - 1; atom->type[atom_id] = atoi(strtok(NULL, " ")); atom_x(atom_id) = atof(strtok(NULL, " ")); diff --git a/include_ISA.mk b/include_ISA.mk index 7008d26..ef8a83f 100644 --- a/include_ISA.mk +++ b/include_ISA.mk @@ -2,6 +2,7 @@ ifeq ($(strip $(ISA)), SSE) _VECTOR_WIDTH=2 else ifeq ($(strip $(ISA)), AVX) # Vector width is 4 but AVX2 instruction set is not supported +NO_AVX2=true _VECTOR_WIDTH=4 else ifeq ($(strip $(ISA)), AVX2) #SIMD_KERNEL_AVAILABLE=true diff --git a/include_NVCC.mk b/include_NVCC.mk index 446ccbf..3201e64 100644 --- a/include_NVCC.mk +++ b/include_NVCC.mk @@ -6,7 +6,7 @@ ANSI_CFLAGS += -std=c99 ANSI_CFLAGS += -pedantic ANSI_CFLAGS += -Wextra -CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp +CFLAGS = -O3 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp #CFLAGS = -O3 -march=cascadelake -ffast-math -funroll-loops --forward-unknown-to-host-compiler # -fopenmp #CFLAGS = -O3 -g # -fopenmp #CFLAGS = -O0 -g -std=c99 -fargument-noalias diff --git a/lammps/atom.c b/lammps/atom.c index 53fc05c..a96eeb7 100644 --- a/lammps/atom.c +++ b/lammps/atom.c @@ -4,8 +4,8 @@ * Use of this source code is governed by a LGPL-3.0 * license that can be found in the LICENSE file. */ -#include #include +#include #include #include @@ -186,7 +186,7 @@ int readAtom_pdb(Atom* atom, Parameter* param) { } while(!feof(fp)) { - fgets(line, MAXLINE, fp); + readline(line, fp); char *item = strtok(line, " "); if(strncmp(item, "CRYST1", 6) == 0) { param->xlo = 0.0; @@ -273,15 +273,15 @@ int readAtom_gro(Atom* atom, Parameter* param) { return -1; } - fgets(desc, MAXLINE, fp); + readline(desc, fp); for(i = 0; desc[i] != '\n'; i++); desc[i] = '\0'; - fgets(line, MAXLINE, fp); + readline(line, fp); atoms_to_read = atoi(strtok(line, " ")); fprintf(stdout, "System: %s with %d atoms\n", desc, atoms_to_read); while(!feof(fp) && read_atoms < atoms_to_read) { - fgets(line, MAXLINE, fp); + readline(line, fp); char *label = strtok(line, " "); int type = type_str2int(strtok(NULL, " ")); int atom_id = atoi(strtok(NULL, " ")) - 1; @@ -304,7 +304,7 @@ int readAtom_gro(Atom* atom, Parameter* param) { } if(!feof(fp)) { - fgets(line, MAXLINE, fp); + readline(line, fp); param->xlo = 0.0; param->xhi = atof(strtok(line, " ")); param->ylo = 0.0; @@ -353,15 +353,15 @@ int readAtom_dmp(Atom* atom, Parameter* param) { } while(!feof(fp) && ts < 1 && !read_atoms) { - fgets(line, MAXLINE, fp); + readline(line, fp); if(strncmp(line, "ITEM: ", 6) == 0) { char *item = &line[6]; if(strncmp(item, "TIMESTEP", 8) == 0) { - fgets(line, MAXLINE, fp); + readline(line, fp); ts = atoi(line); } else if(strncmp(item, "NUMBER OF ATOMS", 15) == 0) { - fgets(line, MAXLINE, fp); + readline(line, fp); natoms = atoi(line); atom->Natoms = natoms; atom->Nlocal = natoms; @@ -369,23 +369,23 @@ int readAtom_dmp(Atom* atom, Parameter* param) { growAtom(atom); } } else if(strncmp(item, "BOX BOUNDS pp pp pp", 19) == 0) { - fgets(line, MAXLINE, fp); + readline(line, fp); param->xlo = atof(strtok(line, " ")); param->xhi = atof(strtok(NULL, " ")); param->xprd = param->xhi - param->xlo; - fgets(line, MAXLINE, fp); + readline(line, fp); param->ylo = atof(strtok(line, " ")); param->yhi = atof(strtok(NULL, " ")); param->yprd = param->yhi - param->ylo; - fgets(line, MAXLINE, fp); + readline(line, fp); param->zlo = atof(strtok(line, " ")); param->zhi = atof(strtok(NULL, " ")); param->zprd = param->zhi - param->zlo; } else if(strncmp(item, "ATOMS id type x y z vx vy vz", 28) == 0) { for(int i = 0; i < natoms; i++) { - fgets(line, MAXLINE, fp); + readline(line, fp); atom_id = atoi(strtok(line, " ")) - 1; atom->type[atom_id] = atoi(strtok(NULL, " ")); atom_x(atom_id) = atof(strtok(NULL, " ")); @@ -442,7 +442,7 @@ int readAtom_in(Atom* atom, Parameter* param) { return -1; } - fgets(line, MAXLINE, fp); + readline(line, fp); natoms = atoi(strtok(line, " ")); param->xlo = atof(strtok(NULL, " ")); param->xhi = atof(strtok(NULL, " ")); @@ -459,7 +459,7 @@ int readAtom_in(Atom* atom, Parameter* param) { } for(int i = 0; i < natoms; i++) { - fgets(line, MAXLINE, fp); + readline(line, fp); // TODO: store mass per atom char *s_mass = strtok(line, " ");