diff --git a/mk/include_CLANG.mk b/mk/include_CLANG.mk index 0030907..b3b2415 100644 --- a/mk/include_CLANG.mk +++ b/mk/include_CLANG.mk @@ -14,7 +14,7 @@ LIBS = # -lomp endif VERSION = --version -CFLAGS = -Ofast -std=c99 $(OPENMP) +CFLAGS = -O3 -ffast-math -std=c99 $(OPENMP) #CFLAGS = -Ofast -fnt-store=aggressive -std=c99 $(OPENMP) #AMD CLANG LFLAGS = $(OPENMP) DEFINES += -D_GNU_SOURCE diff --git a/src/main.c b/src/main.c index c700a7a..c43eba4 100644 --- a/src/main.c +++ b/src/main.c @@ -52,8 +52,8 @@ int main(int argc, char** argv) CG_FLOAT eps = (CG_FLOAT)param.eps; int itermax = param.itermax; - initSolver(&s, &param); - // matrixDump(&s.A); + initSolver(&s, &comm, &param); + // matrixDump(&s.A, &comm); CG_UINT nrow = s.A.nr; CG_UINT ncol = s.A.nc; @@ -73,7 +73,7 @@ int main(int argc, char** argv) // initial iteration waxpby(nrow, 1.0, r, 0.0, r, p); - // TICK(); exchange_externals(A,p); TOCK(t5); + commExchange(&comm, &s.A, p); spMVM(&s.A, p, Ap); double alpha = 0.0; ddot(nrow, p, Ap, &alpha); @@ -88,7 +88,7 @@ int main(int argc, char** argv) ddot(nrow, r, r, &rtrans); double beta = rtrans / oldrtrans; waxpby(nrow, 1.0, r, beta, p, p); - // TICK(); exchange_externals(A,p); TOCK(t5); + commExchange(&comm, &s.A, p); spMVM(&s.A, p, Ap); alpha = 0.0; ddot(nrow, p, Ap, &alpha); diff --git a/src/matrix.c b/src/matrix.c index ff6ed66..797f572 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -167,22 +167,34 @@ void matrixRead(Matrix* m, char* filename) } } -void matrixDump(Matrix* m) +void matrixDump(Matrix* m, int rank, int size) { CG_UINT numRows = m->nr; CG_UINT* rowPtr = m->rowPtr; CG_UINT* colInd = m->colInd; CG_FLOAT* val = m->val; - printf("Matrix: %lld non zeroes, number of rows %lld\n", m->nnz, numRows); + if (!rank) { + printf("Matrix: %lld total non zeroes, total number of rows %lld\n", + m->totalNnz, + m->totalNr); + } - for (int rowID = 0; rowID < numRows; rowID++) { - printf("Row [%d]: ", rowID); + for (int i = 0; i < size; i++) { + if (i == rank) { +
printf("Matrix: %lld non zeroes, number of rows %lld\n", m->nnz, numRows); - for (size_t rowEntry = rowPtr[rowID]; rowEntry < rowPtr[rowID + 1]; rowEntry++) { - printf("[%lld]:%.2f ", colInd[rowEntry], val[rowEntry]); + for (int rowID = 0; rowID < numRows; rowID++) { + printf("Row [%d]: ", rowID); + + for (size_t rowEntry = rowPtr[rowID]; rowEntry < rowPtr[rowID + 1]; + rowEntry++) { + printf("[%lld]:%.2f ", colInd[rowEntry], val[rowEntry]); + } + + printf("\n"); + } + fflush(stdout); } - - printf("\n"); } } diff --git a/src/matrix.h b/src/matrix.h index 92b08eb..78b1c8a 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -23,6 +23,6 @@ typedef struct { } Matrix; extern void matrixRead(Matrix* m, char* filename); -extern void matrixDump(Matrix* m); +extern void matrixDump(Matrix* m, int rank, int size); #endif // __MATRIX_H_ diff --git a/src/solver.c b/src/solver.c index a2c7156..179452e 100644 --- a/src/solver.c +++ b/src/solver.c @@ -9,15 +9,14 @@ #include #include "allocate.h" -#include "comm.h" #include "matrix.h" #include "solver.h" #include "util.h" -static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) +static void matrixGenerate(Parameter* p, Solver* s, Comm* c, bool use_7pt_stencil) { - int size = 1; // Serial case (not using MPI) - int rank = 0; + int size = c->size; + int rank = c->rank; CG_UINT local_nrow = p->nx * p->ny * p->nz; CG_UINT local_nnz = 27 * local_nrow; @@ -28,12 +27,14 @@ static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) int start_row = local_nrow * rank; int stop_row = start_row + local_nrow - 1; - if (use_7pt_stencil) { - printf("Generate 7pt matrix with "); - } else { - printf("Generate 27pt matrix with "); + if (commIsMaster(c)) { + if (use_7pt_stencil) { + printf("Generate 7pt matrix with "); + } else { + printf("Generate 27pt matrix with "); + } + printf("%d total rows and %d nonzeros\n", (int)total_nrow, (int)local_nnz); } - printf("%d total rows and %d nonzeros\n", (int)total_nrow, 
(int)local_nnz); s->A.val = (CG_FLOAT*)allocate(64, local_nnz * sizeof(CG_FLOAT)); s->A.colInd = (CG_UINT*)allocate(64, local_nnz * sizeof(CG_UINT)); @@ -53,11 +54,6 @@ static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) int nx = p->nx, ny = p->ny, nz = p->nz; CG_UINT cursor = 0; - // for (int i = 0; i < local_nnz; i++) { - // curvalptr[i] = 0.0; - // printf("%d-%f, ", i, m->val[i]); - // } - // printf("\n"); *currowptr++ = 0; for (int iz = 0; iz < nz; iz++) { @@ -68,9 +64,6 @@ static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) int currow = start_row + iz * nx * ny + iy * nx + ix; int nnzrow = 0; - // (*A)->ptr_to_vals_in_row[curlocalrow] = curvalptr; - // (*A)->ptr_to_inds_in_row[curlocalrow] = curindptr; - for (int sz = -1; sz <= 1; sz++) { for (int sy = -1; sy <= 1; sy++) { for (int sx = -1; sx <= 1; sx++) { @@ -101,7 +94,6 @@ static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) } // end sz loop *currowptr = *(currowptr - 1) + nnzrow; - // printf("%d:%d-%lld, ", currow, nnzrow, *currowptr); currowptr++; nnzglobal += nnzrow; x[curlocalrow] = 0.0; @@ -117,11 +109,6 @@ static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) printf("%d nonzeros\n", start_row, stop_row); #endif /* ifdef VERBOSE */ - // for (int i = 0; i < local_nnz; i++) { - // printf("%d:%f, ", (int)m->colInd[i], m->val[i]); - // } - // printf("\n"); - s->A.startRow = start_row; s->A.stopRow = stop_row; s->A.totalNr = total_nrow; @@ -131,12 +118,12 @@ static void matrixGenerate(Parameter* p, Solver* s, bool use_7pt_stencil) s->A.nnz = local_nnz; } -void initSolver(Solver* s, Parameter* p) +void initSolver(Solver* s, Comm* c, Parameter* p) { if (!strcmp(p->filename, "generate")) { - matrixGenerate(p, s, false); + matrixGenerate(p, s, c, false); } else if (!strcmp(p->filename, "generate7P")) { - matrixGenerate(p, s, true); + matrixGenerate(p, s, c, true); } else { matrixRead(&s->A, p->filename); } diff --git 
a/src/solver.h b/src/solver.h index 8a1b81c..d909d69 100644 --- a/src/solver.h +++ b/src/solver.h @@ -4,6 +4,7 @@ * license that can be found in the LICENSE file. */ #ifndef __SOLVER_H_ #define __SOLVER_H_ +#include "comm.h" #include "matrix.h" #include "parameter.h" #include "util.h" @@ -15,7 +16,7 @@ typedef struct { CG_FLOAT* xexact; } Solver; -void initSolver(Solver* s, Parameter*); +void initSolver(Solver* s, Comm* c, Parameter*); void spMVM(Matrix* m, const CG_FLOAT* restrict x, CG_FLOAT* restrict y); void waxpby(const CG_UINT n,