diff --git a/.clang-format b/.clang-format index 38404b9..9c928f4 100644 --- a/.clang-format +++ b/.clang-format @@ -1,5 +1,5 @@ --- -Language: Cpp +Language: Cpp # BasedOnStyle: WebKit AccessModifierOffset: -4 AlignAfterOpenBracket: DontAlign @@ -29,21 +29,21 @@ AttributeMacros: BinPackArguments: false BinPackParameters: false BraceWrapping: - AfterCaseLabel: false - AfterClass: false + AfterCaseLabel: false + AfterClass: false AfterControlStatement: Never - AfterEnum: false - AfterFunction: true - AfterNamespace: false + AfterEnum: false + AfterFunction: true + AfterNamespace: false AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false + AfterStruct: false + AfterUnion: false AfterExternBlock: false - BeforeCatch: false - BeforeElse: false + BeforeCatch: false + BeforeElse: false BeforeLambdaBody: false - BeforeWhile: false - IndentBraces: false + BeforeWhile: false + IndentBraces: false SplitEmptyFunction: true SplitEmptyRecord: true SplitEmptyNamespace: true @@ -56,19 +56,18 @@ BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeComma BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true -ColumnLimit: 90 -CommentPragmas: '^ IWYU pragma:' +ColumnLimit: 80 +CommentPragmas: "^ IWYU pragma:" CompactNamespaces: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: false DeriveLineEnding: true DerivePointerAlignment: false -DisableFormat: false +DisableFormat: false EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false -BasedOnStyle: '' ConstructorInitializerAllOnOneLineOrOnePerLine: false AllowAllConstructorInitializersOnNextLine: true FixNamespaceComments: false @@ -78,37 +77,37 @@ ForEachMacros: - BOOST_FOREACH IfMacros: - KJ_IF_MAYBE -IncludeBlocks: Preserve +IncludeBlocks: Preserve IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' - Priority: 2 - SortPriority: 0 - CaseSensitive: false - - Regex: '^(<|"(gtest|gmock|isl|json)/)' - Priority: 3 - SortPriority: 0 - CaseSensitive: false - - Regex: '.*' - Priority: 1 - SortPriority: 0 - CaseSensitive: false -IncludeIsMainRegex: '(Test)?$' -IncludeIsMainSourceRegex: '' + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: ".*" + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: "(Test)?$" +IncludeIsMainSourceRegex: "" IndentAccessModifiers: false IndentCaseLabels: false IndentCaseBlocks: false IndentGotoLabels: true IndentPPDirectives: None IndentExternBlock: AfterExternBlock -IndentWidth: 4 +IndentWidth: 2 IndentWrappedFunctionNames: false InsertTrailingCommas: None JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: true LambdaBodyIndentation: Signature -MacroBlockBegin: '' -MacroBlockEnd: '' +MacroBlockBegin: "" +MacroBlockEnd: "" MaxEmptyLinesToKeep: 1 NamespaceIndentation: Inner ObjCBinPackProtocolList: Auto @@ -126,11 +125,11 @@ PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PenaltyIndentedWhitespace: 0 PointerAlignment: Left -PPIndentWidth: -1 +PPIndentWidth: -1 ReferenceAlignment: Pointer -ReflowComments: true +ReflowComments: true ShortNamespaceLines: 1 -SortIncludes: CaseSensitive +SortIncludes: CaseSensitive SortJavaStaticImport: Before SortUsingDeclarations: true SpaceAfterCStyleCast: false @@ -147,30 +146,29 @@ SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 -SpacesInAngles: Never +SpacesInAngles: Never SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInLineCommentPrefix: - Minimum: 1 - Maximum: -1 + Minimum: 1 + Maximum: -1 SpacesInParentheses: false SpacesInSquareBrackets: false SpaceBeforeSquareBrackets: false BitFieldColonSpacing: Both -Standard: Latest +Standard: Latest StatementAttributeLikeMacros: - Q_EMIT StatementMacros: - Q_UNUSED - QT_REQUIRE_VERSION -TabWidth: 8 -UseCRLF: false -UseTab: Never +TabWidth: 8 +UseCRLF: false +UseTab: Never WhitespaceSensitiveMacros: - STRINGIZE - PP_STRINGIZE - BOOST_PP_STRINGIZE - NS_SWIFT_NAME - CF_SWIFT_NAME -... diff --git a/src/allocate.c b/src/allocate.c index 31816ca..2b9ae6f 100644 --- a/src/allocate.c +++ b/src/allocate.c @@ -11,26 +11,26 @@ void* allocate(size_t alignment, size_t bytesize) { - int errorCode; - void* ptr; + int errorCode; + void* ptr; - errorCode = posix_memalign(&ptr, alignment, bytesize); + errorCode = posix_memalign(&ptr, alignment, bytesize); - if (errorCode) { - if (errorCode == EINVAL) { - fprintf(stderr, "Error: Alignment parameter is not a power of two\n"); - exit(EXIT_FAILURE); - } - if (errorCode == ENOMEM) { - fprintf(stderr, "Error: Insufficient memory to fulfill the request\n"); - exit(EXIT_FAILURE); - } + if (errorCode) { + if (errorCode == EINVAL) { + fprintf(stderr, "Error: Alignment parameter is not a power of two\n"); + exit(EXIT_FAILURE); } - - if (ptr == NULL) { - fprintf(stderr, "Error: posix_memalign failed!\n"); - exit(EXIT_FAILURE); + if (errorCode == ENOMEM) { + fprintf(stderr, "Error: Insufficient memory to fulfill the request\n"); + exit(EXIT_FAILURE); } + } - return ptr; + if (ptr == NULL) { + fprintf(stderr, "Error: posix_memalign failed!\n"); + exit(EXIT_FAILURE); + } + + return ptr; } diff --git a/src/comm.c b/src/comm.c index 319bd39..7957178 100644 --- a/src/comm.c +++ b/src/comm.c @@ -16,620 +16,629 @@ // subroutines local to this module static int sizeOfRank(int rank, int size, int N) { - return N / size + ((N % size > rank) ? 1 : 0); + return N / size + ((N % size > rank) ? 1 : 0); } void commReduction(double* v, int op) { #ifdef _MPI - if (op == MAX) { - MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - } else if (op == SUM) { - MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - } + if (op == MAX) { + MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + } else if (op == SUM) { + MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + } #endif } void commPartition(Comm* c, Matrix* A) { #ifdef _MPI - int rank = c->rank; - int size = c->size; - MPI_Comm comm = c->comm; + int rank = c->rank; + int size = c->size; + MPI_Comm comm = c->comm; - // Extract Matrix pieces - int start_row = A->startRow; - int stop_row = A->stopRow; - int total_nrow = A->totalNr; - long long total_nnz = A->totalNnz; - int local_nrow = A->nr; - int local_nnz = A->nnz; - int* row_ptr = (int*)A->rowPtr; - int* col_ind = (int*)A->colInd; + // Extract Matrix pieces + int start_row = A->startRow; + int stop_row = A->stopRow; + int total_nrow = A->totalNr; + long long total_nnz = A->totalNnz; + int local_nrow = A->nr; + int local_nnz = A->nnz; + int* row_ptr = (int*)A->rowPtr; + int* col_ind = (int*)A->colInd; - // int* nnz_in_row = A->nnz_in_row; - // double** ptr_to_vals_in_row = A->ptr_to_vals_in_row; - // int** ptr_to_inds_in_row = A->ptr_to_inds_in_row; + // We need to convert the index values for the rows on this processor + // to a local index space. We need to: + // - Determine if each index reaches to a local value or external value + // - If local, subtract start_row from index value to get local index + // - If external, find out if it is already accounted for. + // - If so, then do nothing, + // - otherwise + // - add it to the list of external indices, + // - find out which processor owns the value. + // - Set up communication for sparse MV operation. - // We need to convert the index values for the rows on this processor - // to a local index space. We need to: - // - Determine if each index reaches to a local value or external value - // - If local, subtract start_row from index value to get local index - // - If external, find out if it is already accounted for. - // - If so, then do nothing, - // - otherwise - // - add it to the list of external indices, - // - find out which processor owns the value. - // - Set up communication for sparse MV operation. + // Scan the indices and transform to local + int* externals = (int*)allocate(ARRAY_ALIGNMENT, A->totalNr * sizeof(int)); + int num_external = 1; - // Scan the indices and transform to local - int* externals = (int*)allocate(ARRAY_ALIGNMENT, A->totalNr * sizeof(int)); - int num_external = 1; + int* external_index = (int*)allocate(ARRAY_ALIGNMENT, + MAX_EXTERNAL * sizeof(int)); + c->external_index = external_index; - int* external_index = (int*)allocate(ARRAY_ALIGNMENT, MAX_EXTERNAL * sizeof(int)); - c->external_index = external_index; + for (int i = 0; i < A->totalNr; i++) { + externals[i] = -1; + } - for (int i = 0; i < A->totalNr; i++) { - externals[i] = -1; - } - - for (int i = 0; i < local_nrow; i++) { - for (int j = row_ptr[i]; j < row_ptr[i + 1]; j++) { - int cur_ind = A->colInd[j]; + for (int i = 0; i < local_nrow; i++) { + for (int j = row_ptr[i]; j < row_ptr[i + 1]; j++) { + int cur_ind = A->colInd[j]; #ifdef VERBOSE - printf("Process %d of %d getting index %d in local row %d\n", - rank, - size, - cur_ind, - i); + printf("Process %d of %d getting index %d in local row %d\n", + rank, + size, + cur_ind, + i); #endif - // shift local rows to the start - if (start_row <= cur_ind && cur_ind <= stop_row) { - col_ind[j] -= start_row; - } else { - // Must find out if we have already set up this point - if (externals[cur_ind] == -1) { - externals[cur_ind] = num_external++; + // shift local rows to the start + if (start_row <= cur_ind && cur_ind <= stop_row) { + col_ind[j] -= start_row; + } else { + // Must find out if we have already set up this point + if (externals[cur_ind] == -1) { + externals[cur_ind] = num_external++; - if (num_external <= MAX_EXTERNAL) { - external_index[num_external - 1] = cur_ind; - // Mark index as external by negating it - col_ind[j] = -col_ind[j]; - } else { - printf("Must increase MAX_EXTERNAL\n"); - exit(EXIT_FAILURE); - } - } else { - // Mark index as external by negating it - col_ind[j] = -col_ind[j]; - } - } + if (num_external <= MAX_EXTERNAL) { + external_index[num_external - 1] = cur_ind; + // Mark index as external by negating it + col_ind[j] = -(col_ind[j] + 1); // FIXME: Offset? + } else { + printf("Must increase MAX_EXTERNAL\n"); + exit(EXIT_FAILURE); + } + } else { + // Mark index as external by negating it + col_ind[j] = -col_ind[j]; } + } } + } - /************************************************************************** - Go through list of externals to find out which processors must be accessed. - **************************************************************************/ - c->num_external = num_external; - int tmp_buffer[size]; - int global_index_offsets[size]; + /************************************************************************** + Go through list of externals to find out which processors must be accessed. + **************************************************************************/ + c->num_external = num_external; + int tmp_buffer[size]; + int global_index_offsets[size]; - for (int i = 0; i < size; i++) { - tmp_buffer[i] = 0; + for (int i = 0; i < size; i++) { + tmp_buffer[i] = 0; + } + + tmp_buffer[rank] = start_row; + + MPI_Allreduce(tmp_buffer, + global_index_offsets, + size, + MPI_INT, + MPI_SUM, + MPI_COMM_WORLD); + + // Go through list of externals and find the processor that owns each + int external_processor[num_external]; + + for (int i = 0; i < num_external; i++) { + int cur_ind = external_index[i]; + for (int j = size - 1; j >= 0; j--) { + if (global_index_offsets[j] <= cur_ind) { + external_processor[i] = j; + break; + } } + } - tmp_buffer[rank] = start_row; + /*Go through the external elements. For each newly encountered external + point assign it the next index in the local sequence. Then look for other + external elements who are updated by the same node and assign them the next + set of index numbers in the local sequence (ie. elements updated by the same + node have consecutive indices).*/ + int* external_local_index = (int*)allocate(ARRAY_ALIGNMENT, + MAX_EXTERNAL * sizeof(int)); + c->external_local_index = external_local_index; - MPI_Allreduce(tmp_buffer, - global_index_offsets, + int count = local_nrow; + + for (int i = 0; i < num_external; i++) { + external_local_index[i] = -1; + } + + for (int i = 0; i < num_external; i++) { + if (external_local_index[i] == -1) { + external_local_index[i] = count++; + + for (int j = i + 1; j < num_external; j++) { + if (external_processor[j] == external_processor[i]) { + external_local_index[j] = count++; + } + } + } + } + + // map all external ids to the new local index + CG_UINT* rowPtr = A->rowPtr; + + for (int i = 0; i < local_nrow; i++) { + for (int j = rowPtr[i]; j < rowPtr[i + 1]; j++) { + if (col_ind[j] < 0) { + int cur_ind = -col_ind[j] - 1; // FIXME: Offset by 1?? + col_ind[j] = external_local_index[externals[cur_ind]]; + } + } + } + + int new_external_processor[num_external]; + + for (int i = 0; i < num_external; i++) { + new_external_processor[i] = 0; + } + + // setup map from external id to partition + for (int i = 0; i < num_external; i++) { + new_external_processor[external_local_index[i] - local_nrow] = + external_processor[i]; + } + +#ifdef VERBOSE + for (int i = 0; i < num_external; i++) { + printf("Process %d of %d: external process[%d] = %d\n", + rank, size, + i, + external_processor[i]); + } +#endif + + /* Count the number of neighbors from which we receive information to update + our external elements. Additionally, fill the array tmp_neighbors in the + following way: + tmp_neighbors[i] = 0 ==> No external elements are updated by + processor i. + tmp_neighbors[i] = x ==> (x-1)/size elements are updated from + processor i.*/ + + int tmp_neighbors[size]; + + for (int i = 0; i < size; i++) { + tmp_neighbors[i] = 0; + } + + int num_recv_neighbors = 0; + int length = 1; + + for (int i = 0; i < num_external; i++) { + if (tmp_neighbors[new_external_processor[i]] == 0) { + num_recv_neighbors++; + tmp_neighbors[new_external_processor[i]] = 1; + } + tmp_neighbors[new_external_processor[i]] += size; + } + + // sum over all processors all the tmp_neighbors arrays + MPI_Allreduce(tmp_neighbors, + tmp_buffer, + size, + MPI_INT, + MPI_SUM, + MPI_COMM_WORLD); + + /* decode the combined 'tmp_neighbors' (stored in tmp_buffer) array from all + * the processors */ + int num_send_neighbors = tmp_buffer[rank] % size; + + /* decode 'tmp_buffer[rank] to deduce total number of elements we must send */ + int total_to_be_sent = (tmp_buffer[rank] - num_send_neighbors) / size; + + /* Check to see if we have enough workspace allocated. This could be + dynamically modified, but let's keep it simple for now...*/ + if (num_send_neighbors > MAX_NUM_MESSAGES) { + printf("Must increase MAX_NUM_MESSAGES. Must be at least %d\n", + num_send_neighbors); + exit(EXIT_FAILURE); + } + + if (total_to_be_sent > MAX_EXTERNAL) { + printf("Must increase MAX_EXTERNAL. Must be at least %d\n", + total_to_be_sent); + exit(EXIT_FAILURE); + } + +#ifdef VERBOSE + cout << "Processor " << rank << " of " << size + << ": Number of send neighbors = " << num_send_neighbors << endl; + + cout << "Processor " << rank << " of " << size + << ": Number of receive neighbors = " << num_recv_neighbors << endl; + + cout << "Processor " << rank << " of " << size + << ": Total number of elements to send = " << total_to_be_sent << endl; + + MPI_Barrier(MPI_COMM_WORLD); +#endif + + /* Make a list of the neighbors that will send information to update our + external elements (in the order that we will receive this information).*/ + int* recv_list = allocate(ARRAY_ALIGNMENT, MAX_EXTERNAL * sizeof(int)); + + // FIXME: Create local scope + int j = 0; + recv_list[j++] = new_external_processor[0]; + + for (int i = 1; i < num_external; i++) { + if (new_external_processor[i - 1] != new_external_processor[i]) { + recv_list[j++] = new_external_processor[i]; + } + } + + // Ensure that all the neighbors we expect to receive from also send to us + // Send a 0 length message to each of our recv neighbors + int send_list[num_send_neighbors]; + + for (int i = 0; i < num_send_neighbors; i++) { + send_list[i] = 0; + } + + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + int MPI_MY_TAG = 99; + + MPI_Request request[MAX_NUM_MESSAGES]; + + for (int i = 0; i < num_send_neighbors; i++) { + MPI_Irecv(tmp_buffer + i, + 1, MPI_INT, - MPI_SUM, + MPI_ANY_SOURCE, + MPI_MY_TAG, + MPI_COMM_WORLD, + request + i); + } + + // send messages + for (int i = 0; i < num_recv_neighbors; i++) { + MPI_Send(tmp_buffer + i, + 1, + MPI_INT, + recv_list[i], + MPI_MY_TAG, MPI_COMM_WORLD); + } - // Go through list of externals and find the processor that owns each - int external_processor[num_external]; + // Receive message from each send neighbor to construct 'send_list'. + MPI_Status status; + for (int i = 0; i < num_send_neighbors; i++) { + if (MPI_Wait(request + i, &status)) { + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); + } + send_list[i] = status.MPI_SOURCE; + } - for (int i = 0; i < num_external; i++) { - int cur_ind = external_index[i]; - for (int j = size - 1; j >= 0; j--) { - if (global_index_offsets[j] <= cur_ind) { - external_processor[i] = j; - break; - } - } - } - - /*Go through the external elements. For each newly encountered external - point assign it the next index in the local sequence. Then look for other - external elements who are updated by the same node and assign them the next - set of index numbers in the local sequence (ie. elements updated by the same node - have consecutive indices).*/ - int* external_local_index = (int*)allocate(ARRAY_ALIGNMENT, - MAX_EXTERNAL * sizeof(int)); - c->external_local_index = external_local_index; - - int count = local_nrow; - - for (int i = 0; i < num_external; i++) { - external_local_index[i] = -1; - } - - for (int i = 0; i < num_external; i++) { - if (external_local_index[i] == -1) { - external_local_index[i] = count++; - - for (int j = i + 1; j < num_external; j++) { - if (external_processor[j] == external_processor[i]) { - external_local_index[j] = count++; - } - } - } - } - - // map all external ids to the new local index - CG_UINT* rowPtr = A->rowPtr; - - for (int i = 0; i < local_nrow; i++) { - for (int j = rowPtr[i]; j < rowPtr[i + 1]; j++) { - if (col_ind[j] < 0) { - int cur_ind = -col_ind[j] - 1; // FIXME: Offset by 1?? - col_ind[j] = external_local_index[externals[cur_ind]]; - } - } - } - - int new_external_processor[num_external]; - - for (int i = 0; i < num_external; i++) { - new_external_processor[i] = 0; - } - - // setup map from external id to partition - for (int i = 0; i < num_external; i++) { - new_external_processor[external_local_index[i] - local_nrow] = - external_processor[i]; + /* Compare the two lists. In most cases they should be the same. + // However, if they are not then add new entries to the recv list + // that are in the send list (but not already in the recv list). + WHY!! This ensures that the sendlist is equal to the sendlist + But why is this required? -> Just One neighbour list??*/ + for (int j = 0; j < num_send_neighbors; j++) { + int found = 0; + for (int i = 0; i < num_recv_neighbors; i++) { + if (recv_list[i] == send_list[j]) found = 1; } + if (found == 0) { #ifdef VERBOSE - for (int i = 0; i < num_external; i++) { - printf("Process %d of %d: external process[%d] = %d\n", - rank, - size, - i, - external_processor[i]); - } + printf("Process %d of %d: recv_list[%d] = %d\n", + rank, + size, + num_recv_neighbors, + send_list[i]); #endif + recv_list[num_recv_neighbors] = send_list[j]; + (num_recv_neighbors)++; + } + } + num_send_neighbors = num_recv_neighbors; - /* Count the number of neighbors from which we receive information to update - our external elements. Additionally, fill the array tmp_neighbors in the - following way: - tmp_neighbors[i] = 0 ==> No external elements are updated by - processor i. - tmp_neighbors[i] = x ==> (x-1)/size elements are updated from - processor i.*/ + if (num_send_neighbors > MAX_NUM_MESSAGES) { + printf("Must increase MAX_EXTERNAL\n"); + exit(EXIT_FAILURE); + } - int tmp_neighbors[size]; + // Start filling communication setup + // Create 'new_external' which explicitly put the external elements in the + // order given by 'external_local_index' + c->total_to_be_sent = total_to_be_sent; + int* elements_to_send = (int*)allocate(ARRAY_ALIGNMENT, + total_to_be_sent * sizeof(int)); + c->elements_to_send = elements_to_send; - for (int i = 0; i < size; i++) { - tmp_neighbors[i] = 0; + for (int i = 0; i < total_to_be_sent; i++) { + elements_to_send[i] = 0; + } + + // Create 'new_external' which explicitly put the external elements in the + // order given by 'external_local_index' + int* new_external = (int*)allocate(ARRAY_ALIGNMENT, + num_external * sizeof(int)); + + for (int i = 0; i < num_external; i++) { + new_external[external_local_index[i] - local_nrow] = external_index[i]; + } + + // Send each processor the global index list of the external elements in the + // order that I will want to receive them when updating my external elements + int lengths[num_recv_neighbors]; + MPI_MY_TAG++; + + // First post receives + for (int i = 0; i < num_recv_neighbors; i++) { + int partner = recv_list[i]; + MPI_Irecv(lengths + i, + 1, + MPI_INT, + partner, + MPI_MY_TAG, + MPI_COMM_WORLD, + request + i); + } + + int* neighbors = c->neighbors; + int* recv_length = c->recv_length; + int* send_length = c->send_length; + + j = 0; + + for (int i = 0; i < num_recv_neighbors; i++) { + int start = j; + int newlength = 0; + + // go through list of external elements until updating + // processor changes + while ((j < num_external) && (new_external_processor[j] == recv_list[i])) { + newlength++; + j++; + if (j == num_external) break; } - int num_recv_neighbors = 0; - int length = 1; + recv_length[i] = newlength; + neighbors[i] = recv_list[i]; - for (int i = 0; i < num_external; i++) { - if (tmp_neighbors[new_external_processor[i]] == 0) { - num_recv_neighbors++; - tmp_neighbors[new_external_processor[i]] = 1; - } - tmp_neighbors[new_external_processor[i]] += size; + length = j - start; + MPI_Send(&length, 1, MPI_INT, recv_list[i], MPI_MY_TAG, MPI_COMM_WORLD); + } + + // Complete the receives of the number of externals + for (int i = 0; i < num_recv_neighbors; i++) { + if (MPI_Wait(request + i, &status)) { + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); } + send_length[i] = lengths[i]; + } - // sum over all processors all the tmp_neighbors arrays - MPI_Allreduce(tmp_neighbors, tmp_buffer, size, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + // Build "elements_to_send" list. These are the x elements I own + // that need to be sent to other processors. + MPI_MY_TAG++; - /* decode the combined 'tmp_neighbors' (stored in tmp_buffer) array from all the - * processors */ - int num_send_neighbors = tmp_buffer[rank] % size; + j = 0; - /* decode 'tmp_buffer[rank] to deduce total number of elements we must send */ - int total_to_be_sent = (tmp_buffer[rank] - num_send_neighbors) / size; + for (int i = 0; i < num_recv_neighbors; i++) { + MPI_Irecv(elements_to_send + j, + send_length[i], + MPI_INT, + neighbors[i], + MPI_MY_TAG, + MPI_COMM_WORLD, + request + i); + j += send_length[i]; + } - /* Check to see if we have enough workspace allocated. This could be - dynamically modified, but let's keep it simple for now...*/ - if (num_send_neighbors > MAX_NUM_MESSAGES) { - printf("Must increase MAX_NUM_MESSAGES. Must be at least %d\n", - num_send_neighbors); - exit(EXIT_FAILURE); + j = 0; + + for (int i = 0; i < num_recv_neighbors; i++) { + int start = j; + int newlength = 0; + + // Go through list of external elements + // until updating processor changes. This is redundant, but + // saves us from recording this information. + while ((j < num_external) && (new_external_processor[j] == recv_list[i])) { + + newlength++; + j++; + if (j == num_external) break; } + MPI_Send(new_external + start, + j - start, + MPI_INT, + recv_list[i], + MPI_MY_TAG, + MPI_COMM_WORLD); + } - if (total_to_be_sent > MAX_EXTERNAL) { - printf("Must increase MAX_EXTERNAL. Must be at least %d\n", total_to_be_sent); - exit(EXIT_FAILURE); + // receive from each neighbor the global index list of external elements + for (int i = 0; i < num_recv_neighbors; i++) { + if (MPI_Wait(request + i, &status)) { + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); } + } -#ifdef VERBOSE - cout << "Processor " << rank << " of " << size - << ": Number of send neighbors = " << num_send_neighbors << endl; + /// replace global indices by local indices + for (int i = 0; i < total_to_be_sent; i++) { + elements_to_send[i] -= start_row; + } - cout << "Processor " << rank << " of " << size - << ": Number of receive neighbors = " << num_recv_neighbors << endl; + // Finish up !! + c->num_send_neighbors = num_send_neighbors; + A->nc = A->nc + num_external; - cout << "Processor " << rank << " of " << size - << ": Total number of elements to send = " << total_to_be_sent << endl; + // Used in exchange + CG_FLOAT* send_buffer = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, + total_to_be_sent * sizeof(CG_FLOAT)); + c->send_buffer = send_buffer; - MPI_Barrier(MPI_COMM_WORLD); -#endif - - /* Make a list of the neighbors that will send information to update our - external elements (in the order that we will receive this information).*/ - int* recv_list = allocate(ARRAY_ALIGNMENT, MAX_EXTERNAL * sizeof(int)); - - // FIXME: Create local scope - int j = 0; - recv_list[j++] = new_external_processor[0]; - - for (int i = 1; i < num_external; i++) { - if (new_external_processor[i - 1] != new_external_processor[i]) { - recv_list[j++] = new_external_processor[i]; - } - } - - // Ensure that all the neighbors we expect to receive from also send to us - // Send a 0 length message to each of our recv neighbors - int send_list[num_send_neighbors]; - - for (int i = 0; i < num_send_neighbors; i++) { - send_list[i] = 0; - } - - // first post receives, these are immediate receives - // Do not wait for result to come, will do that at the - // wait call below. - int MPI_MY_TAG = 99; - - MPI_Request request[MAX_NUM_MESSAGES]; - - for (int i = 0; i < num_send_neighbors; i++) { - MPI_Irecv(tmp_buffer + i, - 1, - MPI_INT, - MPI_ANY_SOURCE, - MPI_MY_TAG, - MPI_COMM_WORLD, - request + i); - } - - // send messages - for (int i = 0; i < num_recv_neighbors; i++) { - MPI_Send(tmp_buffer + i, 1, MPI_INT, recv_list[i], MPI_MY_TAG, MPI_COMM_WORLD); - } - - // Receive message from each send neighbor to construct 'send_list'. - MPI_Status status; - for (int i = 0; i < num_send_neighbors; i++) { - if (MPI_Wait(request + i, &status)) { - printf("MPI_Wait error\n"); - exit(EXIT_FAILURE); - } - send_list[i] = status.MPI_SOURCE; - } - - /* Compare the two lists. In most cases they should be the same. - // However, if they are not then add new entries to the recv list - // that are in the send list (but not already in the recv list). - WHY!! This ensures that the sendlist is equal to the sendlist - But why is this required? -> Just One neighbour list??*/ - for (int j = 0; j < num_send_neighbors; j++) { - int found = 0; - for (int i = 0; i < num_recv_neighbors; i++) { - if (recv_list[i] == send_list[j]) found = 1; - } - - if (found == 0) { -#ifdef VERBOSE - printf("Process %d of %d: recv_list[%d] = %d\n", - rank, - size, - num_recv_neighbors, - send_list[i]); -#endif - recv_list[num_recv_neighbors] = send_list[j]; - (num_recv_neighbors)++; - } - } - num_send_neighbors = num_recv_neighbors; - - if (num_send_neighbors > MAX_NUM_MESSAGES) { - printf("Must increase MAX_EXTERNAL\n"); - exit(EXIT_FAILURE); - } - - // Start filling communication setup - // Create 'new_external' which explicitly put the external elements in the - // order given by 'external_local_index' - c->total_to_be_sent = total_to_be_sent; - int* elements_to_send = (int*)allocate(ARRAY_ALIGNMENT, - total_to_be_sent * sizeof(int)); - c->elements_to_send = elements_to_send; - - for (int i = 0; i < total_to_be_sent; i++) { - elements_to_send[i] = 0; - } - - // Create 'new_external' which explicitly put the external elements in the - // order given by 'external_local_index' - int* new_external = (int*)allocate(ARRAY_ALIGNMENT, num_external * sizeof(int)); - - for (int i = 0; i < num_external; i++) { - new_external[external_local_index[i] - local_nrow] = external_index[i]; - } - - // Send each processor the global index list of the external elements in the - // order that I will want to receive them when updating my external elements - int lengths[num_recv_neighbors]; - MPI_MY_TAG++; - - // First post receives - for (int i = 0; i < num_recv_neighbors; i++) { - int partner = recv_list[i]; - MPI_Irecv(lengths + i, - 1, - MPI_INT, - partner, - MPI_MY_TAG, - MPI_COMM_WORLD, - request + i); - } - - int* neighbors = c->neighbors; - int* recv_length = c->recv_length; - int* send_length = c->send_length; - - j = 0; - - for (int i = 0; i < num_recv_neighbors; i++) { - int start = j; - int newlength = 0; - - // go through list of external elements until updating - // processor changes - while ((j < num_external) && (new_external_processor[j] == recv_list[i])) { - newlength++; - j++; - if (j == num_external) break; - } - - recv_length[i] = newlength; - neighbors[i] = recv_list[i]; - - length = j - start; - MPI_Send(&length, 1, MPI_INT, recv_list[i], MPI_MY_TAG, MPI_COMM_WORLD); - } - - // Complete the receives of the number of externals - for (int i = 0; i < num_recv_neighbors; i++) { - if (MPI_Wait(request + i, &status)) { - printf("MPI_Wait error\n"); - exit(EXIT_FAILURE); - } - send_length[i] = lengths[i]; - } - - // Build "elements_to_send" list. These are the x elements I own - // that need to be sent to other processors. - MPI_MY_TAG++; - - j = 0; - - for (int i = 0; i < num_recv_neighbors; i++) { - MPI_Irecv(elements_to_send + j, - send_length[i], - MPI_INT, - neighbors[i], - MPI_MY_TAG, - MPI_COMM_WORLD, - request + i); - j += send_length[i]; - } - - j = 0; - - for (int i = 0; i < num_recv_neighbors; i++) { - int start = j; - int newlength = 0; - - // Go through list of external elements - // until updating processor changes. This is redundant, but - // saves us from recording this information. - while ((j < num_external) && (new_external_processor[j] == recv_list[i])) { - - newlength++; - j++; - if (j == num_external) break; - } - MPI_Send(new_external + start, - j - start, - MPI_INT, - recv_list[i], - MPI_MY_TAG, - MPI_COMM_WORLD); - } - - // receive from each neighbor the global index list of external elements - for (int i = 0; i < num_recv_neighbors; i++) { - if (MPI_Wait(request + i, &status)) { - printf("MPI_Wait error\n"); - exit(EXIT_FAILURE); - } - } - - /// replace global indices by local indices - for (int i = 0; i < total_to_be_sent; i++) { - elements_to_send[i] -= start_row; - } - - // Finish up !! - c->num_send_neighbors = num_send_neighbors; - A->nc = A->nc + num_external; - - // Used in exchange - CG_FLOAT* send_buffer = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, - total_to_be_sent * sizeof(CG_FLOAT)); - c->send_buffer = send_buffer; - - free(recv_list); - free(new_external); + free(recv_list); + free(new_external); #endif } void commExchange(Comm* c, Matrix* A, double* x) { #ifdef _MPI - int num_external = 0; + int num_external = 0; - // Extract Matrix pieces + // Extract Matrix pieces - int local_nrow = A->nr; - int num_neighbors = c->num_send_neighbors; - int* recv_length = c->recv_length; - int* send_length = c->send_length; - int* neighbors = c->neighbors; - double* send_buffer = c->send_buffer; - int total_to_be_sent = c->total_to_be_sent; - int* elements_to_send = c->elements_to_send; + int local_nrow = A->nr; + int num_neighbors = c->num_send_neighbors; + int* recv_length = c->recv_length; + int* send_length = c->send_length; + int* neighbors = c->neighbors; + double* send_buffer = c->send_buffer; + int total_to_be_sent = c->total_to_be_sent; + int* elements_to_send = c->elements_to_send; - int rank = c->rank; - int size = c->size; - MPI_Comm comm = c->comm; + int rank = c->rank; + int size = c->size; + MPI_Comm comm = c->comm; - // first post receives, these are immediate receives - // Do not wait for result to come, will do that at the - // wait call below. - int MPI_MY_TAG = 99; + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + int MPI_MY_TAG = 99; - MPI_Request request[num_neighbors]; + MPI_Request request[num_neighbors]; - // Externals are at end of locals - double* x_external = (double*)x + local_nrow; + // Externals are at end of locals + double* x_external = (double*)x + local_nrow; - // Post receives first - for (int i = 0; i < num_neighbors; i++) { - int n_recv = recv_length[i]; - MPI_Irecv(x_external, - n_recv, - MPI_DOUBLE, - neighbors[i], - MPI_MY_TAG, - MPI_COMM_WORLD, - request + i); - x_external += n_recv; - } - - // Fill up send buffer - for (int i = 0; i < total_to_be_sent; i++) { - send_buffer[i] = x[elements_to_send[i]]; - } - - // Send to each neighbor - for (int i = 0; i < num_neighbors; i++) { - int n_send = send_length[i]; - MPI_Send(send_buffer, - n_send, - MPI_DOUBLE, - neighbors[i], - MPI_MY_TAG, - MPI_COMM_WORLD); - send_buffer += n_send; - } - - // Complete the reads issued above - MPI_Status status; - for (int i = 0; i < num_neighbors; i++) { - if (MPI_Wait(request + i, &status)) { - printf("MPI_Wait error\n"); - exit(EXIT_FAILURE); - } + // Post receives first + for (int i = 0; i < num_neighbors; i++) { + int n_recv = recv_length[i]; + MPI_Irecv(x_external, + n_recv, + MPI_DOUBLE, + neighbors[i], + MPI_MY_TAG, + MPI_COMM_WORLD, + request + i); + x_external += n_recv; + } + + // Fill up send buffer + for (int i = 0; i < total_to_be_sent; i++) { + send_buffer[i] = x[elements_to_send[i]]; + } + + // Send to each neighbor + for (int i = 0; i < num_neighbors; i++) { + int n_send = send_length[i]; + MPI_Send(send_buffer, + n_send, + MPI_DOUBLE, + neighbors[i], + MPI_MY_TAG, + MPI_COMM_WORLD); + send_buffer += n_send; + } + + // Complete the reads issued above + MPI_Status status; + for (int i = 0; i < num_neighbors; i++) { + if (MPI_Wait(request + i, &status)) { + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); } + } #endif } void commPrintConfig(Comm* c) { #ifdef _MPI - fflush(stdout); - MPI_Barrier(MPI_COMM_WORLD); - if (commIsMaster(c)) { - printf("Communication setup:\n"); - } + fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + if (commIsMaster(c)) { + printf("Communication setup:\n"); + } - for (int i = 0; i < c->size; i++) { - if (i == c->rank) { - fflush(stdout); - } - MPI_Barrier(MPI_COMM_WORLD); + for (int i = 0; i < c->size; i++) { + if (i == c->rank) { + fflush(stdout); } + MPI_Barrier(MPI_COMM_WORLD); + } #endif } void commMatrixDump(Comm* c, Matrix* m) { - int rank = c->rank; - int size = c->size; - CG_UINT numRows = m->nr; - CG_UINT* rowPtr = m->rowPtr; - CG_UINT* colInd = m->colInd; - CG_FLOAT* val = m->val; + int rank = c->rank; + int size = c->size; + CG_UINT numRows = m->nr; + CG_UINT* rowPtr = m->rowPtr; + CG_UINT* colInd = m->colInd; + CG_FLOAT* val = m->val; - if (commIsMaster(c)) { - printf("Matrix: %lld total non zeroes, total number of rows %lld\n", - m->totalNnz, - m->totalNr); - } + if (commIsMaster(c)) { + printf("Matrix: %lld total non zeroes, total number of rows %lld\n", + m->totalNnz, + m->totalNr); + } - for (int i = 0; i < size; i++) { - if (i == rank) { - printf("Rank %d: %lld non zeroes, number of rows %lld\n", - rank, - m->nnz, - numRows); + for (int i = 0; i < size; i++) { + if (i == rank) { + printf("Rank %d: %lld non zeroes, number of rows %lld\n", + rank, + m->nnz, + numRows); - for (int rowID = 0; rowID < numRows; rowID++) { - printf("Row [%d]: ", rowID); + for (int rowID = 0; rowID < numRows; rowID++) { + printf("Row [%d]: ", rowID); - for (size_t rowEntry = rowPtr[rowID]; rowEntry < rowPtr[rowID + 1]; - rowEntry++) { - printf("[%lld]:%.2f ", colInd[rowEntry], val[rowEntry]); - } - - printf("\n"); - } - fflush(stdout); + for (size_t rowEntry = rowPtr[rowID]; rowEntry < rowPtr[rowID + 1]; + rowEntry++) { + printf("[%lld]:%.2f ", colInd[rowEntry], val[rowEntry]); } -#ifdef _MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif + + printf("\n"); + } + fflush(stdout); } +#ifdef _MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + } } void commInit(Comm* c, int argc, char** argv) { #ifdef _MPI - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank)); - MPI_Comm_size(MPI_COMM_WORLD, &(c->size)); + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank)); + MPI_Comm_size(MPI_COMM_WORLD, &(c->size)); #else - c->rank = 0; - c->size = 1; + c->rank = 0; + c->size = 1; #endif } void commFinalize(Comm* c) { #ifdef _MPI - MPI_Finalize(); + MPI_Finalize(); #endif } diff --git a/src/comm.h b/src/comm.h index 189d5c9..7821ff9 100644 --- a/src/comm.h +++ b/src/comm.h @@ -17,21 +17,21 @@ enum op { MAX = 0, SUM }; typedef struct { - int rank; - int size; + int rank; + int size; #if defined(_MPI) - MPI_Comm comm; + MPI_Comm comm; - int num_external; - int num_send_neighbors; - int* external_index; - int* external_local_index; - int total_to_be_sent; - int* elements_to_send; - int neighbors[MAX_NUM_NEIGHBOURS]; - int recv_length[MAX_NUM_NEIGHBOURS]; - int send_length[MAX_NUM_NEIGHBOURS]; - double* send_buffer; + int num_external; + int num_send_neighbors; + int* external_index; + int* external_local_index; + int total_to_be_sent; + int* elements_to_send; + int neighbors[MAX_NUM_NEIGHBOURS]; + int recv_length[MAX_NUM_NEIGHBOURS]; + int send_length[MAX_NUM_NEIGHBOURS]; + double* send_buffer; #endif } Comm; diff --git a/src/main.c b/src/main.c index 2565637..31afc64 100644 --- a/src/main.c +++ b/src/main.c @@ -17,100 +17,100 @@ CG_FLOAT compute_residual(Solver* s) { - CG_FLOAT residual = 0.0; - int n = s->A.nr; - CG_FLOAT* v1 = s->x; - CG_FLOAT* v2 = s->xexact; + CG_FLOAT residual = 0.0; + int n = s->A.nr; + CG_FLOAT* v1 = s->x; + CG_FLOAT* v2 = s->xexact; - for (int i = 0; i < n; i++) { - double diff = fabs(v1[i] - v2[i]); - if (diff > residual) residual = diff; - } + for (int i = 0; i < n; i++) { + double diff = fabs(v1[i] - v2[i]); + if (diff > residual) residual = diff; + } - commReduction(&residual, MAX); + commReduction(&residual, MAX); - return residual; + return residual; } int main(int argc, char** argv) { - double timeStart, timeStop; - Parameter param; - Solver s; - Comm comm; + double timeStart, timeStop; + Parameter param; + Solver s; + Comm comm; - commInit(&comm, argc, argv); - initParameter(¶m); + commInit(&comm, argc, argv); + initParameter(¶m); - if (argc != 2) { - if (commIsMaster(&comm)) { - printf("Usage: %s \n", argv[0]); - } - commFinalize(&comm); - exit(EXIT_SUCCESS); + if (argc != 2) { + if (commIsMaster(&comm)) { + printf("Usage: %s \n", argv[0]); } - - readParameter(¶m, argv[1]); - - CG_FLOAT eps = (CG_FLOAT)param.eps; - int itermax = param.itermax; - initSolver(&s, &comm, ¶m); - commMatrixDump(&comm, &s.A); commFinalize(&comm); exit(EXIT_SUCCESS); + } - CG_UINT nrow = s.A.nr; - CG_UINT ncol = s.A.nc; - CG_FLOAT* r = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, nrow * sizeof(CG_FLOAT)); - CG_FLOAT* p = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, ncol * sizeof(CG_FLOAT)); - CG_FLOAT* Ap = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, nrow * sizeof(CG_FLOAT)); - CG_FLOAT normr = 0.0; - CG_FLOAT rtrans = 0.0, oldrtrans; + readParameter(¶m, argv[1]); - waxpby(nrow, 1.0, s.x, 0.0, s.x, p); - spMVM(&s.A, p, Ap); - waxpby(nrow, 1.0, s.b, -1.0, Ap, r); + CG_FLOAT eps = (CG_FLOAT)param.eps; + int itermax = param.itermax; + initSolver(&s, &comm, ¶m); + commMatrixDump(&comm, &s.A); + commFinalize(&comm); + exit(EXIT_SUCCESS); + + CG_UINT nrow = s.A.nr; + CG_UINT ncol = s.A.nc; + CG_FLOAT* r = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, nrow * sizeof(CG_FLOAT)); + CG_FLOAT* p = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, ncol * sizeof(CG_FLOAT)); + CG_FLOAT* Ap = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, nrow * sizeof(CG_FLOAT)); + CG_FLOAT normr = 0.0; + CG_FLOAT rtrans = 0.0, oldrtrans; + + waxpby(nrow, 1.0, s.x, 0.0, s.x, p); + spMVM(&s.A, p, Ap); + waxpby(nrow, 1.0, s.b, -1.0, Ap, r); + ddot(nrow, r, r, &rtrans); + + normr = sqrt(rtrans); + + // initial iteration + waxpby(nrow, 1.0, r, 0.0, r, p); + + commExchange(&comm, &s.A, p); + spMVM(&s.A, p, Ap); + double alpha = 0.0; + ddot(nrow, p, Ap, &alpha); + alpha = rtrans / alpha; + waxpby(nrow, 1.0, s.x, alpha, p, s.x); + waxpby(nrow, 1.0, r, -alpha, Ap, r); + + int k; + timeStart = getTimeStamp(); + for (k = 1; k < itermax && normr > eps; k++) { + oldrtrans = rtrans; ddot(nrow, r, r, &rtrans); - - normr = sqrt(rtrans); - - // initial iteration - waxpby(nrow, 1.0, r, 0.0, r, p); - + double beta = rtrans / oldrtrans; + waxpby(nrow, 1.0, r, beta, p, p); commExchange(&comm, &s.A, p); spMVM(&s.A, p, Ap); - double alpha = 0.0; + alpha = 0.0; ddot(nrow, p, Ap, &alpha); alpha = rtrans / alpha; waxpby(nrow, 1.0, s.x, alpha, p, s.x); waxpby(nrow, 1.0, r, -alpha, Ap, r); + } + timeStop = getTimeStamp(); - int k; - timeStart = getTimeStamp(); - for (k = 1; k < itermax && normr > eps; k++) { - oldrtrans = rtrans; - ddot(nrow, r, r, &rtrans); - double beta = rtrans / oldrtrans; - waxpby(nrow, 1.0, r, beta, p, p); - commExchange(&comm, &s.A, p); - spMVM(&s.A, p, Ap); - alpha = 0.0; - ddot(nrow, p, Ap, &alpha); - alpha = rtrans / alpha; - waxpby(nrow, 1.0, s.x, alpha, p, s.x); - waxpby(nrow, 1.0, r, -alpha, Ap, r); - } - timeStop = getTimeStamp(); + double residual = compute_residual(&s); - double residual = compute_residual(&s); + if (commIsMaster(&comm)) { + printf("Solution performed %d iterations and took %.2fs\n", + k, + timeStop - timeStart); + printf("Difference between computed and exact = %f\n", residual); + } - if (commIsMaster(&comm)) { - printf("Solution performed %d iterations and took %.2fs\n", - k, - timeStop - timeStart); - printf("Difference between computed and exact = %f\n", residual); - } - - commFinalize(&comm); - return EXIT_SUCCESS; + commFinalize(&comm); + return EXIT_SUCCESS; } diff --git a/src/matrix.c b/src/matrix.c index d334570..e33d275 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -15,154 +15,156 @@ static inline int compareColumn(const void* a, const void* b) { - const Entry* a_ = (const Entry*)a; - const Entry* b_ = (const Entry*)b; + const Entry* a_ = (const Entry*)a; + const Entry* b_ = (const Entry*)b; - return (a_->col > b_->col) - (a_->col < b_->col); + return (a_->col > b_->col) - (a_->col < b_->col); } static inline int compareRow(const void* a, const void* b) { - const Entry* a_ = (const Entry*)a; - const Entry* b_ = (const Entry*)b; + const Entry* a_ = (const Entry*)a; + const Entry* b_ = (const Entry*)b; - return (a_->row > b_->row) - (a_->row < b_->row); + return (a_->row > b_->row) - (a_->row < b_->row); } static void dumpMMMatrix(Entry* mm, int nz) { - for (int i = 0; i < nz; i++) { - printf("%d %d: %f\n", mm[i].row, mm[i].col, mm[i].val); - } + for (int i = 0; i < nz; i++) { + printf("%d %d: %f\n", mm[i].row, mm[i].col, mm[i].val); + } } void matrixRead(Matrix* m, char* filename) { - MM_typecode matcode; - FILE* f; - int M, N, nz; + MM_typecode matcode; + FILE* f; + int M, N, nz; - if ((f = fopen(filename, "r")) == NULL) { - printf("Unable to open file"); + if ((f = fopen(filename, "r")) == NULL) { + printf("Unable to open file"); + } + + if (mm_read_banner(f, &matcode) != 0) { + printf("Could not process Matrix Market banner.\n"); + exit(EXIT_FAILURE); + } + + if (!((mm_is_real(matcode) || mm_is_pattern(matcode) || + mm_is_integer(matcode)) && + mm_is_matrix(matcode) && mm_is_sparse(matcode))) { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + exit(EXIT_FAILURE); + } + + bool compatible_flag = (mm_is_sparse(matcode) && + (mm_is_real(matcode) || mm_is_pattern(matcode) || + mm_is_integer(matcode))) && + (mm_is_symmetric(matcode) || mm_is_general(matcode)); + bool sym_flag = mm_is_symmetric(matcode); + bool pattern_flag = mm_is_pattern(matcode); + bool complex_flag = mm_is_complex(matcode); + + if (!compatible_flag) { + printf("The matrix market file provided is not supported.\n Reason :\n"); + if (!mm_is_sparse(matcode)) { + printf(" * matrix has to be sparse\n"); } - if (mm_read_banner(f, &matcode) != 0) { - printf("Could not process Matrix Market banner.\n"); - exit(EXIT_FAILURE); + if (!mm_is_real(matcode) && !(mm_is_pattern(matcode))) { + printf(" * matrix has to be real or pattern\n"); } - if (!((mm_is_real(matcode) || mm_is_pattern(matcode) || mm_is_integer(matcode)) && - mm_is_matrix(matcode) && mm_is_sparse(matcode))) { - fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); - exit(EXIT_FAILURE); + if (!mm_is_symmetric(matcode) && !mm_is_general(matcode)) { + printf(" * matrix has to be either general or symmetric\n"); } - bool compatible_flag = (mm_is_sparse(matcode) && - (mm_is_real(matcode) || mm_is_pattern(matcode) || - mm_is_integer(matcode))) && - (mm_is_symmetric(matcode) || mm_is_general(matcode)); - bool sym_flag = mm_is_symmetric(matcode); - bool pattern_flag = mm_is_pattern(matcode); - bool complex_flag = mm_is_complex(matcode); + exit(EXIT_FAILURE); + } - if (!compatible_flag) { - printf("The matrix market file provided is not supported.\n Reason :\n"); - if (!mm_is_sparse(matcode)) { - printf(" * matrix has to be sparse\n"); - } + if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) { + exit(EXIT_FAILURE); + } - if (!mm_is_real(matcode) && !(mm_is_pattern(matcode))) { - printf(" * matrix has to be real or pattern\n"); - } + printf("Read matrix %s with %d non zeroes and %d rows\n", filename, nz, M); - if (!mm_is_symmetric(matcode) && !mm_is_general(matcode)) { - printf(" * matrix has to be either general or symmetric\n"); - } + m->nr = M; + m->nnz = nz; + Entry* mm; - exit(EXIT_FAILURE); - } + if (sym_flag) { + mm = (Entry*)allocate(ARRAY_ALIGNMENT, nz * 2 * sizeof(Entry)); + } else { + mm = (Entry*)allocate(ARRAY_ALIGNMENT, nz * sizeof(Entry)); + } - if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) { - exit(EXIT_FAILURE); - } + size_t cursor = 0; + int row, col; + double v; - printf("Read matrix %s with %d non zeroes and %d rows\n", filename, nz, M); + for (size_t i = 0; i < nz; i++) { - m->nr = M; - m->nnz = nz; - Entry* mm; - - if (sym_flag) { - mm = (Entry*)allocate(ARRAY_ALIGNMENT, nz * 2 * sizeof(Entry)); + if (pattern_flag) { + fscanf(f, "%d %d\n", &row, &col); + v = 1.; + } else if (complex_flag) { + fscanf(f, "%d %d %lg %*g\n", &row, &col, &v); } else { - mm = (Entry*)allocate(ARRAY_ALIGNMENT, nz * sizeof(Entry)); + fscanf(f, "%d %d %lg\n", &row, &col, &v); } - size_t cursor = 0; - int row, col; - double v; + row--; /* adjust from 1-based to 0-based */ + col--; - for (size_t i = 0; i < nz; i++) { + mm[cursor].row = row; + mm[cursor].col = col; + mm[cursor++].val = v; - if (pattern_flag) { - fscanf(f, "%d %d\n", &row, &col); - v = 1.; - } else if (complex_flag) { - fscanf(f, "%d %d %lg %*g\n", &row, &col, &v); - } else { - fscanf(f, "%d %d %lg\n", &row, &col, &v); - } - - row--; /* adjust from 1-based to 0-based */ - col--; - - mm[cursor].row = row; - mm[cursor].col = col; - mm[cursor++].val = v; - - if (sym_flag && (row != col)) { - mm[cursor].row = col; - mm[cursor].col = row; - mm[cursor++].val = v; - } + if (sym_flag && (row != col)) { + mm[cursor].row = col; + mm[cursor].col = row; + mm[cursor++].val = v; } + } - fclose(f); - size_t mms = cursor; + fclose(f); + size_t mms = cursor; - // sort by column - qsort(mm, mms, sizeof(Entry), compareColumn); - // dumpMMMatrix(mm, nz); - // sort by row - mergesort(mm, mms, sizeof(Entry), compareRow); - // dumpMMMatrix(mm, nz); + // sort by column + qsort(mm, mms, sizeof(Entry), compareColumn); + // dumpMMMatrix(mm, nz); + // sort by row + mergesort(mm, mms, sizeof(Entry), compareRow); + // dumpMMMatrix(mm, nz); - m->rowPtr = (CG_UINT*)allocate(ARRAY_ALIGNMENT, (m->nr + 1) * sizeof(CG_UINT)); - m->colInd = (CG_UINT*)allocate(ARRAY_ALIGNMENT, m->nnz * sizeof(CG_UINT)); - m->val = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, m->nnz * sizeof(CG_FLOAT)); + m->rowPtr = (CG_UINT*)allocate(ARRAY_ALIGNMENT, + (m->nr + 1) * sizeof(CG_UINT)); + m->colInd = (CG_UINT*)allocate(ARRAY_ALIGNMENT, m->nnz * sizeof(CG_UINT)); + m->val = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, m->nnz * sizeof(CG_FLOAT)); - int* valsPerRow = (int*)allocate(ARRAY_ALIGNMENT, m->nr * sizeof(int)); + int* valsPerRow = (int*)allocate(ARRAY_ALIGNMENT, m->nr * sizeof(int)); - for (int i = 0; i < m->nr; i++) { - valsPerRow[i] = 0; - } - - for (int i = 0; i < mms; i++) { - valsPerRow[mm[i].row]++; - } - - m->rowPtr[0] = 0; - - // convert to CRS format - for (int rowID = 0; rowID < m->nr; rowID++) { - - m->rowPtr[rowID + 1] = m->rowPtr[rowID] + valsPerRow[rowID]; - - // loop over all elements in Row - for (int id = m->rowPtr[rowID]; id < m->rowPtr[rowID + 1]; id++) { - m->val[id] = (CG_FLOAT)mm[id].val; - m->colInd[id] = (CG_UINT)mm[id].col; - } + for (int i = 0; i < m->nr; i++) { + valsPerRow[i] = 0; + } + + for (int i = 0; i < mms; i++) { + valsPerRow[mm[i].row]++; + } + + m->rowPtr[0] = 0; + + // convert to CRS format + for (int rowID = 0; rowID < m->nr; rowID++) { + + m->rowPtr[rowID + 1] = m->rowPtr[rowID] + valsPerRow[rowID]; + + // loop over all elements in Row + for (int id = m->rowPtr[rowID]; id < m->rowPtr[rowID + 1]; id++) { + m->val[id] = (CG_FLOAT)mm[id].val; + m->colInd[id] = (CG_UINT)mm[id].col; } + } } diff --git a/src/matrix.h b/src/matrix.h index a446cfb..430702b 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -9,17 +9,17 @@ #include "util.h" typedef struct { - int row; - int col; - double val; + int row; + int col; + double val; } Entry; typedef struct { - CG_UINT nr, nc, nnz; // number of rows, columns and non zeros - CG_UINT totalNr, totalNnz; // number of rows and non zeros - CG_UINT startRow, stopRow; - CG_UINT *colInd, *rowPtr; // colum Indices, row Pointer - CG_FLOAT* val; + CG_UINT nr, nc, nnz; // number of rows, columns and non zeros + CG_UINT totalNr, totalNnz; // number of rows and non zeros + CG_UINT startRow, stopRow; + CG_UINT *colInd, *rowPtr; // colum Indices, row Pointer + CG_FLOAT* val; } Matrix; extern void matrixRead(Matrix* m, char* filename); diff --git a/src/mmio.c b/src/mmio.c index 080181b..a72156c 100644 --- a/src/mmio.c +++ b/src/mmio.c @@ -11,206 +11,220 @@ #include "mmio.h" -int mm_read_unsymmetric_sparse( - const char* fname, int* M_, int* N_, int* nz_, double** val_, int** I_, int** J_) +int mm_read_unsymmetric_sparse(const char* fname, + int* M_, + int* N_, + int* nz_, + double** val_, + int** I_, + int** J_) { - FILE* f; - MM_typecode matcode; - int M, N, nz; - int i; - double* val; - int *I, *J; + FILE* f; + MM_typecode matcode; + int M, N, nz; + int i; + double* val; + int *I, *J; - if ((f = fopen(fname, "r")) == NULL) return -1; + if ((f = fopen(fname, "r")) == NULL) return -1; - if (mm_read_banner(f, &matcode) != 0) { - printf("mm_read_unsymetric: Could not process Matrix Market banner "); - printf(" in file [%s]\n", fname); - return -1; - } + if (mm_read_banner(f, &matcode) != 0) { + printf("mm_read_unsymetric: Could not process Matrix Market banner "); + printf(" in file [%s]\n", fname); + return -1; + } - if (!(mm_is_real(matcode) && mm_is_matrix(matcode) && mm_is_sparse(matcode))) { - fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); - return -1; - } + if (!(mm_is_real(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode))) { + fprintf(stderr, "Sorry, this application does not support "); + fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); + return -1; + } - /* find out size of sparse matrix: M, N, nz .... */ + /* find out size of sparse matrix: M, N, nz .... */ - if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) { - fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); - return -1; - } + if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) { + fprintf(stderr, + "read_unsymmetric_sparse(): could not parse matrix size.\n"); + return -1; + } - *M_ = M; - *N_ = N; - *nz_ = nz; + *M_ = M; + *N_ = N; + *nz_ = nz; - /* reseve memory for matrices */ + /* reseve memory for matrices */ - I = (int*)malloc(nz * sizeof(int)); - J = (int*)malloc(nz * sizeof(int)); - val = (double*)malloc(nz * sizeof(double)); + I = (int*)malloc(nz * sizeof(int)); + J = (int*)malloc(nz * sizeof(int)); + val = (double*)malloc(nz * sizeof(double)); - *val_ = val; - *I_ = I; - *J_ = J; + *val_ = val; + *I_ = I; + *J_ = J; - /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ - /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ - /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ + /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ + /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ - for (i = 0; i < nz; i++) { - fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]); - I[i]--; /* adjust from 1-based to 0-based */ - J[i]--; - } - fclose(f); + for (i = 0; i < nz; i++) { + fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]); + I[i]--; /* adjust from 1-based to 0-based */ + J[i]--; + } + fclose(f); - return 0; + return 0; } int mm_is_valid(MM_typecode matcode) { - if (!mm_is_matrix(matcode)) return 0; - if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0; - if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0; - if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) || mm_is_skew(matcode))) - return 0; - return 1; + if (!mm_is_matrix(matcode)) return 0; + if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0; + if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0; + if (mm_is_pattern(matcode) && + (mm_is_hermitian(matcode) || mm_is_skew(matcode))) + return 0; + return 1; } int mm_read_banner(FILE* f, MM_typecode* matcode) { - char line[MM_MAX_LINE_LENGTH]; - char banner[MM_MAX_TOKEN_LENGTH]; - char mtx[MM_MAX_TOKEN_LENGTH]; - char crd[MM_MAX_TOKEN_LENGTH]; - char data_type[MM_MAX_TOKEN_LENGTH]; - char storage_scheme[MM_MAX_TOKEN_LENGTH]; - char* p; + char line[MM_MAX_LINE_LENGTH]; + char banner[MM_MAX_TOKEN_LENGTH]; + char mtx[MM_MAX_TOKEN_LENGTH]; + char crd[MM_MAX_TOKEN_LENGTH]; + char data_type[MM_MAX_TOKEN_LENGTH]; + char storage_scheme[MM_MAX_TOKEN_LENGTH]; + char* p; - mm_clear_typecode(matcode); + mm_clear_typecode(matcode); - if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) return MM_PREMATURE_EOF; + if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) return MM_PREMATURE_EOF; - if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type, storage_scheme) != 5) - return MM_PREMATURE_EOF; + if (sscanf(line, + "%s %s %s %s %s", + banner, + mtx, + crd, + data_type, + storage_scheme) != 5) + return MM_PREMATURE_EOF; - for (p = mtx; *p != '\0'; *p = tolower(*p), p++) - ; /* convert to lower case */ - for (p = crd; *p != '\0'; *p = tolower(*p), p++) - ; - for (p = data_type; *p != '\0'; *p = tolower(*p), p++) - ; - for (p = storage_scheme; *p != '\0'; *p = tolower(*p), p++) - ; + for (p = mtx; *p != '\0'; *p = tolower(*p), p++) + ; /* convert to lower case */ + for (p = crd; *p != '\0'; *p = tolower(*p), p++) + ; + for (p = data_type; *p != '\0'; *p = tolower(*p), p++) + ; + for (p = storage_scheme; *p != '\0'; *p = tolower(*p), p++) + ; - /* check for banner */ - if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) - return MM_NO_HEADER; + /* check for banner */ + if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) + return MM_NO_HEADER; - /* first field should be "mtx" */ - if (strcmp(mtx, MM_MTX_STR) != 0) return MM_UNSUPPORTED_TYPE; - mm_set_matrix(matcode); + /* first field should be "mtx" */ + if (strcmp(mtx, MM_MTX_STR) != 0) return MM_UNSUPPORTED_TYPE; + mm_set_matrix(matcode); - /* second field describes whether this is a sparse matrix (in coordinate - storgae) or a dense array */ + /* second field describes whether this is a sparse matrix (in coordinate + storgae) or a dense array */ - if (strcmp(crd, MM_SPARSE_STR) == 0) mm_set_sparse(matcode); - else if (strcmp(crd, MM_DENSE_STR) == 0) - mm_set_dense(matcode); - else - return MM_UNSUPPORTED_TYPE; + if (strcmp(crd, MM_SPARSE_STR) == 0) mm_set_sparse(matcode); + else if (strcmp(crd, MM_DENSE_STR) == 0) + mm_set_dense(matcode); + else + return MM_UNSUPPORTED_TYPE; - /* third field */ + /* third field */ - if (strcmp(data_type, MM_REAL_STR) == 0) mm_set_real(matcode); - else if (strcmp(data_type, MM_COMPLEX_STR) == 0) - mm_set_complex(matcode); - else if (strcmp(data_type, MM_PATTERN_STR) == 0) - mm_set_pattern(matcode); - else if (strcmp(data_type, MM_INT_STR) == 0) - mm_set_integer(matcode); - else - return MM_UNSUPPORTED_TYPE; + if (strcmp(data_type, MM_REAL_STR) == 0) mm_set_real(matcode); + else if (strcmp(data_type, MM_COMPLEX_STR) == 0) + mm_set_complex(matcode); + else if (strcmp(data_type, MM_PATTERN_STR) == 0) + mm_set_pattern(matcode); + else if (strcmp(data_type, MM_INT_STR) == 0) + mm_set_integer(matcode); + else + return MM_UNSUPPORTED_TYPE; - /* fourth field */ + /* fourth field */ - if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) mm_set_general(matcode); - else if (strcmp(storage_scheme, MM_SYMM_STR) == 0) - mm_set_symmetric(matcode); - else if (strcmp(storage_scheme, MM_HERM_STR) == 0) - mm_set_hermitian(matcode); - else if (strcmp(storage_scheme, MM_SKEW_STR) == 0) - mm_set_skew(matcode); - else - return MM_UNSUPPORTED_TYPE; + if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) mm_set_general(matcode); + else if (strcmp(storage_scheme, MM_SYMM_STR) == 0) + mm_set_symmetric(matcode); + else if (strcmp(storage_scheme, MM_HERM_STR) == 0) + mm_set_hermitian(matcode); + else if (strcmp(storage_scheme, MM_SKEW_STR) == 0) + mm_set_skew(matcode); + else + return MM_UNSUPPORTED_TYPE; - return 0; + return 0; } int mm_write_mtx_crd_size(FILE* f, int M, int N, int nz) { - if (fprintf(f, "%d %d %d\n", M, N, nz) != 3) return MM_COULD_NOT_WRITE_FILE; - else - return 0; + if (fprintf(f, "%d %d %d\n", M, N, nz) != 3) return MM_COULD_NOT_WRITE_FILE; + else + return 0; } int mm_read_mtx_crd_size(FILE* f, int* M, int* N, int* nz) { - char line[MM_MAX_LINE_LENGTH]; - int num_items_read; + char line[MM_MAX_LINE_LENGTH]; + int num_items_read; - /* set return null parameter values, in case we exit with errors */ - *M = *N = *nz = 0; + /* set return null parameter values, in case we exit with errors */ + *M = *N = *nz = 0; - /* now continue scanning until you reach the end-of-comments */ + /* now continue scanning until you reach the end-of-comments */ + do { + if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) return MM_PREMATURE_EOF; + } while (line[0] == '%'); + + /* line[] is either blank or has M,N, nz */ + if (sscanf(line, "%d %d %d", M, N, nz) == 3) return 0; + + else do { - if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) return MM_PREMATURE_EOF; - } while (line[0] == '%'); + num_items_read = fscanf(f, "%d %d %d", M, N, nz); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + } while (num_items_read != 3); - /* line[] is either blank or has M,N, nz */ - if (sscanf(line, "%d %d %d", M, N, nz) == 3) return 0; - - else - do { - num_items_read = fscanf(f, "%d %d %d", M, N, nz); - if (num_items_read == EOF) return MM_PREMATURE_EOF; - } while (num_items_read != 3); - - return 0; + return 0; } int mm_read_mtx_array_size(FILE* f, int* M, int* N) { - char line[MM_MAX_LINE_LENGTH]; - int num_items_read; - /* set return null parameter values, in case we exit with errors */ - *M = *N = 0; + char line[MM_MAX_LINE_LENGTH]; + int num_items_read; + /* set return null parameter values, in case we exit with errors */ + *M = *N = 0; - /* now continue scanning until you reach the end-of-comments */ + /* now continue scanning until you reach the end-of-comments */ + do { + if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) return MM_PREMATURE_EOF; + } while (line[0] == '%'); + + /* line[] is either blank or has M,N, nz */ + if (sscanf(line, "%d %d", M, N) == 2) return 0; + + else /* we have a blank line */ do { - if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) return MM_PREMATURE_EOF; - } while (line[0] == '%'); + num_items_read = fscanf(f, "%d %d", M, N); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + } while (num_items_read != 2); - /* line[] is either blank or has M,N, nz */ - if (sscanf(line, "%d %d", M, N) == 2) return 0; - - else /* we have a blank line */ - do { - num_items_read = fscanf(f, "%d %d", M, N); - if (num_items_read == EOF) return MM_PREMATURE_EOF; - } while (num_items_read != 2); - - return 0; + return 0; } int mm_write_mtx_array_size(FILE* f, int M, int N) { - if (fprintf(f, "%d %d\n", M, N) != 2) return MM_COULD_NOT_WRITE_FILE; - else - return 0; + if (fprintf(f, "%d %d\n", M, N) != 2) return MM_COULD_NOT_WRITE_FILE; + else + return 0; } /*-------------------------------------------------------------------------*/ @@ -219,47 +233,58 @@ int mm_write_mtx_array_size(FILE* f, int M, int N) /* use when I[], J[], and val[]J, and val[] are already allocated */ /******************************************************************/ -int mm_read_mtx_crd_data( - FILE* f, int M, int N, int nz, int I[], int J[], double val[], MM_typecode matcode) +int mm_read_mtx_crd_data(FILE* f, + int M, + int N, + int nz, + int I[], + int J[], + double val[], + MM_typecode matcode) { - int i; - if (mm_is_complex(matcode)) { - for (i = 0; i < nz; i++) - if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2 * i], &val[2 * i + 1]) != - 4) - return MM_PREMATURE_EOF; - } else if (mm_is_real(matcode)) { - for (i = 0; i < nz; i++) { - if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]) != 3) - return MM_PREMATURE_EOF; - } + int i; + if (mm_is_complex(matcode)) { + for (i = 0; i < nz; i++) + if (fscanf(f, + "%d %d %lg %lg", + &I[i], + &J[i], + &val[2 * i], + &val[2 * i + 1]) != 4) + return MM_PREMATURE_EOF; + } else if (mm_is_real(matcode)) { + for (i = 0; i < nz; i++) { + if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]) != 3) + return MM_PREMATURE_EOF; } + } - else if (mm_is_pattern(matcode)) { - for (i = 0; i < nz; i++) - if (fscanf(f, "%d %d", &I[i], &J[i]) != 2) return MM_PREMATURE_EOF; - } else - return MM_UNSUPPORTED_TYPE; + else if (mm_is_pattern(matcode)) { + for (i = 0; i < nz; i++) + if (fscanf(f, "%d %d", &I[i], &J[i]) != 2) return MM_PREMATURE_EOF; + } else + return MM_UNSUPPORTED_TYPE; - return 0; + return 0; } int mm_read_mtx_crd_entry( FILE* f, int* I, int* J, double* real, double* imag, MM_typecode matcode) { - if (mm_is_complex(matcode)) { - if (fscanf(f, "%d %d %lg %lg", I, J, real, imag) != 4) return MM_PREMATURE_EOF; - } else if (mm_is_real(matcode)) { - if (fscanf(f, "%d %d %lg\n", I, J, real) != 3) return MM_PREMATURE_EOF; + if (mm_is_complex(matcode)) { + if (fscanf(f, "%d %d %lg %lg", I, J, real, imag) != 4) + return MM_PREMATURE_EOF; + } else if (mm_is_real(matcode)) { + if (fscanf(f, "%d %d %lg\n", I, J, real) != 3) return MM_PREMATURE_EOF; - } + } - else if (mm_is_pattern(matcode)) { - if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF; - } else - return MM_UNSUPPORTED_TYPE; + else if (mm_is_pattern(matcode)) { + if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF; + } else + return MM_UNSUPPORTED_TYPE; - return 0; + return 0; } /************************************************************************ @@ -279,53 +304,54 @@ int mm_read_mtx_crd(char* fname, double** val, MM_typecode* matcode) { - int ret_code; - FILE* f; + int ret_code; + FILE* f; - if (strcmp(fname, "stdin") == 0) f = stdin; - else if ((f = fopen(fname, "r")) == NULL) - return MM_COULD_NOT_READ_FILE; + if (strcmp(fname, "stdin") == 0) f = stdin; + else if ((f = fopen(fname, "r")) == NULL) + return MM_COULD_NOT_READ_FILE; - if ((ret_code = mm_read_banner(f, matcode)) != 0) return ret_code; + if ((ret_code = mm_read_banner(f, matcode)) != 0) return ret_code; - if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) && mm_is_matrix(*matcode))) - return MM_UNSUPPORTED_TYPE; + if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) && + mm_is_matrix(*matcode))) + return MM_UNSUPPORTED_TYPE; - if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0) return ret_code; + if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0) return ret_code; - *I = (int*)malloc(*nz * sizeof(int)); - *J = (int*)malloc(*nz * sizeof(int)); - *val = NULL; + *I = (int*)malloc(*nz * sizeof(int)); + *J = (int*)malloc(*nz * sizeof(int)); + *val = NULL; - if (mm_is_complex(*matcode)) { - *val = (double*)malloc(*nz * 2 * sizeof(double)); - ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode); - if (ret_code != 0) return ret_code; - } else if (mm_is_real(*matcode)) { - *val = (double*)malloc(*nz * sizeof(double)); - ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode); - if (ret_code != 0) return ret_code; - } + if (mm_is_complex(*matcode)) { + *val = (double*)malloc(*nz * 2 * sizeof(double)); + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode); + if (ret_code != 0) return ret_code; + } else if (mm_is_real(*matcode)) { + *val = (double*)malloc(*nz * sizeof(double)); + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode); + if (ret_code != 0) return ret_code; + } - else if (mm_is_pattern(*matcode)) { - ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode); - if (ret_code != 0) return ret_code; - } + else if (mm_is_pattern(*matcode)) { + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, *matcode); + if (ret_code != 0) return ret_code; + } - if (f != stdin) fclose(f); - return 0; + if (f != stdin) fclose(f); + return 0; } int mm_write_banner(FILE* f, MM_typecode matcode) { - char* str = mm_typecode_to_str(matcode); - int ret_code; + char* str = mm_typecode_to_str(matcode); + int ret_code; - ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str); - free(str); - if (ret_code != 2) return MM_COULD_NOT_WRITE_FILE; - else - return 0; + ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str); + free(str); + if (ret_code != 2) return MM_COULD_NOT_WRITE_FILE; + else + return 0; } int mm_write_mtx_crd(char fname[], @@ -337,38 +363,43 @@ int mm_write_mtx_crd(char fname[], double val[], MM_typecode matcode) { - FILE* f; - int i; + FILE* f; + int i; - if (strcmp(fname, "stdout") == 0) f = stdout; - else if ((f = fopen(fname, "w")) == NULL) - return MM_COULD_NOT_WRITE_FILE; + if (strcmp(fname, "stdout") == 0) f = stdout; + else if ((f = fopen(fname, "w")) == NULL) + return MM_COULD_NOT_WRITE_FILE; - /* print banner followed by typecode */ - fprintf(f, "%s ", MatrixMarketBanner); - fprintf(f, "%s\n", mm_typecode_to_str(matcode)); + /* print banner followed by typecode */ + fprintf(f, "%s ", MatrixMarketBanner); + fprintf(f, "%s\n", mm_typecode_to_str(matcode)); - /* print matrix sizes and nonzeros */ - fprintf(f, "%d %d %d\n", M, N, nz); - - /* print values */ - if (mm_is_pattern(matcode)) - for (i = 0; i < nz; i++) - fprintf(f, "%d %d\n", I[i], J[i]); - else if (mm_is_real(matcode)) - for (i = 0; i < nz; i++) - fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]); - else if (mm_is_complex(matcode)) - for (i = 0; i < nz; i++) - fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2 * i], val[2 * i + 1]); - else { - if (f != stdout) fclose(f); - return MM_UNSUPPORTED_TYPE; - } + /* print matrix sizes and nonzeros */ + fprintf(f, "%d %d %d\n", M, N, nz); + /* print values */ + if (mm_is_pattern(matcode)) + for (i = 0; i < nz; i++) + fprintf(f, "%d %d\n", I[i], J[i]); + else if (mm_is_real(matcode)) + for (i = 0; i < nz; i++) + fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]); + else if (mm_is_complex(matcode)) + for (i = 0; i < nz; i++) + fprintf(f, + "%d %d %20.16g %20.16g\n", + I[i], + J[i], + val[2 * i], + val[2 * i + 1]); + else { if (f != stdout) fclose(f); + return MM_UNSUPPORTED_TYPE; + } - return 0; + if (f != stdout) fclose(f); + + return 0; } /** @@ -378,52 +409,52 @@ int mm_write_mtx_crd(char fname[], */ char* mm_strdup(const char* s) { - int len = strlen(s); - char* s2 = (char*)malloc((len + 1) * sizeof(char)); - return strcpy(s2, s); + int len = strlen(s); + char* s2 = (char*)malloc((len + 1) * sizeof(char)); + return strcpy(s2, s); } char* mm_typecode_to_str(MM_typecode matcode) { - char buffer[MM_MAX_LINE_LENGTH]; - char* types[4]; - char* mm_strdup(const char*); - int error = 0; + char buffer[MM_MAX_LINE_LENGTH]; + char* types[4]; + char* mm_strdup(const char*); + int error = 0; - /* check for MTX type */ - if (mm_is_matrix(matcode)) types[0] = MM_MTX_STR; - else - error = 1; + /* check for MTX type */ + if (mm_is_matrix(matcode)) types[0] = MM_MTX_STR; + else + error = 1; - /* check for CRD or ARR matrix */ - if (mm_is_sparse(matcode)) types[1] = MM_SPARSE_STR; - else if (mm_is_dense(matcode)) - types[1] = MM_DENSE_STR; - else - return NULL; + /* check for CRD or ARR matrix */ + if (mm_is_sparse(matcode)) types[1] = MM_SPARSE_STR; + else if (mm_is_dense(matcode)) + types[1] = MM_DENSE_STR; + else + return NULL; - /* check for element data type */ - if (mm_is_real(matcode)) types[2] = MM_REAL_STR; - else if (mm_is_complex(matcode)) - types[2] = MM_COMPLEX_STR; - else if (mm_is_pattern(matcode)) - types[2] = MM_PATTERN_STR; - else if (mm_is_integer(matcode)) - types[2] = MM_INT_STR; - else - return NULL; + /* check for element data type */ + if (mm_is_real(matcode)) types[2] = MM_REAL_STR; + else if (mm_is_complex(matcode)) + types[2] = MM_COMPLEX_STR; + else if (mm_is_pattern(matcode)) + types[2] = MM_PATTERN_STR; + else if (mm_is_integer(matcode)) + types[2] = MM_INT_STR; + else + return NULL; - /* check for symmetry type */ - if (mm_is_general(matcode)) types[3] = MM_GENERAL_STR; - else if (mm_is_symmetric(matcode)) - types[3] = MM_SYMM_STR; - else if (mm_is_hermitian(matcode)) - types[3] = MM_HERM_STR; - else if (mm_is_skew(matcode)) - types[3] = MM_SKEW_STR; - else - return NULL; + /* check for symmetry type */ + if (mm_is_general(matcode)) types[3] = MM_GENERAL_STR; + else if (mm_is_symmetric(matcode)) + types[3] = MM_SYMM_STR; + else if (mm_is_hermitian(matcode)) + types[3] = MM_HERM_STR; + else if (mm_is_skew(matcode)) + types[3] = MM_SKEW_STR; + else + return NULL; - sprintf(buffer, "%s %s %s %s", types[0], types[1], types[2], types[3]); - return mm_strdup(buffer); + sprintf(buffer, "%s %s %s %s", types[0], types[1], types[2], types[3]); + return mm_strdup(buffer); } diff --git a/src/mmio.h b/src/mmio.h index db660fa..7096e88 100644 --- a/src/mmio.h +++ b/src/mmio.h @@ -64,8 +64,8 @@ int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ #define mm_set_skew(typecode) ((*typecode)[3] = 'K') #define mm_set_hermitian(typecode) ((*typecode)[3] = 'H') -#define mm_clear_typecode(typecode) \ - ((*typecode)[0] = (*typecode)[1] = (*typecode)[2] = ' ', (*typecode)[3] = 'G') +#define mm_clear_typecode(typecode) \ + ((*typecode)[0] = (*typecode)[1] = (*typecode)[2] = ' ', (*typecode)[3] = 'G') #define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) @@ -83,14 +83,14 @@ int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ MM_matrix_typecode: 4-character sequence - ojbect sparse/ data storage dense - type scheme + ojbect sparse/ data + storage dense type scheme string position: [0] [1] [2] [3] Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) - A(array) C(omplex) - H(ermitian) P(attern) S(ymmetric) I(nteger) K(kew) + A(array) + C(omplex) H(ermitian) P(attern) S(ymmetric) I(nteger) K(kew) ***********************************************************************/ @@ -118,12 +118,23 @@ int mm_write_mtx_crd(char fname[], int J[], double val[], MM_typecode matcode); -int mm_read_mtx_crd_data( - FILE* f, int M, int N, int nz, int I[], int J[], double val[], MM_typecode matcode); +int mm_read_mtx_crd_data(FILE* f, + int M, + int N, + int nz, + int I[], + int J[], + double val[], + MM_typecode matcode); int mm_read_mtx_crd_entry( FILE* f, int* I, int* J, double* real, double* img, MM_typecode matcode); -int mm_read_unsymmetric_sparse( - const char* fname, int* M_, int* N_, int* nz_, double** val_, int** I_, int** J_); +int mm_read_unsymmetric_sparse(const char* fname, + int* M_, + int* N_, + int* nz_, + double** val_, + int** I_, + int** J_); #endif diff --git a/src/parameter.c b/src/parameter.c index 2ddc1b8..b3fd922 100644 --- a/src/parameter.c +++ b/src/parameter.c @@ -11,61 +11,61 @@ void initParameter(Parameter* param) { - param->filename = "generate"; - param->nx = 100; - param->ny = 100; - param->nz = 1000; - param->itermax = 1000; - param->eps = 0.0001; + param->filename = "generate"; + param->nx = 100; + param->ny = 100; + param->nz = 1000; + param->itermax = 1000; + param->eps = 0.0001; } void readParameter(Parameter* param, const char* filename) { - FILE* fp = fopen(filename, "r"); - char line[MAXLINE]; - int i; + FILE* fp = fopen(filename, "r"); + char line[MAXLINE]; + int i; - if (!fp) { - fprintf(stderr, "Could not open parameter file: %s\n", filename); - exit(EXIT_FAILURE); - } + if (!fp) { + fprintf(stderr, "Could not open parameter file: %s\n", filename); + exit(EXIT_FAILURE); + } - while (!feof(fp)) { - line[0] = '\0'; - fgets(line, MAXLINE, fp); - for (i = 0; line[i] != '\0' && line[i] != '#'; i++) - ; - line[i] = '\0'; + while (!feof(fp)) { + line[0] = '\0'; + fgets(line, MAXLINE, fp); + for (i = 0; line[i] != '\0' && line[i] != '#'; i++) + ; + line[i] = '\0'; - char* tok = strtok(line, " "); - char* val = strtok(NULL, " "); + char* tok = strtok(line, " "); + char* val = strtok(NULL, " "); -#define PARSE_PARAM(p, f) \ - if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { \ - param->p = f(val); \ - } +#define PARSE_PARAM(p, f) \ + if (strncmp(tok, #p, sizeof(#p) / sizeof(#p[0]) - 1) == 0) { \ + param->p = f(val); \ + } #define PARSE_STRING(p) PARSE_PARAM(p, strdup) #define PARSE_INT(p) PARSE_PARAM(p, atoi) #define PARSE_REAL(p) PARSE_PARAM(p, atof) - if (tok != NULL && val != NULL) { - PARSE_INT(nx); - PARSE_INT(ny); - PARSE_INT(nz); - PARSE_INT(itermax); - PARSE_REAL(eps); - } + if (tok != NULL && val != NULL) { + PARSE_INT(nx); + PARSE_INT(ny); + PARSE_INT(nz); + PARSE_INT(itermax); + PARSE_REAL(eps); } + } - fclose(fp); + fclose(fp); } void printParameter(Parameter* param) { - printf("Parameters\n"); - printf("Geometry data:\n"); - printf("\tPoints (x, y, z): %d, %d, %d\n", param->nx, param->ny, param->nz); - printf("Iterative solver parameters:\n"); - printf("\tMax iterations: %d\n", param->itermax); - printf("\tepsilon (stopping tolerance) : %f\n", param->eps); + printf("Parameters\n"); + printf("Geometry data:\n"); + printf("\tPoints (x, y, z): %d, %d, %d\n", param->nx, param->ny, param->nz); + printf("Iterative solver parameters:\n"); + printf("\tMax iterations: %d\n", param->itermax); + printf("\tepsilon (stopping tolerance) : %f\n", param->eps); } diff --git a/src/parameter.h b/src/parameter.h index 27b44a1..063e658 100644 --- a/src/parameter.h +++ b/src/parameter.h @@ -6,10 +6,10 @@ #define __PARAMETER_H_ typedef struct { - char* filename; - int nx, ny, nz; - int itermax; - double eps; + char* filename; + int nx, ny, nz; + int itermax; + double eps; } Parameter; void initParameter(Parameter*); diff --git a/src/progress.c b/src/progress.c index b0af4c9..031262a 100644 --- a/src/progress.c +++ b/src/progress.c @@ -13,36 +13,36 @@ static int _current; void initProgress(double end) { - _end = end; - _current = 0; + _end = end; + _current = 0; - printf("[ ]"); - fflush(stdout); + printf("[ ]"); + fflush(stdout); } void printProgress(double current) { - int new = (int)rint((current / _end) * 10.0); + int new = (int)rint((current / _end) * 10.0); - if (new > _current) { - char progress[11]; - _current = new; - progress[0] = 0; + if (new > _current) { + char progress[11]; + _current = new; + progress[0] = 0; - for (int i = 0; i < 10; i++) { - if (i < _current) { - sprintf(progress + strlen(progress), "#"); - } else { - sprintf(progress + strlen(progress), " "); - } - } - printf("\r[%s]", progress); + for (int i = 0; i < 10; i++) { + if (i < _current) { + sprintf(progress + strlen(progress), "#"); + } else { + sprintf(progress + strlen(progress), " "); + } } - fflush(stdout); + printf("\r[%s]", progress); + } + fflush(stdout); } void stopProgress() { - printf("\n"); - fflush(stdout); + printf("\n"); + fflush(stdout); } diff --git a/src/solver.c b/src/solver.c index 88d9f9b..c071190 100644 --- a/src/solver.c +++ b/src/solver.c @@ -13,139 +13,143 @@ #include "solver.h" #include "util.h" -static void matrixGenerate(Parameter* p, Solver* s, Comm* c, bool use_7pt_stencil) +static void matrixGenerate( + Parameter* p, Solver* s, Comm* c, bool use_7pt_stencil) { - int size = c->size; - int rank = c->rank; + int size = c->size; + int rank = c->rank; - CG_UINT local_nrow = p->nx * p->ny * p->nz; - CG_UINT local_nnz = 27 * local_nrow; + CG_UINT local_nrow = p->nx * p->ny * p->nz; + CG_UINT local_nnz = 27 * local_nrow; - CG_UINT total_nrow = local_nrow * size; - CG_UINT total_nnz = 27 * total_nrow; + CG_UINT total_nrow = local_nrow * size; + CG_UINT total_nnz = 27 * total_nrow; - int start_row = local_nrow * rank; - int stop_row = start_row + local_nrow - 1; + int start_row = local_nrow * rank; + int stop_row = start_row + local_nrow - 1; - if (commIsMaster(c)) { - if (use_7pt_stencil) { - printf("Generate 7pt matrix with "); - } else { - printf("Generate 27pt matrix with "); - } - printf("%d total rows and %d nonzeros\n", (int)total_nrow, (int)local_nnz); + if (commIsMaster(c)) { + if (use_7pt_stencil) { + printf("Generate 7pt matrix with "); + } else { + printf("Generate 27pt matrix with "); } + printf("%d total rows and %d nonzeros\n", (int)total_nrow, (int)local_nnz); + } - s->A.val = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nnz * sizeof(CG_FLOAT)); - s->A.colInd = (CG_UINT*)allocate(ARRAY_ALIGNMENT, local_nnz * sizeof(CG_UINT)); - s->A.rowPtr = (CG_UINT*)allocate(ARRAY_ALIGNMENT, (local_nrow + 1) * sizeof(CG_UINT)); - s->x = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nrow * sizeof(CG_FLOAT)); - s->b = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nrow * sizeof(CG_FLOAT)); - s->xexact = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nrow * sizeof(CG_FLOAT)); + s->A.val = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nnz * sizeof(CG_FLOAT)); + s->A.colInd = (CG_UINT*)allocate(ARRAY_ALIGNMENT, + local_nnz * sizeof(CG_UINT)); + s->A.rowPtr = (CG_UINT*)allocate(ARRAY_ALIGNMENT, + (local_nrow + 1) * sizeof(CG_UINT)); + s->x = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nrow * sizeof(CG_FLOAT)); + s->b = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, local_nrow * sizeof(CG_FLOAT)); + s->xexact = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, + local_nrow * sizeof(CG_FLOAT)); - CG_FLOAT* curvalptr = s->A.val; - CG_UINT* curindptr = s->A.colInd; - CG_UINT* currowptr = s->A.rowPtr; - CG_FLOAT* x = s->x; - CG_FLOAT* b = s->b; - CG_FLOAT* xexact = s->xexact; + CG_FLOAT* curvalptr = s->A.val; + CG_UINT* curindptr = s->A.colInd; + CG_UINT* currowptr = s->A.rowPtr; + CG_FLOAT* x = s->x; + CG_FLOAT* b = s->b; + CG_FLOAT* xexact = s->xexact; - CG_UINT nnzglobal = 0; - int nx = p->nx, ny = p->ny, nz = p->nz; - CG_UINT cursor = 0; + CG_UINT nnzglobal = 0; + int nx = p->nx, ny = p->ny, nz = p->nz; + CG_UINT cursor = 0; - *currowptr++ = 0; + *currowptr++ = 0; - for (int iz = 0; iz < nz; iz++) { - for (int iy = 0; iy < ny; iy++) { - for (int ix = 0; ix < nx; ix++) { + for (int iz = 0; iz < nz; iz++) { + for (int iy = 0; iy < ny; iy++) { + for (int ix = 0; ix < nx; ix++) { - int curlocalrow = iz * nx * ny + iy * nx + ix; - int currow = start_row + iz * nx * ny + iy * nx + ix; - int nnzrow = 0; + int curlocalrow = iz * nx * ny + iy * nx + ix; + int currow = start_row + iz * nx * ny + iy * nx + ix; + int nnzrow = 0; - for (int sz = -1; sz <= 1; sz++) { - for (int sy = -1; sy <= 1; sy++) { - for (int sx = -1; sx <= 1; sx++) { + for (int sz = -1; sz <= 1; sz++) { + for (int sy = -1; sy <= 1; sy++) { + for (int sx = -1; sx <= 1; sx++) { - int curcol = currow + sz * nx * ny + sy * nx + sx; - // Since we have a stack of nx by ny by nz domains - // , stacking in the z direction, we check to see - // if sx and sy are reaching outside of the domain, - // while the check for the curcol being valid is - // sufficient to check the z values - if ((ix + sx >= 0) && (ix + sx < nx) && (iy + sy >= 0) && - (iy + sy < ny) && (curcol >= 0 && curcol < total_nrow)) { - // This logic will skip over point that are not part of a - // 7-pt stencil - if (!use_7pt_stencil || - (sz * sz + sy * sy + sx * sx <= 1)) { - if (curcol == currow) { - *curvalptr++ = 27.0; - } else { - *curvalptr++ = -1.0; - } - *curindptr++ = curcol; - nnzrow++; - } - } - } // end sx loop - } // end sy loop - } // end sz loop + int curcol = currow + sz * nx * ny + sy * nx + sx; + // Since we have a stack of nx by ny by nz domains + // , stacking in the z direction, we check to see + // if sx and sy are reaching outside of the domain, + // while the check for the curcol being valid is + // sufficient to check the z values + if ((ix + sx >= 0) && (ix + sx < nx) && (iy + sy >= 0) && + (iy + sy < ny) && (curcol >= 0 && curcol < total_nrow)) { + // This logic will skip over point that are not part of a + // 7-pt stencil + if (!use_7pt_stencil || (sz * sz + sy * sy + sx * sx <= 1)) { + if (curcol == currow) { + *curvalptr++ = 27.0; + } else { + *curvalptr++ = -1.0; + } + *curindptr++ = curcol; + nnzrow++; + } + } + } // end sx loop + } // end sy loop + } // end sz loop - *currowptr = *(currowptr - 1) + nnzrow; - currowptr++; - nnzglobal += nnzrow; - x[curlocalrow] = 0.0; - b[curlocalrow] = 27.0 - ((CG_FLOAT)(nnzrow - 1)); - xexact[curlocalrow] = 1.0; - } // end ix loop - } // end iy loop - } // end iz loop + *currowptr = *(currowptr - 1) + nnzrow; + currowptr++; + nnzglobal += nnzrow; + x[curlocalrow] = 0.0; + b[curlocalrow] = 27.0 - ((CG_FLOAT)(nnzrow - 1)); + xexact[curlocalrow] = 1.0; + } // end ix loop + } // end iy loop + } // end iz loop #ifdef VERBOSE - printf("Process %d of %d has %d rows\n", rank, size, local_nrow); - printf("Global rows %d through %d\n", start_row, stop_row); - printf("%d nonzeros\n", start_row, stop_row); + printf("Process %d of %d has %d rows\n", rank, size, local_nrow); + printf("Global rows %d through %d\n", start_row, stop_row); + printf("%d nonzeros\n", start_row, stop_row); #endif /* ifdef VERBOSE */ - s->A.startRow = start_row; - s->A.stopRow = stop_row; - s->A.totalNr = total_nrow; - s->A.totalNnz = total_nnz; - s->A.nr = local_nrow; - s->A.nc = local_nrow; - s->A.nnz = local_nnz; + s->A.startRow = start_row; + s->A.stopRow = stop_row; + s->A.totalNr = total_nrow; + s->A.totalNnz = total_nnz; + s->A.nr = local_nrow; + s->A.nc = local_nrow; + s->A.nnz = local_nnz; } void initSolver(Solver* s, Comm* c, Parameter* p) { - if (!strcmp(p->filename, "generate")) { - matrixGenerate(p, s, c, false); - } else if (!strcmp(p->filename, "generate7P")) { - matrixGenerate(p, s, c, true); - } else { - matrixRead(&s->A, p->filename); - } + if (!strcmp(p->filename, "generate")) { + matrixGenerate(p, s, c, false); + } else if (!strcmp(p->filename, "generate7P")) { + matrixGenerate(p, s, c, true); + } else { + matrixRead(&s->A, p->filename); + } } void spMVM(Matrix* m, const CG_FLOAT* restrict x, CG_FLOAT* restrict y) { - CG_UINT numRows = m->nr; - CG_UINT* rowPtr = m->rowPtr; - CG_UINT* colInd = m->colInd; - CG_FLOAT* val = m->val; + CG_UINT numRows = m->nr; + CG_UINT* rowPtr = m->rowPtr; + CG_UINT* colInd = m->colInd; + CG_FLOAT* val = m->val; - for (size_t rowID = 0; rowID < numRows; rowID++) { - CG_FLOAT tmp = y[rowID]; + for (size_t rowID = 0; rowID < numRows; rowID++) { + CG_FLOAT tmp = y[rowID]; - // loop over all elements in row - for (size_t rowEntry = rowPtr[rowID]; rowEntry < rowPtr[rowID + 1]; rowEntry++) { - tmp += val[rowEntry] * x[colInd[rowEntry]]; - } - - y[rowID] = tmp; + // loop over all elements in row + for (size_t rowEntry = rowPtr[rowID]; rowEntry < rowPtr[rowID + 1]; + rowEntry++) { + tmp += val[rowEntry] * x[colInd[rowEntry]]; } + + y[rowID] = tmp; + } } void waxpby(const CG_UINT n, @@ -155,19 +159,19 @@ void waxpby(const CG_UINT n, const CG_FLOAT* restrict y, CG_FLOAT* const w) { - if (alpha == 1.0) { - for (size_t i = 0; i < n; i++) { - w[i] = x[i] + beta * y[i]; - } - } else if (beta == 1.0) { - for (size_t i = 0; i < n; i++) { - w[i] = alpha * x[i] + y[i]; - } - } else { - for (size_t i = 0; i < n; i++) { - w[i] = alpha * x[i] + beta * y[i]; - } + if (alpha == 1.0) { + for (size_t i = 0; i < n; i++) { + w[i] = x[i] + beta * y[i]; } + } else if (beta == 1.0) { + for (size_t i = 0; i < n; i++) { + w[i] = alpha * x[i] + y[i]; + } + } else { + for (size_t i = 0; i < n; i++) { + w[i] = alpha * x[i] + beta * y[i]; + } + } } void ddot(const CG_UINT n, @@ -175,18 +179,18 @@ void ddot(const CG_UINT n, const CG_FLOAT* restrict y, CG_FLOAT* restrict result) { - CG_FLOAT sum = 0.0; + CG_FLOAT sum = 0.0; - if (y == x) { - for (size_t i = 0; i < n; i++) { - sum += x[i] * x[i]; - } - } else { - for (size_t i = 0; i < n; i++) { - sum += x[i] * y[i]; - } + if (y == x) { + for (size_t i = 0; i < n; i++) { + sum += x[i] * x[i]; } + } else { + for (size_t i = 0; i < n; i++) { + sum += x[i] * y[i]; + } + } - commReduction(&sum, SUM); - *result = sum; + commReduction(&sum, SUM); + *result = sum; } diff --git a/src/solver.h b/src/solver.h index bfce5f9..e67f4fc 100644 --- a/src/solver.h +++ b/src/solver.h @@ -10,10 +10,10 @@ #include "util.h" typedef struct { - Matrix A; - CG_FLOAT* x; - CG_FLOAT* b; - CG_FLOAT* xexact; + Matrix A; + CG_FLOAT* x; + CG_FLOAT* b; + CG_FLOAT* xexact; } Solver; void initSolver(Solver* s, Comm* c, Parameter*); diff --git a/src/timing.c b/src/timing.c index 9bfef68..29458a1 100644 --- a/src/timing.c +++ b/src/timing.c @@ -7,14 +7,14 @@ double getTimeStamp(void) { - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; } double getTimeResolution(void) { - struct timespec ts; - clock_getres(CLOCK_MONOTONIC, &ts); - return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; + struct timespec ts; + clock_getres(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9; }