From 9f37fa73a98043129240ed91384105e65c58b911 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 17 Jan 2025 20:30:18 +0100 Subject: [PATCH] Partial port of communication code --- src/comm.c | 493 ++++++++++++++++++++++++++--------------------------- src/comm.h | 12 +- 2 files changed, 248 insertions(+), 257 deletions(-) diff --git a/src/comm.c b/src/comm.c index d071965..e71819e 100644 --- a/src/comm.c +++ b/src/comm.c @@ -14,10 +14,6 @@ #include "allocate.h" #include "comm.h" -#define MAX_EXTERNAL 100000 -#define MAX_NUM_MESSAGES 500 -#define MAX_NUM_NEIGHBOURS MAX_NUM_MESSAGES - // subroutines local to this module int sizeOfRank(int rank, int size, int N) { @@ -37,6 +33,7 @@ void commReduction(double* v, int op) void commPartition(Comm* c, Matrix* A) { +#ifdef _MPI int rank = c->rank; int size = c->size; MPI_Comm comm = c->comm; @@ -66,15 +63,12 @@ void commPartition(Comm* c, Matrix* A) // - find out which processor owns the value. // - Set up communication for sparse MV operation. - /////////////////////////////////////////// // Scan the indices and transform to local - /////////////////////////////////////////// + int* externals = (int*)allocate(ARRAY_ALIGNMENT, A->totalNr * sizeof(int)); + int num_external = 1; int* external_index = (int*)allocate(ARRAY_ALIGNMENT, MAX_EXTERNAL * sizeof(int)); - int* externals = (int*)allocate(ARRAY_ALIGNMENT, A->totalNr * sizeof(int)); - int num_external = 1; - - c->external_index = external_index; + c->external_index = external_index; for (int i = 0; i < A->totalNr; i++) { externals[i] = -1; @@ -95,49 +89,39 @@ void commPartition(Comm* c, Matrix* A) // shift local rows to the start if (start_row <= cur_ind && cur_ind <= stop_row) { col_ind[j] -= start_row; - } else // Must find out if we have already set up this point - { + } else { + // Must find out if we have already set up this point if (externals[cur_ind] == -1) { externals[cur_ind] = num_external++; if (num_external <= MAX_EXTERNAL) { external_index[num_external - 1] = cur_ind; // Mark index as external by negating it - ptr_to_inds_in_row[i][j] = -(ptr_to_inds_in_row[i][j] + 1); + col_ind[j] = -col_ind[j]; } else { - cerr << "Must increase MAX_EXTERNAL in HPC_Sparse_Matrix.hpp" - << endl; - abort(); + printf("Must increase MAX_EXTERNAL\n"); + exit(EXIT_FAILURE); } } else { - // Mark index as external by adding 1 and negating it - ptr_to_inds_in_row[i][j] = -(ptr_to_inds_in_row[i][j] + 1); + // Mark index as external by negating it + col_ind[j] = -col_ind[j]; } } } } - //////////////////////////////////////////////////////////////////////////// - // Go through list of externals to find out which processors must be accessed. - //////////////////////////////////////////////////////////////////////////// + /************************************************************************** + Go through list of externals to find out which processors must be accessed. 
+ **************************************************************************/ + c->num_external = num_external; + int tmp_buffer[size]; + int global_index_offsets[size]; - A->num_external = num_external; - int* tmp_buffer = new int[size]; // Temp buffer space needed below + for (int i = 0; i < size; i++) { + tmp_buffer[i] = 0; + } - // Build list of global index offset - - int* global_index_offsets = new int[size]; - for (i = 0; i < size; i++) - tmp_buffer[i] = 0; // First zero out - - tmp_buffer[rank] = start_row; // This is my start row - - // This call sends the start_row of each ith processor to the ith - // entry of global_index_offset on all processors. - // Thus, each processor know the range of indices owned by all - // other processors. - // Note: There might be a better algorithm for doing this, but this - // will work... + tmp_buffer[rank] = start_row; MPI_Allreduce(tmp_buffer, global_index_offsets, @@ -147,105 +131,97 @@ void commPartition(Comm* c, Matrix* A) MPI_COMM_WORLD); // Go through list of externals and find the processor that owns each - int* external_processor = new int[num_external]; - int* new_external_processor = new int[num_external]; + int external_processor[num_external]; - for (i = 0; i < num_external; i++) { + for (int i = 0; i < num_external; i++) { int cur_ind = external_index[i]; - for (int j = size - 1; j >= 0; j--) + for (int j = size - 1; j >= 0; j--) { if (global_index_offsets[j] <= cur_ind) { external_processor[i] = j; break; } - } - if (debug) { - t0 = mytimer() - t0; - cout << " Time in finding processors phase = " << t0 << endl; + } } - //////////////////////////////////////////////////////////////////////////// - // Sift through the external elements. For each newly encountered external - // point assign it the next index in the sequence. Then look for other - // external elements who are update by the same node and assign them the next - // set of index numbers in the sequence (ie. elements updated by the same node - // have consecutive indices). - //////////////////////////////////////////////////////////////////////////// - - if (debug) t0 = mytimer(); + /*Go through the external elements. For each newly encountered external + point assign it the next index in the local sequence. Then look for other + external elements who are updated by the same node and assign them the next + set of index numbers in the local sequence (ie. 
elements updated by the same node
+    have consecutive indices). */
+    int* external_local_index = (int*)allocate(ARRAY_ALIGNMENT,
+        MAX_EXTERNAL * sizeof(int));
+    c->external_local_index = external_local_index;
 
     int count = local_nrow;
 
-  for (i = 0; i < num_external; i++)
-    external_local_index[i] = -1;
 
-  for (i = 0; i < num_external; i++) {
+    for (int i = 0; i < num_external; i++) {
+        external_local_index[i] = -1;
+    }
+
+    for (int i = 0; i < num_external; i++) {
         if (external_local_index[i] == -1) {
             external_local_index[i] = count++;
 
-      for (j = i + 1; j < num_external; j++) {
-        if (external_processor[j] == external_processor[i])
+            for (int j = i + 1; j < num_external; j++) {
+                if (external_processor[j] == external_processor[i]) {
                     external_local_index[j] = count++;
+                }
             }
         }
     }
 
-  if (debug) {
-    t0 = mytimer() - t0;
-    cout << " Time in scanning external indices phase = " << t0 << endl;
-  }
-  if (debug) t0 = mytimer();
+    // map all external ids to the new local index
+    CG_UINT* rowPtr = A->rowPtr;
 
-  for (i = 0; i < local_nrow; i++) {
-    for (j = 0; j < nnz_in_row[i]; j++) {
-      if (ptr_to_inds_in_row[i][j] < 0) // Change index values of externals
-      {
-        int cur_ind = -ptr_to_inds_in_row[i][j] - 1;
-        ptr_to_inds_in_row[i][j] = external_local_index[externals[cur_ind]];
+    for (int i = 0; i < local_nrow; i++) {
+        for (int j = rowPtr[i]; j < rowPtr[i + 1]; j++) {
+            if (col_ind[j] < 0) {
+                // undo the plain negation used above to mark this index as external
+                int cur_ind = -col_ind[j];
+                col_ind[j] = external_local_index[externals[cur_ind]];
             }
         }
     }
 
-  for (i = 0; i < num_external; i++)
+    int new_external_processor[num_external];
+
+    for (int i = 0; i < num_external; i++) {
         new_external_processor[i] = 0;
+    }
 
-  for (i = 0; i < num_external; i++)
+    // setup map from external id to partition
+    for (int i = 0; i < num_external; i++) {
         new_external_processor[external_local_index[i] - local_nrow] = external_processor[i];
-
-  if (debug) {
-    t0 = mytimer() - t0;
-    cout << " Time in assigning external indices phase = " << t0 << endl;
     }
 
-  if (debug_details) {
-    for (i = 0; i < num_external; i++) {
-      cout << "Processor " << rank << " of " << size << ": external processor[" << i
-           << "] = " << external_processor[i] << endl;
-      cout << "Processor " << rank << " of " << size << ": new external processor["
-           << i << "] = " << new_external_processor[i] << endl;
-    }
+#ifdef VERBOSE
+    for (int i = 0; i < num_external; i++) {
+        printf("Process %d of %d: external process[%d] = %d\n",
+            rank,
+            size,
+            i,
+            external_processor[i]);
     }
+#endif
 
+    /* Count the number of neighbors from which we receive information to update
+    our external elements. Additionally, fill the array tmp_neighbors in the
+    following way:
+    tmp_neighbors[i] = 0 ==> No external elements are updated by
+    processor i.
+    tmp_neighbors[i] = x ==> (x-1)/size elements are updated from
+    processor i. */
 
-  t0 = mytimer();
-  int* tmp_neighbors = new int[size];
-  for (i = 0; i < size; i++)
+    int tmp_neighbors[size];
+
+    for (int i = 0; i < size; i++) {
         tmp_neighbors[i] = 0;
+    }
 
     int num_recv_neighbors = 0;
     int length = 1;
 
-  for (i = 0; i < num_external; i++) {
+    for (int i = 0; i < num_external; i++) {
         if (tmp_neighbors[new_external_processor[i]] == 0) {
             num_recv_neighbors++;
             tmp_neighbors[new_external_processor[i]] = 1;
@@ -253,90 +229,72 @@ void commPartition(Comm* c, Matrix* A)
         tmp_neighbors[new_external_processor[i]] += size;
     }
 
-  /// sum over all processors all the tmp_neighbors arrays ///
-
+    // sum over all processors all the tmp_neighbors arrays
     MPI_Allreduce(tmp_neighbors, tmp_buffer, size, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
 
-  /// decode the combined 'tmp_neighbors' (stored in tmp_buffer)
-  //  array from all the processors
-
+    /* decode the combined 'tmp_neighbors' (stored in tmp_buffer) array from all the
+     * processors */
     int num_send_neighbors = tmp_buffer[rank] % size;
 
-  /// decode 'tmp_buffer[rank] to deduce total number of elements
-  //  we must send
-
+    /* decode 'tmp_buffer[rank]' to deduce total number of elements we must send */
     int total_to_be_sent = (tmp_buffer[rank] - num_send_neighbors) / size;
 
-  //
-  // Check to see if we have enough workspace allocated. This could be
-  // dynamically modified, but let's keep it simple for now...
-  //
-
+    /* Check to see if we have enough workspace allocated. This could be
+    dynamically modified, but let's keep it simple for now...*/
     if (num_send_neighbors > MAX_NUM_MESSAGES) {
-    cerr << "Must increase MAX_NUM_MESSAGES in HPC_Sparse_Matrix.hpp" << endl;
-    cerr << "Must be at least " << num_send_neighbors << endl;
-    abort();
+        printf("Must increase MAX_NUM_MESSAGES. Must be at least %d\n",
+            num_send_neighbors);
+        exit(EXIT_FAILURE);
     }
 
     if (total_to_be_sent > MAX_EXTERNAL) {
-    cerr << "Must increase MAX_EXTERNAL in HPC_Sparse_Matrix.hpp" << endl;
-    cerr << "Must be at least " << total_to_be_sent << endl;
-    abort();
+        printf("Must increase MAX_EXTERNAL. Must be at least %d\n", total_to_be_sent);
+        exit(EXIT_FAILURE);
     }
 
-  delete[] tmp_neighbors;
 
-  if (debug) {
-    t0 = mytimer() - t0;
-    cout << " Time in finding neighbors phase = " << t0 << endl;
-  }
 
-  if (debug)
-    cout << "Processor " << rank << " of " << size
-         << ": Number of send neighbors = " << num_send_neighbors << endl;
+#ifdef VERBOSE
+    printf("Process %d of %d: number of send neighbors = %d\n",
+        rank,
+        size,
+        num_send_neighbors);
 
-  if (debug)
-    cout << "Processor " << rank << " of " << size
-         << ": Number of receive neighbors = " << num_recv_neighbors << endl;
+    printf("Process %d of %d: number of receive neighbors = %d\n",
+        rank,
+        size,
+        num_recv_neighbors);
 
-  if (debug)
-    cout << "Processor " << rank << " of " << size
-         << ": Total number of elements to send = " << total_to_be_sent << endl;
+    printf("Process %d of %d: total number of elements to send = %d\n",
+        rank,
+        size,
+        total_to_be_sent);
 
-  if (debug) MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
 
-  /////////////////////////////////////////////////////////////////////////
-  ///
-  // Make a list of the neighbors that will send information to update our
-  // external elements (in the order that we will receive this information).
-  ///
-  /////////////////////////////////////////////////////////////////////////
+    /* Make a list of the neighbors that will send information to update our
+    external elements (in the order that we will receive this information).*/
+    int* recv_list = allocate(ARRAY_ALIGNMENT, MAX_EXTERNAL * sizeof(int));
 
-  int* recv_list = new int[MAX_EXTERNAL];
-
-  j = 0;
+    // FIXME: Create local scope
+    int j = 0;
     recv_list[j++] = new_external_processor[0];
-  for (i = 1; i < num_external; i++) {
+
+    for (int i = 1; i < num_external; i++) {
         if (new_external_processor[i - 1] != new_external_processor[i]) {
             recv_list[j++] = new_external_processor[i];
         }
     }
 
-  //
+    // Ensure that all the neighbors we expect to receive from also send to us
     // Send a 0 length message to each of our recv neighbors
-  //
+    int send_list[num_send_neighbors];
 
-  int* send_list = new int[num_send_neighbors];
-  for (i = 0; i < num_send_neighbors; i++)
+    for (int i = 0; i < num_send_neighbors; i++) {
         send_list[i] = 0;
+    }
 
-  //
     // first post receives, these are immediate receives
     // Do not wait for result to come, will do that at the
    // wait call below.
-  //
    int MPI_MY_TAG = 99;
 
-  MPI_Request* request = new MPI_Request[MAX_NUM_MESSAGES];
-  for (i = 0; i < num_send_neighbors; i++) {
+    MPI_Request request[MAX_NUM_MESSAGES];
+
+    for (int i = 0; i < num_send_neighbors; i++) {
         MPI_Irecv(tmp_buffer + i,
            1,
            MPI_INT,
@@ -347,88 +305,77 @@ void commPartition(Comm* c, Matrix* A)
     }
 
     // send messages
-
-  for (i = 0; i < num_recv_neighbors; i++)
+    for (int i = 0; i < num_recv_neighbors; i++) {
         MPI_Send(tmp_buffer + i, 1, MPI_INT, recv_list[i], MPI_MY_TAG, MPI_COMM_WORLD);
+    }
 
-  ///
-  // Receive message from each send neighbor to construct 'send_list'.
-  ///
+    // Receive message from each send neighbor to construct 'send_list'.
     MPI_Status status;
 
-  for (i = 0; i < num_send_neighbors; i++) {
+    for (int i = 0; i < num_send_neighbors; i++) {
        if (MPI_Wait(request + i, &status)) {
-      cerr << "MPI_Wait error\n" << endl;
-      exit(-1);
+            printf("MPI_Wait error\n");
+            exit(EXIT_FAILURE);
        }
        send_list[i] = status.MPI_SOURCE;
    }
 
-  /////////////////////////////////////////////////////////////////////////
-  ///
-  // Compare the two lists. In most cases they should be the same.
+    /* Compare the two lists. In most cases they should be the same.
     // However, if they are not then add new entries to the recv list
     // that are in the send list (but not already in the recv list).
-  ///
-  /////////////////////////////////////////////////////////////////////////
-
-  for (j = 0; j < num_send_neighbors; j++) {
+    Afterwards both lists contain the same set of processes, so a single,
+    symmetric neighbour list can be used for both sending and receiving. */
+    for (int j = 0; j < num_send_neighbors; j++) {
         int found = 0;
-    for (i = 0; i < num_recv_neighbors; i++) {
+        for (int i = 0; i < num_recv_neighbors; i++) {
             if (recv_list[i] == send_list[j]) found = 1;
         }
 
         if (found == 0) {
-      if (debug)
-        cout << "Processor " << rank << " of " << size << ": recv_list["
-             << num_recv_neighbors << "] = " << send_list[j] << endl;
+#ifdef VERBOSE
+            printf("Process %d of %d: recv_list[%d] = %d\n",
+                rank,
+                size,
+                num_recv_neighbors,
+                send_list[j]);
+#endif
             recv_list[num_recv_neighbors] = send_list[j];
             (num_recv_neighbors)++;
         }
     }
-
-  delete[] send_list;
 
     num_send_neighbors = num_recv_neighbors;
 
     if (num_send_neighbors > MAX_NUM_MESSAGES) {
-    cerr << "Must increase MAX_EXTERNAL in HPC_Sparse_Matrix.hpp" << endl;
-    abort();
+        printf("Must increase MAX_NUM_MESSAGES\n");
+        exit(EXIT_FAILURE);
     }
 
-  /////////////////////////////////////////////////////////////////////////
-  /// Start filling HPC_Sparse_Matrix struct
-  /////////////////////////////////////////////////////////////////////////
-
-  A->total_to_be_sent = total_to_be_sent;
-  int* elements_to_send = new int[total_to_be_sent];
-  A->elements_to_send = elements_to_send;
-
-  for (i = 0; i < total_to_be_sent; i++)
-    elements_to_send[i] = 0;
-
-  //
-  // Create 'new_external' which explicitly put the external elements in the
-  // order given by 'external_local_index'
-  //
+    // Start filling communication setup
+    c->total_to_be_sent = total_to_be_sent;
+    int* elements_to_send = (int*)allocate(ARRAY_ALIGNMENT,
+        total_to_be_sent * sizeof(int));
+    c->elements_to_send = elements_to_send;
 
-  int* new_external = new int[num_external];
-  for (i = 0; i < num_external; i++) {
+    for (int i = 0; i < total_to_be_sent; i++) {
+        elements_to_send[i] = 0;
+    }
+
+    // Create 'new_external' which explicitly puts the external elements in the
+    // order given by 'external_local_index'
+    int* new_external = (int*)allocate(ARRAY_ALIGNMENT, num_external * sizeof(int));
+
+    for (int i = 0; i < num_external; i++) {
         new_external[external_local_index[i] - local_nrow] = external_index[i];
     }
 
-  /////////////////////////////////////////////////////////////////////////
-  //
     // Send each processor the global index list of the external elements in the
     // order that I will want to receive them when updating my external elements
-  //
-  /////////////////////////////////////////////////////////////////////////
-
-  int* lengths = new int[num_recv_neighbors];
-
+    int lengths[num_recv_neighbors];
     MPI_MY_TAG++;
 
     // First post receives
-
-  for (i = 0; i < num_recv_neighbors; i++) {
+    for (int i = 0; i < num_recv_neighbors; i++) {
         int partner = recv_list[i];
         MPI_Irecv(lengths + i,
            1,
@@ -439,22 +386,18 @@ void commPartition(Comm* c, Matrix* A)
            request + i);
    }
 
-  int* neighbors = new int[MAX_NUM_NEIGHBOURS];
-  int* recv_length = new int[MAX_NUM_NEIGHBOURS];
-  int* send_length = new int[MAX_NUM_NEIGHBOURS];
-
-  A->neighbors = neighbors;
-  A->recv_length = recv_length;
-  A->send_length = send_length;
+    int* neighbors = c->neighbors;
+    int* recv_length = c->recv_length;
+    int* send_length = c->send_length;
 
    j = 0;
-  for (i = 0; i < num_recv_neighbors; i++) {
+
+    for (int i = 0; i < num_recv_neighbors; i++) {
        int start = j;
        int newlength = 0;
 
        // go through list of external elements until updating
        // processor changes
-
        while ((j < num_external) && (new_external_processor[j] == recv_list[i])) {
            newlength++;
            j++;
@@ -469,25 +412,21 @@ void commPartition(Comm* c, Matrix* A)
    }
 
    // Complete the receives of the number of externals
-
-  for (i = 0; i <
num_recv_neighbors; i++) { + for (int i = 0; i < num_recv_neighbors; i++) { if (MPI_Wait(request + i, &status)) { - cerr << "MPI_Wait error\n" << endl; - exit(-1); + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); } send_length[i] = lengths[i]; } - delete[] lengths; - /////////////////////////////////////////////////////////////////// // Build "elements_to_send" list. These are the x elements I own // that need to be sent to other processors. - /////////////////////////////////////////////////////////////////// - MPI_MY_TAG++; j = 0; - for (i = 0; i < num_recv_neighbors; i++) { + + for (int i = 0; i < num_recv_neighbors; i++) { MPI_Irecv(elements_to_send + j, send_length[i], MPI_INT, @@ -499,14 +438,14 @@ void commPartition(Comm* c, Matrix* A) } j = 0; - for (i = 0; i < num_recv_neighbors; i++) { + + for (int i = 0; i < num_recv_neighbors; i++) { int start = j; int newlength = 0; // Go through list of external elements // until updating processor changes. This is redundant, but // saves us from recording this information. - while ((j < num_external) && (new_external_processor[j] == recv_list[i])) { newlength++; @@ -522,39 +461,101 @@ void commPartition(Comm* c, Matrix* A) } // receive from each neighbor the global index list of external elements - - for (i = 0; i < num_recv_neighbors; i++) { + for (int i = 0; i < num_recv_neighbors; i++) { if (MPI_Wait(request + i, &status)) { - cerr << "MPI_Wait error\n" << endl; - exit(-1); + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); } } - /// replace global indices by local indices /// - - for (i = 0; i < total_to_be_sent; i++) + /// replace global indices by local indices + for (int i = 0; i < total_to_be_sent; i++) { elements_to_send[i] -= start_row; + } - //////////////// // Finish up !! - //////////////// + c->num_send_neighbors = num_send_neighbors; + A->nc = A->nc + num_external; - A->num_send_neighbors = num_send_neighbors; - A->local_ncol = A->local_nrow + num_external; + // Used in exchange + CG_FLOAT* send_buffer = (CG_FLOAT*)allocate(ARRAY_ALIGNMENT, + total_to_be_sent * sizeof(CG_FLOAT)); + c->send_buffer = send_buffer; - // Used in exchange_externals - double* send_buffer = new double[total_to_be_sent]; - A->send_buffer = send_buffer; + free(recv_list); + free(new_external); +#endif +} - delete[] tmp_buffer; - delete[] global_index_offsets; - delete[] recv_list; - delete[] external_processor; - delete[] new_external; - delete[] new_external_processor; - delete[] request; +void commExchange(Comm* c, Matrix* A, double* x) +{ +#ifdef _MPI + int num_external = 0; - return; + // Extract Matrix pieces + + int local_nrow = A->nr; + int num_neighbors = c->num_send_neighbors; + int* recv_length = c->recv_length; + int* send_length = c->send_length; + int* neighbors = c->neighbors; + double* send_buffer = c->send_buffer; + int total_to_be_sent = c->total_to_be_sent; + int* elements_to_send = c->elements_to_send; + + int rank = c->rank; + int size = c->size; + MPI_Comm comm = c->comm; + + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. 
+ int MPI_MY_TAG = 99; + + MPI_Request request[num_neighbors]; + + // Externals are at end of locals + double* x_external = (double*)x + local_nrow; + + // Post receives first + for (int i = 0; i < num_neighbors; i++) { + int n_recv = recv_length[i]; + MPI_Irecv(x_external, + n_recv, + MPI_DOUBLE, + neighbors[i], + MPI_MY_TAG, + MPI_COMM_WORLD, + request + i); + x_external += n_recv; + } + + // Fill up send buffer + for (int i = 0; i < total_to_be_sent; i++) { + send_buffer[i] = x[elements_to_send[i]]; + } + + // Send to each neighbor + for (int i = 0; i < num_neighbors; i++) { + int n_send = send_length[i]; + MPI_Send(send_buffer, + n_send, + MPI_DOUBLE, + neighbors[i], + MPI_MY_TAG, + MPI_COMM_WORLD); + send_buffer += n_send; + } + + // Complete the reads issued above + MPI_Status status; + for (int i = 0; i < num_neighbors; i++) { + if (MPI_Wait(request + i, &status)) { + printf("MPI_Wait error\n"); + exit(EXIT_FAILURE); + } + } +#endif } void commPrintConfig(Comm* c) @@ -568,20 +569,6 @@ void commPrintConfig(Comm* c) for (int i = 0; i < c->size; i++) { if (i == c->rank) { - printf("\tRank %d of %d\n", c->rank, c->size); - printf("\tNeighbours (bottom, top, left, right): %d %d, %d, %d\n", - c->neighbours[BOTTOM], - c->neighbours[TOP], - c->neighbours[LEFT], - c->neighbours[RIGHT]); - printf("\tIs boundary:\n"); - printf("\t\tLEFT: %d\n", commIsBoundary(c, LEFT)); - printf("\t\tRIGHT: %d\n", commIsBoundary(c, RIGHT)); - printf("\t\tBOTTOM: %d\n", commIsBoundary(c, BOTTOM)); - printf("\t\tTOP: %d\n", commIsBoundary(c, TOP)); - printf("\tCoordinates (i,j) %d %d\n", c->coords[IDIM], c->coords[JDIM]); - printf("\tDims (i,j) %d %d\n", c->dims[IDIM], c->dims[JDIM]); - printf("\tLocal domain size (i,j) %dx%d\n", c->imaxLocal, c->jmaxLocal); fflush(stdout); } MPI_Barrier(MPI_COMM_WORLD); diff --git a/src/comm.h b/src/comm.h index dfe296e..5e5c1af 100644 --- a/src/comm.h +++ b/src/comm.h @@ -10,6 +10,10 @@ #include "matrix.h" +#define MAX_EXTERNAL 100000 +#define MAX_NUM_MESSAGES 500 +#define MAX_NUM_NEIGHBOURS MAX_NUM_MESSAGES + enum op { MAX = 0, SUM }; typedef struct { @@ -24,9 +28,9 @@ typedef struct { int* external_local_index; int total_to_be_sent; int* elements_to_send; - int* neighbors; - int* recv_length; - int* send_length; + int neighbors[MAX_NUM_NEIGHBOURS]; + int recv_length[MAX_NUM_NEIGHBOURS]; + int send_length[MAX_NUM_NEIGHBOURS]; double* send_buffer; #endif } Comm; @@ -36,7 +40,7 @@ extern void commInit(Comm* c, int argc, char** argv); extern void commFinalize(Comm* c); extern void commPartition(Comm*, Matrix* m); extern void commPrintConfig(Comm*); -extern void commExchange(Comm*, double*); +extern void commExchange(Comm* c, Matrix* A, double* x); extern void commReduction(double* v, int op); static inline int commIsMaster(Comm* c) { return c->rank == 0; }
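
Usage note: after commPartition() has built the halo layout, commExchange() has to run
before every sparse matrix-vector product so that the external values stored behind the
local entries of x are current. The sketch below shows a possible call site; it is an
illustration only and not part of this patch. spmv() is a hypothetical placeholder for
the local SpMV kernel, and the only facts taken from the code above are the
commExchange(Comm*, Matrix*, double*) signature and that received halo values are
written to x[A->nr] and upward, so x must provide room for the local values plus all
externals appended behind them.

    #include "comm.h"
    #include "matrix.h"

    // assumed to exist elsewhere in the solver; operates on the remapped
    // local column indices produced by commPartition()
    extern void spmv(Matrix* A, double* x, double* y);

    static void matvecWithHalo(Comm* c, Matrix* A, double* x, double* y)
    {
        // indices 0 .. A->nr-1 of x hold the locally owned values; the
        // externals are appended directly behind them by the halo exchange
        commExchange(c, A, x); // fill the halo part of x from the neighbour ranks
        spmv(A, x, y);         // local sparse matrix-vector product
    }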