/* * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. * All rights reserved. This file is part of nusif-solver. * Use of this source code is governed by a MIT style * license that can be found in the LICENSE file. */ #include #include #include #include #include "allocate.h" #include "comm.h" // subroutines local to this module static int sizeOfRank(int rank, int size, int N) { return N / size + ((N % size > rank) ? 1 : 0); } static void setupCommunication(Comm* c, Direction direction, int layer) { int imaxLocal = c->imaxLocal; int jmaxLocal = c->jmaxLocal; int kmaxLocal = c->kmaxLocal; size_t dblsize = sizeof(double); int sizes[NDIMS]; int subSizes[NDIMS]; int starts[NDIMS]; int offset = 0; sizes[IDIM] = imaxLocal + 2; sizes[JDIM] = jmaxLocal + 2; sizes[KDIM] = kmaxLocal + 2; if (layer == HALO) { offset = 1; } switch (direction) { case LEFT: subSizes[IDIM] = 1; subSizes[JDIM] = jmaxLocal; subSizes[KDIM] = kmaxLocal; starts[IDIM] = 1 - offset; starts[JDIM] = 1; starts[KDIM] = 1; break; case RIGHT: subSizes[IDIM] = 1; subSizes[JDIM] = jmaxLocal; subSizes[KDIM] = kmaxLocal; starts[IDIM] = imaxLocal + offset; starts[JDIM] = 1; starts[KDIM] = 1; break; case BOTTOM: subSizes[IDIM] = imaxLocal; subSizes[JDIM] = 1; subSizes[KDIM] = kmaxLocal; starts[IDIM] = 1; starts[JDIM] = 1 - offset; starts[KDIM] = 1; break; case TOP: subSizes[IDIM] = imaxLocal; subSizes[JDIM] = 1; subSizes[KDIM] = kmaxLocal; starts[IDIM] = 1; starts[JDIM] = jmaxLocal + offset; starts[KDIM] = 1; break; case FRONT: subSizes[IDIM] = imaxLocal; subSizes[JDIM] = jmaxLocal; subSizes[KDIM] = 1; starts[IDIM] = 1; starts[JDIM] = 1; starts[KDIM] = 1 - offset; break; case BACK: subSizes[IDIM] = imaxLocal; subSizes[JDIM] = jmaxLocal; subSizes[KDIM] = 1; starts[IDIM] = 1; starts[JDIM] = 1; starts[KDIM] = kmaxLocal + offset; break; case NDIRS: printf("ERROR!\n"); break; } if (layer == HALO) { MPI_Type_create_subarray(NDIMS, sizes, subSizes, starts, MPI_ORDER_C, MPI_DOUBLE, &c->rbufferTypes[direction]); MPI_Type_commit(&c->rbufferTypes[direction]); } else if (layer == BULK) { MPI_Type_create_subarray(NDIMS, sizes, subSizes, starts, MPI_ORDER_C, MPI_DOUBLE, &c->sbufferTypes[direction]); MPI_Type_commit(&c->sbufferTypes[direction]); } } static void assembleResult(Comm* c, double* src, double* dst, int imaxLocal[], int jmaxLocal[], int kmaxLocal[], int offset[], int kmax, int jmax, int imax) { int numRequests = 1; if (c->rank == 0) { numRequests = c->size + 1; } MPI_Request requests[numRequests]; /* all ranks send their interpolated bulk array */ MPI_Isend(src, c->imaxLocal * c->jmaxLocal * c->kmaxLocal, MPI_DOUBLE, 0, 0, c->comm, &requests[0]); /* rank 0 assembles the subdomains */ if (c->rank == 0) { for (int i = 0; i < c->size; i++) { MPI_Datatype domainType; int oldSizes[NDIMS] = { kmax, jmax, imax }; int newSizes[NDIMS] = { kmaxLocal[i], jmaxLocal[i], imaxLocal[i] }; int starts[NDIMS] = { offset[i * NDIMS + KDIM], offset[i * NDIMS + JDIM], offset[i * NDIMS + IDIM] }; MPI_Type_create_subarray(NDIMS, oldSizes, newSizes, starts, MPI_ORDER_C, MPI_DOUBLE, &domainType); MPI_Type_commit(&domainType); MPI_Irecv(dst, 1, domainType, i, 0, c->comm, &requests[i + 1]); MPI_Type_free(&domainType); } } MPI_Waitall(numRequests, requests, MPI_STATUSES_IGNORE); } static int sum(int* sizes, int position) { int sum = 0; for (int i = 0; i < position; i++) { sum += sizes[i]; } return sum; } // exported subroutines void commReduction(double* v, int op) { if (op == MAX) { MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } else if (op == SUM) { MPI_Allreduce(MPI_IN_PLACE, v, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); } } int commIsBoundary(Comm* c, Direction direction) { switch (direction) { case LEFT: return c->coords[ICORD] == 0; break; case RIGHT: return c->coords[ICORD] == (c->dims[ICORD] - 1); break; case BOTTOM: return c->coords[JCORD] == 0; break; case TOP: return c->coords[JCORD] == (c->dims[JCORD] - 1); break; case FRONT: return c->coords[KCORD] == 0; break; case BACK: return c->coords[KCORD] == (c->dims[KCORD] - 1); break; case NDIRS: printf("ERROR!\n"); break; } return 0; } void commExchange(Comm* c, double* grid) { int counts[6] = { 1, 1, 1, 1, 1, 1 }; MPI_Aint displs[6] = { 0, 0, 0, 0, 0, 0 }; MPI_Neighbor_alltoallw(grid, counts, displs, c->sbufferTypes, grid, counts, displs, c->rbufferTypes, c->comm); } void commShift(Comm* c, double* f, double* g, double* h) { MPI_Request requests[6] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL }; /* shift G */ /* receive ghost cells from bottom neighbor */ MPI_Irecv(g, 1, c->rbufferTypes[BOTTOM], c->neighbours[BOTTOM], 0, c->comm, &requests[0]); /* send ghost cells to top neighbor */ MPI_Isend(g, 1, c->sbufferTypes[TOP], c->neighbours[TOP], 0, c->comm, &requests[1]); /* shift F */ /* receive ghost cells from left neighbor */ MPI_Irecv(f, 1, c->rbufferTypes[LEFT], c->neighbours[LEFT], 1, c->comm, &requests[2]); /* send ghost cells to right neighbor */ MPI_Isend(f, 1, c->sbufferTypes[RIGHT], c->neighbours[RIGHT], 1, c->comm, &requests[3]); /* shift H */ /* receive ghost cells from front neighbor */ MPI_Irecv(h, 1, c->rbufferTypes[FRONT], c->neighbours[FRONT], 2, c->comm, &requests[4]); /* send ghost cells to back neighbor */ MPI_Isend(h, 1, c->sbufferTypes[BACK], c->neighbours[BACK], 2, c->comm, &requests[5]); MPI_Waitall(6, requests, MPI_STATUSES_IGNORE); } #define G(v, i, j, k) \ v[(k) * (imaxLocal + 2) * (jmaxLocal + 2) + (j) * (imaxLocal + 2) + (i)] void commCollectResult(Comm* c, double* ug, double* vg, double* wg, double* pg, double* u, double* v, double* w, double* p, int kmax, int jmax, int imax) { int imaxLocal = c->imaxLocal; int jmaxLocal = c->jmaxLocal; int kmaxLocal = c->kmaxLocal; int offset[c->size * NDIMS]; int imaxLocalAll[c->size]; int jmaxLocalAll[c->size]; int kmaxLocalAll[c->size]; MPI_Gather(&imaxLocal, 1, MPI_INT, imaxLocalAll, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&jmaxLocal, 1, MPI_INT, jmaxLocalAll, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&kmaxLocal, 1, MPI_INT, kmaxLocalAll, 1, MPI_INT, 0, MPI_COMM_WORLD); if (c->rank == 0) { for (int i = 0; i < c->size; i++) { int coords[NCORDS]; MPI_Cart_coords(c->comm, i, NDIMS, coords); offset[i * NDIMS + IDIM] = sum(imaxLocalAll, coords[ICORD]); offset[i * NDIMS + JDIM] = sum(jmaxLocalAll, coords[JCORD]); offset[i * NDIMS + KDIM] = sum(kmaxLocalAll, coords[KCORD]); printf("Rank: %d, Coords(k,j,i): %d %d %d, Size(k,j,i): %d %d %d, " "Offset(k,j,i): %d %d %d\n", i, coords[KCORD], coords[JCORD], coords[ICORD], kmaxLocalAll[i], jmaxLocalAll[i], imaxLocalAll[i], offset[i * NDIMS + KDIM], offset[i * NDIMS + JDIM], offset[i * NDIMS + IDIM]); } } size_t bytesize = imaxLocal * jmaxLocal * kmaxLocal * sizeof(double); double* tmp = allocate(64, bytesize); int idx = 0; /* collect P */ for (int k = 1; k < kmaxLocal + 1; k++) { for (int j = 1; j < jmaxLocal + 1; j++) { for (int i = 1; i < imaxLocal + 1; i++) { tmp[idx++] = G(p, i, j, k); } } } assembleResult(c, tmp, pg, imaxLocalAll, jmaxLocalAll, kmaxLocalAll, offset, kmax, jmax, imax); /* collect U */ idx = 0; for (int k = 1; k < kmaxLocal + 1; k++) { for (int j = 1; j < jmaxLocal + 1; j++) { for (int i = 1; i < imaxLocal + 1; i++) { tmp[idx++] = (G(u, i, j, k) + G(u, i - 1, j, k)) / 2.0; } } } assembleResult(c, tmp, ug, imaxLocalAll, jmaxLocalAll, kmaxLocalAll, offset, kmax, jmax, imax); /* collect V */ idx = 0; for (int k = 1; k < kmaxLocal + 1; k++) { for (int j = 1; j < jmaxLocal + 1; j++) { for (int i = 1; i < imaxLocal + 1; i++) { tmp[idx++] = (G(v, i, j, k) + G(v, i, j - 1, k)) / 2.0; } } } assembleResult(c, tmp, vg, imaxLocalAll, jmaxLocalAll, kmaxLocalAll, offset, kmax, jmax, imax); /* collect W */ idx = 0; for (int k = 1; k < kmaxLocal + 1; k++) { for (int j = 1; j < jmaxLocal + 1; j++) { for (int i = 1; i < imaxLocal + 1; i++) { tmp[idx++] = (G(w, i, j, k) + G(w, i, j, k - 1)) / 2.0; } } } assembleResult(c, tmp, wg, imaxLocalAll, jmaxLocalAll, kmaxLocalAll, offset, kmax, jmax, imax); free(tmp); } void commPrintConfig(Comm* c) { fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); if (commIsMaster(c)) { printf("Communication setup:\n"); } for (int i = 0; i < c->size; i++) { if (i == c->rank) { printf("\tRank %d of %d\n", c->rank, c->size); printf("\tNeighbours (front, back, bottom, top, left, right): %d, %d, %d, " "%d, %d, %d\n", c->neighbours[FRONT], c->neighbours[BACK], c->neighbours[BOTTOM], c->neighbours[TOP], c->neighbours[LEFT], c->neighbours[RIGHT]); printf("\tCoordinates (k,j,i) %d %d %d\n", c->coords[KCORD], c->coords[JCORD], c->coords[ICORD]); printf("\tLocal domain size (k,j,i) %dx%dx%d\n", c->kmaxLocal, c->jmaxLocal, c->imaxLocal); fflush(stdout); } } MPI_Barrier(MPI_COMM_WORLD); } void commInit(Comm* c, int kmax, int jmax, int imax) { /* setup communication */ MPI_Comm_rank(MPI_COMM_WORLD, &(c->rank)); MPI_Comm_size(MPI_COMM_WORLD, &(c->size)); int dims[NDIMS] = { 0, 0, 0 }; int periods[NDIMS] = { 0, 0, 0 }; MPI_Dims_create(c->size, NDIMS, dims); MPI_Cart_create(MPI_COMM_WORLD, NCORDS, dims, periods, 0, &c->comm); MPI_Cart_shift(c->comm, ICORD, 1, &c->neighbours[LEFT], &c->neighbours[RIGHT]); MPI_Cart_shift(c->comm, JCORD, 1, &c->neighbours[BOTTOM], &c->neighbours[TOP]); MPI_Cart_shift(c->comm, KCORD, 1, &c->neighbours[FRONT], &c->neighbours[BACK]); MPI_Cart_get(c->comm, NCORDS, c->dims, periods, c->coords); c->imaxLocal = sizeOfRank(c->rank, dims[ICORD], imax); c->jmaxLocal = sizeOfRank(c->rank, dims[JCORD], jmax); c->kmaxLocal = sizeOfRank(c->rank, dims[KCORD], kmax); // setup buffer types for communication setupCommunication(c, LEFT, BULK); setupCommunication(c, LEFT, HALO); setupCommunication(c, RIGHT, BULK); setupCommunication(c, RIGHT, HALO); setupCommunication(c, BOTTOM, BULK); setupCommunication(c, BOTTOM, HALO); setupCommunication(c, TOP, BULK); setupCommunication(c, TOP, HALO); setupCommunication(c, FRONT, BULK); setupCommunication(c, FRONT, HALO); setupCommunication(c, BACK, BULK); setupCommunication(c, BACK, HALO); } void commFree(Comm* c) { for (int i = 0; i < NDIRS; i++) { MPI_Type_free(&c->sbufferTypes[i]); MPI_Type_free(&c->rbufferTypes[i]); } }