Skip to content

Commit 7495b88

Browse files
authored
Improve error message for missing partition.nc file (#1006)
## Improve error message for missing `partition.nc` file Fixes #914 Previously, when the `partition.nc` file is missing (for MPI runs) you would get a generic netcdf error message which was not clear at all e.g., <details><summary>Previous Error Message</summary> <p> ``` $ mpirun -n 4 ../build-mpi/nextsim --config-file config_june23.cfg terminate called after throwing an instance of 'netCDF::exceptions::NcException' terminate called after throwing an instance of 'netCDF::exceptions::NcException' what(): No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 [lenny:23322] *** Process received signal *** [lenny:23322] Signal: Aborted (6) [lenny:23322] Signal code: (-6) what(): No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 [lenny:23321] *** Process received signal *** [lenny:23321] Signal: Aborted (6) [lenny:23321] Signal code: (-6) terminate called after throwing an instance of 'netCDF::exceptions::NcException' what(): No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 [lenny:23324] *** Process received signal *** [lenny:23324] Signal: Aborted (6) [lenny:23324] Signal code: (-6) terminate called after throwing an instance of 'netCDF::exceptions::NcException' what(): No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 [lenny:23323] *** Process received signal *** [lenny:23323] Signal: Aborted (6) [lenny:23323] Signal code: (-6) [lenny:23321] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x45330)[0x797f636c4330] [lenny:23321] [ 1] [lenny:23323] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x45330)[0x750e62b0c330] [lenny:23323] [ 1] 
/lib/x86_64-linux-gnu/libc.so.6(pthread_kill+0x11c)[0x750e62b65b2c] [lenny:23323] [ 2] [lenny:23324] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x45330)[0x77f9b703a330] [lenny:23324] [ 1] /lib/x86_64-linux-gnu/libc.so.6(pthread_kill+0x11c)[0x77f9b7093b2c] [lenny:23324] [ 2] [lenny:23322] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x45330)[0x7f4fde766330] [lenny:23322] [ 1] /lib/x86_64-linux-gnu/libc.so.6(pthread_kill+0x11c)[0x797f6371db2c] [lenny:23321] [ 2] /lib/x86_64-linux-gnu/libc.so.6(gsignal+0x1e)[0x77f9b703a27e] [lenny:23324] [ 3] /lib/x86_64-linux-gnu/libc.so.6(gsignal+0x1e)[0x750e62b0c27e] [lenny:23323] [ 3] /lib/x86_64-linux-gnu/libc.so.6(gsignal+0x1e)[0x797f636c427e] [lenny:23321] [ 3] /lib/x86_64-linux-gnu/libc.so.6(pthread_kill+0x11c)[0x7f4fde7bfb2c] [lenny:23322] [ 2] /lib/x86_64-linux-gnu/libc.so.6(abort+0xdf)[0x750e62aef8ff] [lenny:23323] [ 4] /lib/x86_64-linux-gnu/libc.so.6(abort+0xdf)[0x77f9b701d8ff] [lenny:23324] [ 4] /lib/x86_64-linux-gnu/libc.so.6(gsignal+0x1e)[0x7f4fde76627e] [lenny:23322] [ 3] /lib/x86_64-linux-gnu/libc.so.6(abort+0xdf)[0x797f636a78ff] [lenny:23321] [ 4] /lib/x86_64-linux-gnu/libc.so.6(abort+0xdf)[0x7f4fde7498ff] [lenny:23322] [ 4] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xa5ff5)[0x77f9b72f6ff5] [lenny:23324] [ 5] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xa5ff5)[0x750e62dc8ff5] [lenny:23323] [ 5] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xa5ff5)[0x797f63980ff5] [lenny:23321] [ 5] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xa5ff5)[0x7f4fdea22ff5] [lenny:23322] [ 5] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb0da)[0x77f9b730c0da] [lenny:23324] [ 6] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb0da)[0x750e62dde0da] [lenny:23323] [ 6] 
/software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb0da)[0x7f4fdea380da] [lenny:23322] [ 6] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb0da)[0x797f639960da] [lenny:23321] [ 6] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(_ZSt10unexpectedv+0x0)[0x7f4fdea22a55] [lenny:23322] [ 7] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(_ZSt10unexpectedv+0x0)[0x750e62dc8a55] [lenny:23323] [ 7] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(_ZSt10unexpectedv+0x0)[0x77f9b72f6a55] [lenny:23324] [ 7] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(_ZSt10unexpectedv+0x0)[0x797f63980a55] [lenny:23321] [ 7] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb391)[0x7f4fdea38391] [lenny:23322] [ 8] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(+0x24d1f)[0x7f4fde4fad1f] [lenny:23322] [ 9] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFile4openERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0xea)[0x7f4fde5040fa] [lenny:23322] [10] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb391)[0x77f9b730c391] [lenny:23324] [ 8] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb391)[0x797f63996391] [lenny:23321] [ 8] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libstdc++.so.6(+0xbb391)[0x750e62dde391] [lenny:23323] [ 8] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(+0x24d1f)[0x797f63458d1f] [lenny:23321] [ 9] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(+0x24d1f)[0x750e628a0d1f] [lenny:23323] [ 9] 
/software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFileC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0x32)[0x7f4fde504132] [lenny:23322] [11] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(+0x24d1f)[0x77f9b6dced1f] [lenny:23324] [ 9] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFile4openERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0xea)[0x750e628aa0fa] [lenny:23323] [10] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata20getPartitionMetadataENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x4a)[0x7f4fdef3537a] [lenny:23322] [12] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFile4openERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0xea)[0x797f634620fa] [lenny:23321] [10] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFile4openERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0xea)[0x77f9b6dd80fa] [lenny:23324] [10] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFileC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0x32)[0x77f9b6dd8132] [lenny:23324] [11] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFileC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0x32)[0x797f63462132] [lenny:23321] [11] /software/spack/var/spack/environments/nextsim/.spack-env/view/lib/libnetcdf-cxx4.so.1(_ZN6netCDF6NcFileC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS0_8FileModeE+0x32)[0x750e628aa132] [lenny:23323] [11] 
/home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadataC1ENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x505)[0x7f4fdef36225] [lenny:23322] [13] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata20getPartitionMetadataENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x4a)[0x77f9b780937a] [lenny:23324] [12] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata20getPartitionMetadataENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x4a)[0x797f63e9337a] [lenny:23321] [12] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata20getPartitionMetadataENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x4a)[0x750e632db37a] [lenny:23323] [12] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata11getInstanceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x9f)[0x7f4fdeef7dcf] [lenny:23322] [14] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim5Model9configureEv+0x322)[0x7f4fdeef3392] [lenny:23322] [15] ../build-mpi/nextsim(+0x4048)[0x5e232ffaa048] [lenny:23322] [16] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadataC1ENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x505)[0x750e632dc225] [lenny:23323] [13] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadataC1ENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x505)[0x77f9b780a225] [lenny:23324] [13] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadataC1ENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x505)[0x797f63e94225] [lenny:23321] [13] 
/lib/x86_64-linux-gnu/libc.so.6(+0x2a1ca)[0x7f4fde74b1ca] [lenny:23322] [17] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x8b)[0x7f4fde74b28b] [lenny:23322] [18] ../build-mpi/nextsim(+0x4165)[0x5e232ffaa165] [lenny:23322] *** End of error message *** /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata11getInstanceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x9f)[0x797f63e55dcf] [lenny:23321] [14] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata11getInstanceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x9f)[0x750e6329ddcf] [lenny:23323] [14] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim13ModelMetadata11getInstanceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x9f)[0x77f9b77cbdcf] [lenny:23324] [14] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim5Model9configureEv+0x322)[0x797f63e51392] [lenny:23321] [15] ../build-mpi/nextsim(+0x4048)[0x631532bec048] [lenny:23321] [16] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim5Model9configureEv+0x322)[0x750e63299392] [lenny:23323] [15] ../build-mpi/nextsim(+0x4048)[0x627a057a6048] [lenny:23323] [16] /home/melt/sync/cambridge/projects/current/sasip/nextsimdg/build-mpi/libnextsimlib.so(_ZN7Nextsim5Model9configureEv+0x322)[0x77f9b77c7392] [lenny:23324] [15] ../build-mpi/nextsim(+0x4048)[0x5e0c045f9048] [lenny:23324] [16] /lib/x86_64-linux-gnu/libc.so.6(+0x2a1ca)[0x750e62af11ca] [lenny:23323] [17] /lib/x86_64-linux-gnu/libc.so.6(+0x2a1ca)[0x797f636a91ca] [lenny:23321] [17] /lib/x86_64-linux-gnu/libc.so.6(+0x2a1ca)[0x77f9b701f1ca] [lenny:23324] [17] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x8b)[0x797f636a928b] [lenny:23321] [18] ../build-mpi/nextsim(+0x4165)[0x631532bec165] [lenny:23321] *** End of error message *** 
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x8b)[0x750e62af128b] [lenny:23323] [18] ../build-mpi/nextsim(+0x4165)[0x627a057a6165] [lenny:23323] *** End of error message *** /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x8b)[0x77f9b701f28b] [lenny:23324] [18] ../build-mpi/nextsim(+0x4165)[0x5e0c045f9165] [lenny:23324] *** End of error message *** -------------------------------------------------------------------------- Primary job terminated normally, but 1 process returned a non-zero exit code. Per user-direction, the job has been aborted. -------------------------------------------------------------------------- -------------------------------------------------------------------------- mpirun noticed that process rank 2 with PID 0 on node lenny exited on signal 6 (Aborted). -------------------------------------------------------------------------- ``` </p> </details> With this PR, the new error message will look something like this: <details><summary>New Error Message</summary> <p> ``` $ mpirun -n 4 ../build-mpi/nextsim --config-file config_june23.cfg Failed to open partition file [partition.nc] :: No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 Failed to open partition file [partition.nc] :: No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 Failed to open partition file [partition.nc] :: No such file or directory file: /tmp/melt/spack-stage/spack-stage-netcdf-cxx4-4.3.1-k66y7sxdla7qjovzkm5lmbcn6io7tugb/spack-src/cxx4/ncFile.cpp line:88 -------------------------------------------------------------------------- MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD with errorcode 1. NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. 
You may or may not see output from other processes, depending on exactly when Open MPI kills them. -------------------------------------------------------------------------- [lenny:36722] 2 more processes have sent help message help-mpi-api.txt / mpi-abort [lenny:36722] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages ``` </p> </details>
2 parents f53d8c6 + 9e75d58 commit 7495b88

File tree

1 file changed

+55
-46
lines changed

1 file changed

+55
-46
lines changed

core/src/ModelMetadata.cpp

Lines changed: 55 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -163,57 +163,66 @@ void ModelMetadata::readNeighbourData(netCDF::NcFile& ncFile)
163163

164164
void ModelMetadata::getPartitionMetadata(std::string partitionFile)
165165
{
166-
netCDF::NcFile ncFile(partitionFile, netCDF::NcFile::read);
167-
int sizes = ncFile.getDim("L").getSize();
168-
int nBoxes = ncFile.getDim("P").getSize();
169-
auto& modelMPI = ModelMPI::getInstance();
170-
auto mpiSize = modelMPI.getSize();
171-
if (nBoxes != mpiSize) {
172-
std::string errorMsg = "Number of MPI ranks " + std::to_string(mpiSize) + " <> "
173-
+ std::to_string(nBoxes) + "\n";
174-
throw std::runtime_error(errorMsg);
175-
}
176-
if (!globalExtentX) {
177-
globalExtentX = ncFile.getDim("NX").getSize();
178-
} else if (globalExtentX != ncFile.getDim("NX").getSize()) {
179-
throw std::runtime_error("ModelMetadata: Inconsistent global x-extent between "
180-
"partition and input files.");
181-
}
182-
if (!globalExtentY) {
183-
globalExtentY = ncFile.getDim("NY").getSize();
184-
} else if (globalExtentX != ncFile.getDim("NY").getSize()) {
185-
throw std::runtime_error("ModelMetadata: Inconsistent global y-extent between "
186-
"partition and input files.");
187-
}
188-
netCDF::NcGroup bboxGroup(ncFile.getGroup(bboxName));
166+
try {
167+
netCDF::NcFile ncFile(partitionFile, netCDF::NcFile::read);
168+
int sizes = ncFile.getDim("L").getSize();
169+
int nBoxes = ncFile.getDim("P").getSize();
170+
auto& modelMPI = ModelMPI::getInstance();
171+
auto mpiSize = modelMPI.getSize();
172+
if (nBoxes != mpiSize) {
173+
std::string errorMsg = "Number of MPI ranks " + std::to_string(mpiSize) + " <> "
174+
+ std::to_string(nBoxes) + "\n";
175+
throw std::runtime_error(errorMsg);
176+
}
177+
if (!globalExtentX) {
178+
globalExtentX = ncFile.getDim("NX").getSize();
179+
} else if (globalExtentX != ncFile.getDim("NX").getSize()) {
180+
throw std::runtime_error("ModelMetadata: Inconsistent global x-extent between "
181+
"partition and input files.");
182+
}
183+
if (!globalExtentY) {
184+
globalExtentY = ncFile.getDim("NY").getSize();
185+
} else if (globalExtentX != ncFile.getDim("NY").getSize()) {
186+
throw std::runtime_error("ModelMetadata: Inconsistent global y-extent between "
187+
"partition and input files.");
188+
}
189+
netCDF::NcGroup bboxGroup(ncFile.getGroup(bboxName));
189190

190-
std::vector<size_t> rank(1, modelMPI.getRank());
191-
bboxGroup.getVar("domain_x").getVar(rank, &localCornerX);
192-
bboxGroup.getVar("domain_y").getVar(rank, &localCornerY);
193-
bboxGroup.getVar("domain_extent_x").getVar(rank, &localExtentX);
194-
bboxGroup.getVar("domain_extent_y").getVar(rank, &localExtentY);
191+
std::vector<size_t> rank(1, modelMPI.getRank());
192+
bboxGroup.getVar("domain_x").getVar(rank, &localCornerX);
193+
bboxGroup.getVar("domain_y").getVar(rank, &localCornerY);
194+
bboxGroup.getVar("domain_extent_x").getVar(rank, &localExtentX);
195+
bboxGroup.getVar("domain_extent_y").getVar(rank, &localExtentY);
195196

196-
readNeighbourData(ncFile);
197+
readNeighbourData(ncFile);
197198

198-
// cornerHaloRecv doesn't need to be read because it can be easily calculated.
199-
for (auto corner : corners) {
200-
if (cornerRanks[corner].size()) {
201-
cornerHaloRecv[corner].resize(1);
202-
cornerHaloRecv[corner][0] = 2 * (localExtentX + localExtentY) + corner;
203-
}
204-
if (cornerRanksPeriodic[corner].size()) {
205-
cornerHaloRecvPeriodic[corner].resize(1);
206-
cornerHaloRecvPeriodic[corner][0] = 2 * (localExtentX + localExtentY) + corner;
199+
// cornerHaloRecv doesn't need to be read because it can be easily calculated.
200+
for (auto corner : corners) {
201+
if (cornerRanks[corner].size()) {
202+
cornerHaloRecv[corner].resize(1);
203+
cornerHaloRecv[corner][0] = 2 * (localExtentX + localExtentY) + corner;
204+
}
205+
if (cornerRanksPeriodic[corner].size()) {
206+
cornerHaloRecvPeriodic[corner].resize(1);
207+
cornerHaloRecvPeriodic[corner][0] = 2 * (localExtentX + localExtentY) + corner;
208+
}
207209
}
208-
}
209-
210-
// gather rank extents in X & Y direction for all processes
211-
rankExtentsX.resize(modelMPI.getSize(), 0);
212-
rankExtentsY.resize(modelMPI.getSize(), 0);
213-
MPI_Allgather(&localExtentX, 1, MPI_INT, rankExtentsX.data(), 1, MPI_INT, modelMPI.getComm());
214-
MPI_Allgather(&localExtentY, 1, MPI_INT, rankExtentsY.data(), 1, MPI_INT, modelMPI.getComm());
215210

216-
ncFile.close();
211+
// gather rank extents in X & Y direction for all processes
212+
rankExtentsX.resize(modelMPI.getSize(), 0);
213+
rankExtentsY.resize(modelMPI.getSize(), 0);
214+
MPI_Allgather(
215+
&localExtentX, 1, MPI_INT, rankExtentsX.data(), 1, MPI_INT, modelMPI.getComm());
216+
MPI_Allgather(
217+
&localExtentY, 1, MPI_INT, rankExtentsY.data(), 1, MPI_INT, modelMPI.getComm());
218+
219+
ncFile.close();
220+
} catch (netCDF::exceptions::NcException& e) {
221+
std::cerr << "Failed to open partition file [" << partitionFile << "] :: " << e.what()
222+
<< std::endl;
223+
auto& modelMPI = ModelMPI::getInstance();
224+
MPI_Abort(modelMPI.getComm(), 1);
225+
}
217226
}
218227

219228
int ModelMetadata::getLocalCornerX() const { return localCornerX; }

0 commit comments

Comments
 (0)