|
46 | 46 | (COMM)->c_coll->coll_##COLL##_module = (FALLBACKS).COLL.module; \ |
47 | 47 | } while (0) |
48 | 48 |
|
/*
 * Conditionally pin a HAN sub-communicator.
 *
 * When OMPI_COMM_CID_IS_LOWER(COMM, PARENT_COMM) holds, COMM is flagged
 * through OMPI_COMM_SET_EXTRA_RETAIN and one additional reference is taken
 * with OBJ_RETAIN; otherwise the macro does nothing.  Call sites use this so
 * that the sub-communicators are not released before the parent communicator.
 *
 * COMM is expanded several times: pass an expression without side effects.
 * The do/while (0) wrapper lets the macro be used as a single statement.
 */
#define HAN_SUBCOM_EXTRA_RETAIN(COMM, PARENT_COMM)                  \
    do {                                                            \
        if (!(OMPI_COMM_CID_IS_LOWER(COMM, PARENT_COMM))) {         \
            break; /* condition not met: leave COMM untouched */    \
        }                                                           \
        OMPI_COMM_SET_EXTRA_RETAIN(COMM);                           \
        OBJ_RETAIN(COMM);                                           \
    } while (0)
| 57 | + |
49 | 58 | /* |
50 | 59 | * Routine that creates the local hierarchical sub-communicators |
51 | 60 | * Called each time a collective is called. |
@@ -206,6 +215,11 @@ int mca_coll_han_comm_create_new(struct ompi_communicator_t *comm, |
206 | 215 | HAN_SUBCOM_RESTORE_COLLECTIVE(fallbacks, comm, han_module, scatterv); |
207 | 216 |
|
208 | 217 | OBJ_DESTRUCT(&comm_info); |
| 218 | + |
| 219 | + /* Ensure these communicators aren't released before the parent comm */ |
| 220 | + HAN_SUBCOM_EXTRA_RETAIN(*low_comm, comm); |
| 221 | + HAN_SUBCOM_EXTRA_RETAIN(*up_comm, comm); |
| 222 | + |
209 | 223 | return OMPI_SUCCESS; |
210 | 224 |
|
211 | 225 | return_with_error: |
@@ -376,6 +390,14 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm, |
376 | 390 | han_module->cached_up_comms = up_comms; |
377 | 391 | han_module->cached_vranks = vranks; |
378 | 392 |
|
| 393 | + /* Ensure these communicators aren't released before the parent comm */ |
| 394 | + for(int i = 0; i < COLL_HAN_LOW_MODULES; i++) { |
| 395 | + HAN_SUBCOM_EXTRA_RETAIN(low_comms[i], comm); |
| 396 | + } |
| 397 | + for(int i = 0; i < COLL_HAN_UP_MODULES; i++) { |
| 398 | + HAN_SUBCOM_EXTRA_RETAIN(up_comms[i], comm); |
| 399 | + } |
| 400 | + |
379 | 401 | /* Reset the saved collectives to point back to HAN */ |
380 | 402 | HAN_SUBCOM_RESTORE_COLLECTIVE(fallbacks, comm, han_module, alltoall); |
381 | 403 | HAN_SUBCOM_RESTORE_COLLECTIVE(fallbacks, comm, han_module, alltoallv); |
|
0 commit comments