Skip to content

Commit 976cc1e

Browse files
authored
Merge pull request #6509 from janjust/oshmem-multiple-contexts-v4.0.x
v4.0.x: Oshmem multiple contexts
2 parents b11cb23 + 69a80fc commit 976cc1e

File tree

14 files changed

+274
-132
lines changed

14 files changed

+274
-132
lines changed

opal/mca/common/ucx/common_ucx.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ void opal_common_ucx_mca_proc_added(void)
153153
#endif
154154
}
155155

156+
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence_nb(int *fenced)
157+
{
158+
return opal_pmix.fence_nb(NULL, 0, opal_common_ucx_mca_fence_complete_cb, (void *)fenced);
159+
}
160+
156161
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker)
157162
{
158163
volatile int fenced = 0;
@@ -181,9 +186,8 @@ static void opal_common_ucx_wait_all_requests(void **reqs, int count, ucp_worker
181186
}
182187
}
183188

184-
OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, size_t count,
185-
size_t my_rank, size_t max_disconnect, ucp_worker_h worker)
186-
{
189+
OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, size_t count,
190+
size_t my_rank, size_t max_disconnect, ucp_worker_h worker) {
187191
size_t num_reqs;
188192
size_t max_reqs;
189193
void *dreq, **dreqs;
@@ -230,7 +234,13 @@ OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, s
230234
opal_common_ucx_wait_all_requests(dreqs, num_reqs, worker);
231235
free(dreqs);
232236

233-
opal_common_ucx_mca_pmix_fence(worker);
234-
235237
return OPAL_SUCCESS;
236238
}
239+
240+
/**
 * Disconnect from a set of procs, then run a PMIx fence.
 *
 * Calls opal_common_ucx_del_procs_nofence() with the given arguments and
 * afterwards opal_common_ucx_mca_pmix_fence() on @p worker.
 *
 * The fence is always executed, even if the disconnect phase reported an
 * error, so that this process keeps taking part in the fence exactly as it
 * did before. The disconnect status is no longer discarded: if it failed,
 * that error is returned; otherwise the fence status is returned.
 *
 * @param procs           Array of procs to disconnect from.
 * @param count           Number of entries in @p procs.
 * @param my_rank         Forwarded unchanged to the no-fence variant.
 * @param max_disconnect  Forwarded unchanged to the no-fence variant.
 * @param worker          UCP worker forwarded to both calls.
 *
 * @return OPAL_SUCCESS when both phases succeed; otherwise the first
 *         non-success status (disconnect phase first, then fence).
 */
OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, size_t count,
                                            size_t my_rank, size_t max_disconnect, ucp_worker_h worker)
{
    int del_rc;
    int fence_rc;

    del_rc = opal_common_ucx_del_procs_nofence(procs, count, my_rank, max_disconnect, worker);

    /* Run the fence unconditionally, as the original code did. */
    fence_rc = opal_common_ucx_mca_pmix_fence(worker);

    /* Report the earliest failure instead of silently dropping it. */
    if (OPAL_SUCCESS != del_rc) {
        return del_rc;
    }

    return fence_rc;
}

opal/mca/common/ucx/common_ucx.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,12 @@ OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void);
100100
OPAL_DECLSPEC void opal_common_ucx_mca_proc_added(void);
101101
OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
102102
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
103-
OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component);
103+
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence_nb(int *fenced);
104104
OPAL_DECLSPEC int opal_common_ucx_del_procs(opal_common_ucx_del_proc_t *procs, size_t count,
105105
size_t my_rank, size_t max_disconnect, ucp_worker_h worker);
106+
OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, size_t count,
107+
size_t my_rank, size_t max_disconnect, ucp_worker_h worker);
108+
OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *component);
106109

107110
static inline
108111
ucs_status_t opal_common_ucx_request_status(ucs_status_ptr_t request)

oshmem/mca/atomic/ucx/atomic_ucx_cswap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx,
4040
assert(NULL != prev);
4141

4242
*prev = value;
43-
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self);
43+
ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
4444
status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn,
4545
UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size,
4646
rva, ucx_mkey->rkey,

oshmem/mca/atomic/ucx/atomic_ucx_module.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx,
4747

4848
assert((8 == size) || (4 == size));
4949

50-
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self);
50+
ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
5151
status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn,
5252
op, value, size, rva,
5353
ucx_mkey->rkey);
@@ -70,7 +70,7 @@ int mca_atomic_ucx_fop(shmem_ctx_t ctx,
7070

7171
assert((8 == size) || (4 == size));
7272

73-
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self);
73+
ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
7474
status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn,
7575
op, value, prev, size,
7676
rva, ucx_mkey->rkey,

oshmem/mca/memheap/base/base.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ void memheap_oob_destruct(void);
6969
OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(const void* va);
7070
OSHMEM_DECLSPEC sshmem_mkey_t *mca_memheap_base_get_mkey(void* va,
7171
int tr_id);
72-
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
72+
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx,
73+
map_segment_t *s,
7374
int pe,
7475
void* va,
7576
int btl_id,
@@ -243,7 +244,8 @@ static inline map_segment_t *memheap_find_va(void* va)
243244
return s;
244245
}
245246

246-
static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
247+
static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(shmem_ctx_t ctx,
248+
int pe,
247249
void* va,
248250
int btl_id,
249251
void** rva)
@@ -273,7 +275,7 @@ static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
273275
return mkey;
274276
}
275277

276-
return mca_memheap_base_get_cached_mkey_slow(s, pe, va, btl_id, rva);
278+
return mca_memheap_base_get_cached_mkey_slow(ctx, s, pe, va, btl_id, rva);
277279
}
278280

279281
static inline int mca_memheap_base_num_transports(void)

oshmem/mca/memheap/base/memheap_base_mkey.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct oob_comm {
5555
oob_comm_request_t req_pool[MEMHEAP_RECV_REQS_MAX];
5656
opal_list_t req_list;
5757
int is_inited;
58+
shmem_ctx_t ctx;
5859
};
5960

6061
mca_memheap_map_t* memheap_map = NULL;
@@ -66,7 +67,7 @@ static int send_buffer(int pe, opal_buffer_t *msg);
6667
static int oshmem_mkey_recv_cb(void);
6768

6869
/* pickup list of rkeys and remote va */
69-
static int memheap_oob_get_mkeys(int pe,
70+
static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe,
7071
uint32_t va_seg_num,
7172
sshmem_mkey_t *mkey);
7273

@@ -142,7 +143,7 @@ static void memheap_attach_segment(sshmem_mkey_t *mkey, int tr_id)
142143
}
143144

144145

145-
static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe)
146+
static void unpack_remote_mkeys(shmem_ctx_t ctx, opal_buffer_t *msg, int remote_pe)
146147
{
147148
int32_t cnt;
148149
int32_t n;
@@ -182,7 +183,7 @@ static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe)
182183
} else {
183184
memheap_oob.mkeys[tr_id].u.key = MAP_SEGMENT_SHM_INVALID;
184185
}
185-
MCA_SPML_CALL(rmkey_unpack(&memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id));
186+
MCA_SPML_CALL(rmkey_unpack(ctx, &memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id));
186187
}
187188

188189
MEMHEAP_VERBOSE(5,
@@ -242,7 +243,7 @@ static void do_recv(int source_pe, opal_buffer_t* buffer)
242243
case MEMHEAP_RKEY_RESP:
243244
MEMHEAP_VERBOSE(5, "*** RKEY RESP");
244245
OPAL_THREAD_LOCK(&memheap_oob.lck);
245-
unpack_remote_mkeys(buffer, source_pe);
246+
unpack_remote_mkeys(memheap_oob.ctx, buffer, source_pe);
246247
memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP;
247248
opal_condition_broadcast(&memheap_oob.cond);
248249
OPAL_THREAD_UNLOCK(&memheap_oob.lck);
@@ -455,14 +456,14 @@ static int send_buffer(int pe, opal_buffer_t *msg)
455456
return rc;
456457
}
457458

458-
static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
459+
static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_mkey_t *mkeys)
459460
{
460461
opal_buffer_t *msg;
461462
uint8_t cmd;
462463
int i;
463464
int rc;
464465

465-
if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) {
466+
if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(ctx, pe, seg, mkeys))) {
466467
for (i = 0; i < memheap_map->num_transports; i++) {
467468
MEMHEAP_VERBOSE(5,
468469
"MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s",
@@ -478,6 +479,7 @@ static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
478479
memheap_oob.mkeys = mkeys;
479480
memheap_oob.segno = seg;
480481
memheap_oob.mkeys_rcvd = 0;
482+
memheap_oob.ctx = ctx;
481483

482484
msg = OBJ_NEW(opal_buffer_t);
483485
if (!msg) {
@@ -645,7 +647,7 @@ void mca_memheap_modex_recv_all(void)
645647
}
646648
memheap_oob.mkeys = s->mkeys_cache[i];
647649
memheap_oob.segno = j;
648-
unpack_remote_mkeys(msg, i);
650+
unpack_remote_mkeys(oshmem_ctx_default, msg, i);
649651
}
650652
}
651653

@@ -674,7 +676,8 @@ void mca_memheap_modex_recv_all(void)
674676
}
675677
}
676678

677-
sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
679+
sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx,
680+
map_segment_t *s,
678681
int pe,
679682
void* va,
680683
int btl_id,
@@ -692,7 +695,7 @@ sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
692695
if (!s->mkeys_cache[pe])
693696
return NULL ;
694697

695-
rc = memheap_oob_get_mkeys(pe,
698+
rc = memheap_oob_get_mkeys(ctx, pe,
696699
s - memheap_map->mem_segs,
697700
s->mkeys_cache[pe]);
698701
if (OSHMEM_SUCCESS != rc)

oshmem/mca/spml/base/base.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,12 @@ OSHMEM_DECLSPEC int mca_spml_base_test(void* addr,
7272
void* value,
7373
int datatype,
7474
int *out_value);
75-
OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe,
75+
OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx,
76+
int pe,
7677
uint32_t seg,
7778
sshmem_mkey_t *mkeys);
7879

79-
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id);
80+
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id);
8081
OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey);
8182
OSHMEM_DECLSPEC void *mca_spml_base_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe);
8283

oshmem/mca/spml/base/spml_base.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,12 @@ int mca_spml_base_wait_nb(void* handle)
247247
return OSHMEM_SUCCESS;
248248
}
249249

250-
int mca_spml_base_oob_get_mkeys(int pe, uint32_t segno, sshmem_mkey_t *mkeys)
250+
/**
 * Base implementation of the oob_get_mkeys hook.
 *
 * Ignores every argument and unconditionally returns OSHMEM_ERROR.
 *
 * @param ctx    Unused.
 * @param pe     Unused.
 * @param segno  Unused.
 * @param mkeys  Unused; never written.
 *
 * @return Always OSHMEM_ERROR.
 */
int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t segno, sshmem_mkey_t *mkeys)
{
    (void)ctx;
    (void)pe;
    (void)segno;
    (void)mkeys;

    return OSHMEM_ERROR;
}
}
254254

255-
void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
255+
/**
 * Base implementation of the rmkey_unpack hook.
 *
 * A no-op: every argument is ignored and nothing is modified.
 *
 * @param ctx    Unused.
 * @param mkey   Unused; never written.
 * @param segno  Unused.
 * @param pe     Unused.
 * @param tr_id  Unused.
 */
void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
{
    (void)ctx;
    (void)mkey;
    (void)segno;
    (void)pe;
    (void)tr_id;
}
258258

oshmem/mca/spml/spml.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ typedef int (*mca_spml_base_module_test_fn_t)(void* addr,
132132
*
133133
* @param mkey remote mkey
134134
*/
135-
typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id);
135+
typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(shmem_ctx_t ctx, sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id);
136136

137137
/**
138138
* If possible, get a pointer to the remote memory described by the mkey
@@ -180,7 +180,7 @@ typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys);
180180
*
181181
* @return OSHMEM_SUCCESS if keys are found
182182
*/
183-
typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(int pe,
183+
typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(shmem_ctx_t ctx, int pe,
184184
uint32_t seg,
185185
sshmem_mkey_t *mkeys);
186186

0 commit comments

Comments
 (0)