Skip to content

Commit b1ef5a4

Browse files
authored
Merge pull request #7016 from hjelmn/fix_btl_uct_from_yet_another_unannounced_api_break_in_the_openucx_uct_layer
btl/uct: add support for OpenUCX v1.8 API changes
2 parents b6c4d5c + 8473a66 commit b1ef5a4

File tree

7 files changed

+117
-10
lines changed

7 files changed

+117
-10
lines changed

opal/mca/btl/uct/btl_uct.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ struct mca_btl_uct_module_t {
8585
/** array containing the am_tl and rdma_tl */
8686
mca_btl_uct_tl_t *comm_tls[2];
8787

88+
#if UCT_API > UCT_VERSION(1, 7)
89+
uct_component_h uct_component;
90+
#endif
91+
8892
/** registration cache */
8993
mca_rcache_base_module_t *rcache;
9094

opal/mca/btl/uct/btl_uct_am.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_
175175
if (!context->in_am_callback) {
176176
mca_btl_uct_context_lock (context);
177177
/* attempt to post the fragment */
178-
if (NULL != frag->base.super.registration) {
178+
if (NULL != frag->base.super.registration &&
179+
(context->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY)) {
179180
frag->comp.dev_context = context;
180181
ucs_status = uct_ep_am_zcopy (ep_handle, MCA_BTL_UCT_FRAG, &frag->header, sizeof (frag->header),
181182
&frag->uct_iov, 1, 0, &frag->comp.uct_comp);

opal/mca/btl/uct/btl_uct_amo.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
110110
mca_btl_uct_uct_completion_release (comp);
111111
}
112112

113-
uct_rkey_release (&rkey);
113+
mca_btl_uct_rkey_release (uct_btl, &rkey);
114114

115115
return rc;
116116
}
@@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
184184
mca_btl_uct_uct_completion_release (comp);
185185
}
186186

187-
uct_rkey_release (&rkey);
187+
mca_btl_uct_rkey_release (uct_btl, &rkey);
188188

189189
return rc;
190190
}

opal/mca/btl/uct/btl_uct_component.c

Lines changed: 79 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig
316316
return UCS_OK;
317317
}
318318

319+
#if UCT_API > UCT_VERSION(1, 7)
320+
static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc,
321+
char **allowed_ifaces)
322+
#else
319323
static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces)
324+
#endif
320325
{
321326
mca_rcache_base_resources_t rcache_resources;
322327
uct_tl_resource_desc_t *tl_desc;
@@ -350,8 +355,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
350355

351356
md = OBJ_NEW(mca_btl_uct_md_t);
352357

358+
359+
#if UCT_API > UCT_VERSION(1, 7)
360+
uct_md_config_read (component, NULL, NULL, &uct_config);
361+
uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md);
362+
#else
353363
uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config);
354364
uct_md_open (md_desc->md_name, uct_config, &md->uct_md);
365+
#endif
355366
uct_config_release (uct_config);
356367

357368
uct_md_query (md->uct_md, &md_attr);
@@ -377,6 +388,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
377388
return OPAL_ERR_NOT_AVAILABLE;
378389
}
379390

391+
#if UCT_API > UCT_VERSION(1, 7)
392+
module->uct_component = component;
393+
#endif
394+
380395
mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module;
381396

382397
/* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable
@@ -402,6 +417,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
402417
return OPAL_SUCCESS;
403418
}
404419

420+
#if UCT_API > UCT_VERSION(1, 7)
421+
/**
 * Create BTL modules for every memory domain (MD) exposed by one UCT component.
 *
 * The component is queried twice: first for its name and MD resource count,
 * then (after allocating storage for that many descriptors) for the MD
 * resource descriptors themselves. Each descriptor is then handed to
 * mca_btl_uct_component_process_uct_md().
 *
 * @param[in] component       UCT component to process
 * @param[in] allowed_ifaces  interface list forwarded unchanged to
 *                            mca_btl_uct_component_process_uct_md()
 *
 * @returns OPAL_SUCCESS when the component was queried successfully (including
 *          when it exposes no memory domains, and when processing of one of its
 *          memory domains failed)
 * @returns OPAL_ERROR if either component query fails or the descriptor array
 *          cannot be allocated
 */
static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces)
{
    uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME |
                                 UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT};
    ucs_status_t ucs_status;
    int rc;

    ucs_status = uct_component_query (component, &attr);
    if (UCS_OK != ucs_status) {
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("processing uct component %s", attr.name));

    if (0 == attr.md_resource_count) {
        /* nothing to process; avoid a zero-sized allocation and a second query */
        return OPAL_SUCCESS;
    }

    attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources));
    if (NULL == attr.md_resources) {
        return OPAL_ERROR;
    }

    attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
    ucs_status = uct_component_query (component, &attr);
    if (UCS_OK != ucs_status) {
        /* do not leak the descriptor array on failure */
        free (attr.md_resources);
        return OPAL_ERROR;
    }

    for (unsigned i = 0 ; i < attr.md_resource_count ; ++i) {
        rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i,
                                                   allowed_ifaces);
        if (OPAL_SUCCESS != rc) {
            /* stop processing this component's memory domains; the failure is
             * intentionally not propagated to the caller */
            break;
        }
    }

    free (attr.md_resources);

    return OPAL_SUCCESS;
}
454+
#endif /* UCT_API > UCT_VERSION(1, 7) */
455+
405456
/*
406457
* UCT component initialization:
407458
* (1) read interface list from kernel and compare against component parameters
@@ -417,6 +468,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
417468
struct mca_btl_base_module_t **base_modules;
418469
uct_md_resource_desc_t *resources;
419470
unsigned resource_count;
471+
ucs_status_t ucs_status;
420472
char **allowed_ifaces;
421473
int rc;
422474

@@ -433,10 +485,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
433485
return NULL;
434486
}
435487

436-
uct_query_md_resources (&resources, &resource_count);
437-
438488
mca_btl_uct_component.module_count = 0;
439489

490+
#if UCT_API > UCT_VERSION(1, 7)
491+
uct_component_h *components;
492+
unsigned num_components;
493+
494+
ucs_status = uct_query_components(&components, &num_components);
495+
if (UCS_OK != ucs_status) {
496+
BTL_ERROR(("could not query UCT components"));
497+
return NULL;
498+
}
499+
500+
/* generate all suitable btl modules */
501+
for (unsigned i = 0 ; i < num_components ; ++i) {
502+
rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces);
503+
if (OPAL_SUCCESS != rc) {
504+
break;
505+
}
506+
}
507+
508+
uct_release_component_list (components);
509+
510+
#else /* UCT 1.7 and older */
511+
512+
uct_query_md_resources (&resources, &resource_count);
513+
440514
/* generate all suitable btl modules */
441515
for (unsigned i = 0 ; i < resource_count ; ++i) {
442516
rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces);
@@ -445,9 +519,11 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
445519
}
446520
}
447521

448-
opal_argv_free (allowed_ifaces);
449522
uct_release_md_resource_list (resources);
450523

524+
#endif /* UCT_API > UCT_VERSION(1, 7) */
525+
526+
opal_argv_free (allowed_ifaces);
451527
mca_btl_uct_modex_send ();
452528

453529
/* pass module array back to caller */

opal/mca/btl/uct/btl_uct_rdma.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
132132

133133
BTL_VERBOSE(("get issued. status = %d", ucs_status));
134134

135-
uct_rkey_release (&rkey);
135+
mca_btl_uct_rkey_release (uct_btl, &rkey);
136136

137137
return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
138138
}
@@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
237237
mca_btl_uct_uct_completion_release (comp);
238238
}
239239

240-
uct_rkey_release (&rkey);
240+
mca_btl_uct_rkey_release (uct_btl, &rkey);
241241

242242
return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
243243
}

opal/mca/btl/uct/btl_uct_rdma.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module,
5555
return rc;
5656
}
5757

58+
#if UCT_API > UCT_VERSION(1, 7)
59+
ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey);
60+
#else
5861
ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey);
62+
#endif
5963
return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR;
6064
}
6165

66+
/**
 * Release a remote key bundle through uct_rkey_release().
 *
 * Wraps the two call signatures selected by the UCT API version check: when
 * UCT_API is greater than UCT_VERSION(1, 7) the call takes the module's
 * uct_component as its first argument; otherwise it takes only the bundle.
 *
 * @param[in] uct_btl  btl uct module (only read when UCT_API > UCT_VERSION(1, 7))
 * @param[in] rkey     remote key bundle to release
 */
static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey)
{
#if !(UCT_API > UCT_VERSION(1, 7))
    /* older signature: the module is not needed, silence unused-parameter warnings */
    (void) uct_btl;
    uct_rkey_release (rkey);
#else
    uct_rkey_release (uct_btl->uct_component, rkey);
#endif
}
75+
6276
#endif /* !defined(BTL_UCT_RDMA_H) */

opal/mca/btl/uct/btl_uct_tl.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,8 +461,14 @@ static void mca_btl_uct_set_tl_am (mca_btl_uct_module_t *module, mca_btl_uct_tl_
461461
tl->max_device_contexts = mca_btl_uct_component.num_contexts_per_module;
462462
}
463463

464-
module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy - sizeof (mca_btl_uct_am_header_t);
465-
module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy - sizeof (mca_btl_uct_am_header_t);
464+
module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy -
465+
sizeof (mca_btl_uct_am_header_t);
466+
if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) {
467+
module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy -
468+
sizeof (mca_btl_uct_am_header_t);
469+
} else {
470+
module->super.btl_max_send_size = module->super.btl_eager_limit;
471+
}
466472
}
467473

468474
static int mca_btl_uct_set_tl_conn (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl)
@@ -516,7 +522,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl
516522
* come up with a better estimate. */
517523

518524
/* UCT bandwidth is in bytes/sec, BTL is in MB/sec */
525+
#if UCT_API > UCT_VERSION(1, 7)
526+
module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated +
527+
MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared /
528+
(opal_process_info.num_local_peers + 1)) / 1048576.0);
529+
#else
519530
module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0);
531+
#endif
520532
/* TODO -- figure out how to translate UCT latency to us */
521533
module->super.btl_latency = 1;
522534
}

0 commit comments

Comments
 (0)