Skip to content

Commit a2304eb

Browse files
authored
Merge pull request open-mpi#7152 from hppritcha/topic/btl_uct_fixes_for_v4.0.x
Topic/btl uct fixes for v4.0.x
2 parents 0e25083 + 59b24ab commit a2304eb

File tree

7 files changed

+145
-7
lines changed

7 files changed

+145
-7
lines changed

opal/mca/btl/uct/btl_uct.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
1414
* reserved.
15+
* Copyright (c) 2019 Google, LLC. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -85,6 +86,10 @@ struct mca_btl_uct_module_t {
8586
/** array containing the am_tl and rdma_tl */
8687
mca_btl_uct_tl_t *comm_tls[2];
8788

89+
#if UCT_API >= UCT_VERSION(1, 7)
90+
uct_component_h uct_component;
91+
#endif
92+
8893
/** registration cache */
8994
mca_rcache_base_module_t *rcache;
9095

opal/mca/btl/uct/btl_uct_amo.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
110110
mca_btl_uct_uct_completion_release (comp);
111111
}
112112

113-
uct_rkey_release (&rkey);
113+
mca_btl_uct_rkey_release (uct_btl, &rkey);
114114

115115
return rc;
116116
}
@@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
184184
mca_btl_uct_uct_completion_release (comp);
185185
}
186186

187-
uct_rkey_release (&rkey);
187+
mca_btl_uct_rkey_release (uct_btl, &rkey);
188188

189189
return rc;
190190
}

opal/mca/btl/uct/btl_uct_component.c

Lines changed: 79 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig
314314
return UCS_OK;
315315
}
316316

317+
#if UCT_API >= UCT_VERSION(1, 7)
318+
static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc,
319+
char **allowed_ifaces)
320+
#else
317321
static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces)
322+
#endif
318323
{
319324
mca_rcache_base_resources_t rcache_resources;
320325
uct_tl_resource_desc_t *tl_desc;
@@ -348,8 +353,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
348353

349354
md = OBJ_NEW(mca_btl_uct_md_t);
350355

356+
357+
#if UCT_API >= UCT_VERSION(1, 7)
358+
uct_md_config_read (component, NULL, NULL, &uct_config);
359+
uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md);
360+
#else
351361
uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config);
352362
uct_md_open (md_desc->md_name, uct_config, &md->uct_md);
363+
#endif
353364
uct_config_release (uct_config);
354365

355366
uct_md_query (md->uct_md, &md_attr);
@@ -375,6 +386,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
375386
return OPAL_ERR_NOT_AVAILABLE;
376387
}
377388

389+
#if UCT_API >= UCT_VERSION(1, 7)
390+
module->uct_component = component;
391+
#endif
392+
378393
mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module;
379394

380395
/* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable
@@ -400,6 +415,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
400415
return OPAL_SUCCESS;
401416
}
402417

418+
#if UCT_API >= UCT_VERSION(1, 7)
419+
/**
 * Process every memory domain (MD) resource exposed by one UCT component.
 *
 * The component is queried twice: first for its name and MD resource count,
 * then (after allocating storage) for the MD resource descriptors themselves.
 * Each descriptor is handed to mca_btl_uct_component_process_uct_md().
 *
 * @param[in] component       UCT component handle to query
 * @param[in] allowed_ifaces  passed through unchanged to
 *                            mca_btl_uct_component_process_uct_md()
 *
 * @returns OPAL_ERROR if a component query fails or the descriptor array
 *          cannot be allocated; OPAL_SUCCESS otherwise. A failure reported by
 *          the per-MD routine stops the iteration early but is NOT propagated
 *          to the caller.
 */
static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces)
{
    uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME |
                                 UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT};
    ucs_status_t ucs_status;

    ucs_status = uct_component_query (component, &attr);
    if (UCS_OK != ucs_status) {
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("processing uct component %s", attr.name));

    if (0 == attr.md_resource_count) {
        /* nothing to process. returning here also avoids calloc (0, ...), which
         * may legitimately return NULL and must not be mistaken for a failure */
        return OPAL_SUCCESS;
    }

    attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources));
    if (NULL == attr.md_resources) {
        return OPAL_ERROR;
    }

    attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
    ucs_status = uct_component_query (component, &attr);
    if (UCS_OK != ucs_status) {
        /* do not leak the descriptor array on the error path */
        free (attr.md_resources);
        return OPAL_ERROR;
    }

    /* unsigned index: avoids a signed/unsigned comparison against the count */
    for (unsigned i = 0 ; i < attr.md_resource_count ; ++i) {
        int rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i,
                                                       allowed_ifaces);
        if (OPAL_SUCCESS != rc) {
            /* stop at the first failing MD; the error is intentionally not returned */
            break;
        }
    }

    free (attr.md_resources);

    return OPAL_SUCCESS;
}
452+
#endif /* UCT_API >= UCT_VERSION(1, 7) */
453+
403454
/*
404455
* UCT component initialization:
405456
* (1) read interface list from kernel and compare against component parameters
@@ -415,6 +466,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
415466
struct mca_btl_base_module_t **base_modules;
416467
uct_md_resource_desc_t *resources;
417468
unsigned resource_count;
469+
ucs_status_t ucs_status;
418470
char **allowed_ifaces;
419471
int rc;
420472

@@ -431,10 +483,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
431483
return NULL;
432484
}
433485

434-
uct_query_md_resources (&resources, &resource_count);
435-
436486
mca_btl_uct_component.module_count = 0;
437487

488+
#if UCT_API >= UCT_VERSION(1, 7)
489+
uct_component_h *components;
490+
unsigned num_components;
491+
492+
ucs_status = uct_query_components(&components, &num_components);
493+
if (UCS_OK != ucs_status) {
494+
BTL_ERROR(("could not query UCT components"));
495+
return NULL;
496+
}
497+
498+
/* generate all suitable btl modules */
499+
for (unsigned i = 0 ; i < num_components ; ++i) {
500+
rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces);
501+
if (OPAL_SUCCESS != rc) {
502+
break;
503+
}
504+
}
505+
506+
uct_release_component_list (components);
507+
508+
#else /* UCT 1.6 and older */
509+
510+
uct_query_md_resources (&resources, &resource_count);
511+
438512
/* generate all suitable btl modules */
439513
for (unsigned i = 0 ; i < resource_count ; ++i) {
440514
rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces);
@@ -443,9 +517,11 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
443517
}
444518
}
445519

446-
opal_argv_free (allowed_ifaces);
447520
uct_release_md_resource_list (resources);
448521

522+
#endif /* UCT_API >= UCT_VERSION(1, 7) */
523+
524+
opal_argv_free (allowed_ifaces);
449525
mca_btl_uct_modex_send ();
450526

451527
/* pass module array back to caller */

opal/mca/btl/uct/btl_uct_rdma.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
132132

133133
BTL_VERBOSE(("get issued. status = %d", ucs_status));
134134

135-
uct_rkey_release (&rkey);
135+
mca_btl_uct_rkey_release (uct_btl, &rkey);
136136

137137
return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
138138
}
@@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
237237
mca_btl_uct_uct_completion_release (comp);
238238
}
239239

240-
uct_rkey_release (&rkey);
240+
mca_btl_uct_rkey_release (uct_btl, &rkey);
241241

242242
return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
243243
}

opal/mca/btl/uct/btl_uct_rdma.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module,
5555
return rc;
5656
}
5757

58+
#if UCT_API >= UCT_VERSION(1, 7)
59+
ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey);
60+
#else
5861
ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey);
62+
#endif
5963
return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR;
6064
}
6165

66+
/**
 * Release a UCT remote key bundle.
 *
 * Hides the difference in the uct_rkey_release() signature between UCT API
 * versions: from UCT 1.7 on the call also takes the module's UCT component,
 * while older versions take only the bundle.
 *
 * @param[in] uct_btl  BTL module; only its uct_component member is read, and
 *                     only when building against UCT 1.7 or newer
 * @param[in] rkey     remote key bundle to release
 */
static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey)
{
#if UCT_API < UCT_VERSION(1, 7)
    /* UCT 1.6 and older: the module is not needed, silence the unused warning */
    (void) uct_btl;
    uct_rkey_release (rkey);
#else
    uct_rkey_release (uct_btl->uct_component, rkey);
#endif
}
75+
6276
#endif /* !defined(BTL_UCT_RDMA_H) */

opal/mca/btl/uct/btl_uct_tl.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl
516516
* come up with a better estimate. */
517517

518518
/* UCT bandwidth is in bytes/sec, BTL is in MB/sec */
519+
#if UCT_API >= UCT_VERSION(1, 7)
520+
module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated +
521+
MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared /
522+
(opal_process_info.num_local_peers + 1)) / 1048576.0);
523+
#else
519524
module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0);
525+
#endif
520526
/* TODO -- figure out how to translate UCT latency to us */
521527
module->super.btl_latency = 1;
522528
}

opal/mca/btl/uct/configure.m4

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
# All rights reserved.
1717
# Copyright (c) 2018 Research Organization for Information Science
1818
# and Technology (RIST). All rights reserved.
19+
# Copyright (c) 2019 Triad National Security, LLC. All rights
20+
# reserved.
1921
# $COPYRIGHT$
2022
#
2123
# Additional copyrights may follow
@@ -35,6 +37,41 @@ AC_DEFUN([MCA_opal_btl_uct_CONFIG],[
3537
OMPI_CHECK_UCX([btl_uct],
3638
[btl_uct_happy="yes"],
3739
[btl_uct_happy="no"])
40+
dnl
41+
dnl check UCT version. UCT API can change at any given release
42+
dnl so we only allow compiling against ones we know work.
43+
dnl
44+
AC_ARG_ENABLE([uct-version-check],
45+
[AC_HELP_STRING([--enable-uct-version-check],
46+
[enable UCT version check (default: enabled)])])
47+
AC_MSG_CHECKING([check uct version])
48+
if test "$enable_uct_version_check" != "no"; then
49+
AC_MSG_RESULT([yes])
50+
else
51+
AC_MSG_RESULT([no])
52+
fi
53+
54+
max_allowed_uct_major=1
55+
max_allowed_uct_minor=7
56+
if test "$btl_uct_happy" = "yes" && test "$enable_uct_version_check" != "no"; then
57+
AC_MSG_CHECKING([UCT version compatibility])
58+
OPAL_VAR_SCOPE_PUSH([CPPFLAGS_save])
59+
CPPFLAGS_save="$CPPFLAGS"
60+
CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS"
61+
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([#include <uct/api/version.h>
62+
#if (UCT_VERNO_MAJOR > $max_allowed_uct_major)
63+
#error "UCT MAJOR VERNO > $max_allowed_uct_major"
64+
#endif
65+
#if (UCT_VERNO_MINOR > $max_allowed_uct_minor)
66+
#error "UCT MINOR VERNO > $max_allowed_uct_minor"
67+
#endif], [])],
68+
[AC_MSG_RESULT([UCT version compatible])],
69+
[AC_MSG_RESULT([UCT version not compatible - need UCX $max_allowed_uct_major.$max_allowed_uct_minor or older])
70+
btl_uct_happy="no"])
71+
CPPFLAGS="$CPPFLAGS_save"
72+
OPAL_VAR_SCOPE_POP
73+
fi
74+
3875
if test "$btl_uct_happy" = "yes" ; then
3976
OPAL_VAR_SCOPE_PUSH([CPPFLAGS_save])
4077

0 commit comments

Comments
 (0)