Skip to content

Commit 0706d86

Browse files
committed
osc/rdma: add an MCA parameter to list the MTLs for which osc pt2pt should have
higher priority than rdma, defaulting to psm2. Context: the Intel Omni-Path driver (hfi1) has verbs support, so the openib btl is available to use, but it performs poorly. Without this change, osc rdma using btl openib is the default choice when running on Intel Omni-Path, with lower performance than osc pt2pt over mtl psm2. Signed-off-by: Matias A Cabral <[email protected]> (cherry picked from commit 80c8858)
1 parent cc8b5ac commit 0706d86

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
1616
* Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved.
1717
* Copyright (c) 2015 NVIDIA Corporation. All rights reserved.
18-
* Copyright (c) 2015 Intel, Inc. All rights reserved.
18+
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
19+
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
1920
* $COPYRIGHT$
2021
*
2122
* Additional copyrights may follow
@@ -53,6 +54,7 @@
5354
#include "opal/mca/btl/base/base.h"
5455
#include "opal/mca/base/mca_base_pvar.h"
5556
#include "ompi/mca/bml/base/base.h"
57+
#include "ompi/mca/mtl/base/base.h"
5658

5759
static int ompi_osc_rdma_component_register (void);
5860
static int ompi_osc_rdma_component_init (bool enable_progress_threads, bool enable_mpi_threads);
@@ -68,8 +70,10 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *i
6870
static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used);
6971

7072
static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl);
73+
static int ompi_osc_rdma_query_mtls (void);
7174

7275
static char *ompi_osc_rdma_btl_names;
76+
static char *ompi_osc_rdma_mtl_names;
7377

7478
ompi_osc_rdma_component_t mca_osc_rdma_component = {
7579
.super = {
@@ -222,6 +226,13 @@ static int ompi_osc_rdma_component_register (void)
222226
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
223227
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_btl_names);
224228

229+
ompi_osc_rdma_mtl_names = "psm2";
230+
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "mtls",
231+
"Comma-delimited list of MTL component names to lower the priority of rdma "
232+
"osc component favoring pt2pt osc (default: psm2)",
233+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
234+
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names);
235+
225236

226237
/* register performance variables */
227238

@@ -339,6 +350,10 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
339350
}
340351
#endif /* OPAL_CUDA_SUPPORT */
341352

353+
if (OMPI_SUCCESS == ompi_osc_rdma_query_mtls ()) {
354+
return 5; /* this has to be lower that osc pt2pt default priority */
355+
}
356+
342357
if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) {
343358
return -1;
344359
}
@@ -703,6 +718,21 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
703718
return ret;
704719
}
705720

721+
static int ompi_osc_rdma_query_mtls (void)
722+
{
723+
char **mtls_to_use;
724+
725+
mtls_to_use = opal_argv_split (ompi_osc_rdma_mtl_names, ',');
726+
if (mtls_to_use && ompi_mtl_base_selected_component) {
727+
for (int i = 0 ; mtls_to_use[i] ; ++i) {
728+
if (0 == strcmp (mtls_to_use[i], ompi_mtl_base_selected_component->mtl_version.mca_component_name)) {
729+
return OMPI_SUCCESS;
730+
}
731+
}
732+
}
733+
return -1;
734+
}
735+
706736
static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl)
707737
{
708738
struct mca_btl_base_module_t **possible_btls = NULL;

0 commit comments

Comments
 (0)