Skip to content

Commit e7a44a1

Browse files
author
Ralph Castain
authored
Merge pull request #3814 from anandhis/ofi-choose-provider-at-send
Choosing the ofi provider when opening conduit and sending message to peer
2 parents fbeb7b9 + 793ebc2 commit e7a44a1

File tree

3 files changed

+316
-90
lines changed

3 files changed

+316
-90
lines changed

orte/mca/rml/ofi/rml_ofi.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,17 @@ typedef struct {
158158
} ;
159159
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t;
160160

161+
/* On the first send initiated to a new peer,
162+
* select the peer provider, the peer ep-addr and the
163+
* local provider, and store them in an orte_rml_ofi_peer_t instance.
164+
* Insert this instance in the hash table.
165+
* */
161166
typedef struct {
162167
opal_object_t super;
163-
void* ofi_ep;
164-
size_t ofi_ep_len;
168+
char* ofi_prov_name; /* peer (dest) provider chosen */
169+
void* ofi_ep; /* peer (dest) ep chosen */
170+
size_t ofi_ep_len; /* peer (dest) ep length */
171+
uint8_t src_prov_id; /* index of the local (src) provider used for this peer */
165172
} orte_rml_ofi_peer_t;
166173
OBJ_CLASS_DECLARATION(orte_rml_ofi_peer_t);
167174

@@ -200,6 +207,7 @@ int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
200207
/* OFI Recv handler */
201208
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id);
202209

210+
bool user_override(void);
203211
END_C_DECLS
204212

205213
#endif

orte/mca/rml/ofi/rml_ofi_component.c

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,18 @@ orte_rml_ofi_module_t orte_rml_ofi = {
8080
/* Local variables */
8181
static bool init_done = false;
8282
static char *ofi_transports_supported = NULL;
83+
static char *initial_ofi_transports_supported = NULL;
8384
static bool ofi_desired = false;
8485

86+
/* return true if user override for choice of ofi provider */
87+
bool user_override(void)
88+
{
89+
if( 0 == strcmp(initial_ofi_transports_supported, ofi_transports_supported ) )
90+
return false;
91+
else
92+
return true;
93+
}
94+
8595
static int
8696
rml_ofi_component_open(void)
8797
{
@@ -232,7 +242,8 @@ static int rml_ofi_component_register(void)
232242
{
233243
mca_base_component_t *component = &mca_rml_ofi_component.base;
234244

235-
ofi_transports_supported = strdup("fabric,ethernet");
245+
initial_ofi_transports_supported = strdup("fabric");
246+
ofi_transports_supported = strdup(initial_ofi_transports_supported);
236247
mca_base_component_var_register(component, "transports",
237248
"Comma-delimited list of transports to support (default=\"fabric,ethernet\"",
238249
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
@@ -923,29 +934,54 @@ static int rml_ofi_component_init(void)
923934
int get_ofi_prov_id( opal_list_t *attributes)
924935
{
925936

937+
bool choose_fabric = false, choice_made = false;
926938
int ofi_prov_id = RML_OFI_PROV_ID_INVALID, prov_num=0;
927939
char *provider = NULL, *transport = NULL;
928940
char *ethernet="sockets", *fabric="psm2";
929941
struct fi_info *cur_fi;
942+
char *comp_attrib = NULL;
943+
char **comps;
944+
int i;
930945

931-
/* check the list of attributes to see if we should respond
946+
/* check the list of attributes in below order
932947
* Attribute should have ORTE_RML_TRANSPORT_ATTRIB key
933-
* with values "ethernet" or "fabric"
948+
* with values "ethernet" or "fabric". "fabric" is higher priority.
934949
* (or) ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA"
935950
* if both above attributes are missing return failure
936951
*/
937-
if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) &&
938-
NULL != transport) {
939-
if( 0 == strcmp( transport, "ethernet") ) {
940-
provider = ethernet;
941-
} else if ( 0 == strcmp( transport, "fabric") ) {
942-
provider = fabric;
943-
}
952+
//if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) {
953+
954+
if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) &&
955+
NULL != comp_attrib) {
956+
comps = opal_argv_split(comp_attrib, ',');
957+
for (i=0; NULL != comps[i] && choice_made == false ; i++) {
958+
if (NULL != strstr(ofi_transports_supported, comps[i])) {
959+
if (0 == strcmp( comps[i], "ethernet")) {
960+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
961+
"%s - Opening conduit using OFI ethernet/sockets provider",
962+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
963+
opal_argv_free(comps);
964+
provider = ethernet;
965+
choose_fabric = false;
966+
choice_made = false; /* continue to see if fabric is requested */
967+
} else if ( 0 == strcmp ( comps[i], "fabric")) {
968+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
969+
"%s - Opening conduit using OFI fabric provider",
970+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
971+
opal_argv_free(comps);
972+
choose_fabric = true;
973+
provider = NULL;
974+
choice_made = true; /* fabric is highest priority so don't check for anymore */
975+
}
976+
}
977+
}
944978
}
945979
/* if from the transport we don't know which provider we want, then check for the ORTE_RML_PROVIDER_ATTRIB */
946980
if ( NULL == provider) {
947-
if (orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING) &&
948-
NULL != provider) {
981+
orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING);
982+
}
983+
/* either ethernet-sockets or specific is requested. Proceed to choose that provider */
984+
if ( NULL != provider) {
949985
// loop the orte_rml_ofi.ofi_provs[] and find the provider name that matches
950986
for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) {
951987
cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info;
@@ -954,11 +990,27 @@ int get_ofi_prov_id( opal_list_t *attributes)
954990
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),provider,cur_fi->fabric_attr->prov_name);
955991
if ( strcmp(provider,cur_fi->fabric_attr->prov_name) == 0) {
956992
ofi_prov_id = prov_num;
957-
}
993+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
994+
"%s - Choosing provider %s",
995+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
996+
cur_fi->fabric_attr->prov_name);
997+
}
998+
}
999+
} else if ( choose_fabric ) {
1000+
// "fabric" is requested, choose the first fabric(non-ethernet) provider
1001+
for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) {
1002+
cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info;
1003+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
1004+
"%s -choosing fabric -> comparing %s != %s ",
1005+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ethernet,cur_fi->fabric_attr->prov_name);
1006+
if ( strcmp(ethernet, cur_fi->fabric_attr->prov_name) != 0) {
1007+
ofi_prov_id = prov_num;
1008+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
1009+
"%s - Choosing fabric provider %s",
1010+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_fi->fabric_attr->prov_name);
9581011
}
9591012
}
9601013
}
961-
9621014
opal_output_verbose(20,orte_rml_base_framework.framework_output,
9631015
"%s - get_ofi_prov_id(), returning ofi_prov_id=%d ",
9641016
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ofi_prov_id);
@@ -1076,22 +1128,18 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
10761128
"%s - ORTE_RML_TRANSPORT_TYPE = %s ",
10771129
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp_attrib);
10781130
comps = opal_argv_split(comp_attrib, ',');
1079-
for (i=0; 0 == i; i++) {
1131+
for (i=0; NULL != comps[i]; i++) {
10801132
if (NULL != strstr(ofi_transports_supported, comps[i])) {
10811133
/* we are a candidate, */
10821134
opal_output_verbose(20,orte_rml_base_framework.framework_output,
1083-
"%s - Forcibly returning ofi socket provider for ethernet transport request",
1135+
"%s - Opening conduit using OFI.. ",
10841136
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
10851137
opal_argv_free(comps);
1086-
OBJ_CONSTRUCT(&provider, opal_list_t);
1087-
orte_set_attribute(&provider, ORTE_RML_PROVIDER_ATTRIB,
1088-
ORTE_ATTR_LOCAL, "sockets", OPAL_STRING);
1089-
return make_module(get_ofi_prov_id(&provider));
1138+
return make_module(get_ofi_prov_id(attributes));
10901139
}
10911140
}
10921141
opal_argv_free(comps);
10931142
}
1094-
/* end [Debug] */
10951143

10961144
/* Alternatively, check the attributes to see if we qualify - we only handle
10971145
* "pt2pt" */
@@ -1108,12 +1156,16 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
11081156

11091157
/* Initialize an orte_rml_ofi_peer_t: no peer provider name, no peer
 * endpoint address (zero length), and an invalid local provider index. */
static void pr_cons(orte_rml_ofi_peer_t *ptr)
{
    ptr->src_prov_id   = RML_OFI_PROV_ID_INVALID;
    ptr->ofi_ep_len    = 0;
    ptr->ofi_ep        = NULL;
    ptr->ofi_prov_name = NULL;
}
11141164

11151165
/* Release the heap storage referenced by an orte_rml_ofi_peer_t. */
static void pr_des(orte_rml_ofi_peer_t *ptr)
{
    /* free(NULL) is a no-op, so the provider name needs no guard */
    free(ptr->ofi_prov_name);

    /* the endpoint buffer is released only when a non-zero length is recorded */
    if (ptr->ofi_ep_len > 0) {
        free(ptr->ofi_ep);
    }
}

0 commit comments

Comments
 (0)