Skip to content

Commit 793ebc2

Browse files
anandhisRalph Castain
authored andcommitted
When opening a conduit, check for the transport preference in the following order -
(1) rml_ofi_transports mca parameter. This parameter should have the list of transports (currently ethernet,fabric are valid) fabric is higher priority if provided. (2) ORTE_RML_TRANSPORT_TYPE key with values "ethernet" or "fabric". "fabric" is higher priority. If specific provider is required use ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" or any other supported in system. modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c On send_msg choose the provider on local and peer to follow below rules - 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, then we use it - otherwise, we error out 2. if the user didn't specify a transport, then we look for matches against _all_ of our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. 3. if we can't find any match, then we error out modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c send_msg() -> Fixed case when the local provider chosen at time of opening conduit is not present in peer (destination) node modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c When opening conduit, checking for the transport preference in below order - (1) rml_ofi_transports mca parameter. This parameter should have the list of transports (currently ethernet,fabric are valid) fabric is higher priority if provided. (2) ORTE_RML_TRANSPORT_TYPE key with values "ethernet" or "fabric". "fabric" is higher priority. If specific provider is required use ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" or any other supported in system. 
modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c On send_msg choose the provider on local and peer to follow below rules - 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, then we use it - otherwise, we error out 2. if the user didn't specify a transport, then we look for matches against _all_ of our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. 3. if we can't find any match, then we error out modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c send_msg() -> Fixed case when the local provider chosen at time of opening conduit is not present in peer (destination) node modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi Jayakumar <[email protected]>
1 parent fbeb7b9 commit 793ebc2

File tree

3 files changed

+316
-90
lines changed

3 files changed

+316
-90
lines changed

orte/mca/rml/ofi/rml_ofi.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,17 @@ typedef struct {
158158
} ;
159159
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t;
160160

161+
/* On the first send initiated to a new peer,
162+
* select the peer provider, the peer ep-addr, and the
163+
* local provider, and store them in an orte_rml_ofi_peer_t instance.
164+
* Insert this instance into the hash table.
165+
* */
161166
typedef struct {
162167
opal_object_t super;
163-
void* ofi_ep;
164-
size_t ofi_ep_len;
168+
char* ofi_prov_name; /* peer (dest) provider chosen */
169+
void* ofi_ep; /* peer (dest) ep chosen */
170+
size_t ofi_ep_len; /* peer (dest) ep length */
171+
uint8_t src_prov_id; /* index of the local (src) provider used for this peer */
165172
} orte_rml_ofi_peer_t;
166173
OBJ_CLASS_DECLARATION(orte_rml_ofi_peer_t);
167174

@@ -200,6 +207,7 @@ int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
200207
/* OFI Recv handler */
201208
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id);
202209

210+
bool user_override(void);
203211
END_C_DECLS
204212

205213
#endif

orte/mca/rml/ofi/rml_ofi_component.c

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,18 @@ orte_rml_ofi_module_t orte_rml_ofi = {
8080
/* Local variables */
8181
static bool init_done = false;
8282
static char *ofi_transports_supported = NULL;
83+
static char *initial_ofi_transports_supported = NULL;
8384
static bool ofi_desired = false;
8485

86+
/* return true if user override for choice of ofi provider */
87+
bool user_override(void)
88+
{
89+
if( 0 == strcmp(initial_ofi_transports_supported, ofi_transports_supported ) )
90+
return false;
91+
else
92+
return true;
93+
}
94+
8595
static int
8696
rml_ofi_component_open(void)
8797
{
@@ -232,7 +242,8 @@ static int rml_ofi_component_register(void)
232242
{
233243
mca_base_component_t *component = &mca_rml_ofi_component.base;
234244

235-
ofi_transports_supported = strdup("fabric,ethernet");
245+
initial_ofi_transports_supported = strdup("fabric");
246+
ofi_transports_supported = strdup(initial_ofi_transports_supported);
236247
mca_base_component_var_register(component, "transports",
237248
"Comma-delimited list of transports to support (default=\"fabric,ethernet\"",
238249
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
@@ -923,29 +934,54 @@ static int rml_ofi_component_init(void)
923934
int get_ofi_prov_id( opal_list_t *attributes)
924935
{
925936

937+
bool choose_fabric = false, choice_made = false;
926938
int ofi_prov_id = RML_OFI_PROV_ID_INVALID, prov_num=0;
927939
char *provider = NULL, *transport = NULL;
928940
char *ethernet="sockets", *fabric="psm2";
929941
struct fi_info *cur_fi;
942+
char *comp_attrib = NULL;
943+
char **comps;
944+
int i;
930945

931-
/* check the list of attributes to see if we should respond
946+
/* check the list of attributes in below order
932947
* Attribute should have ORTE_RML_TRANSPORT_ATTRIB key
933-
* with values "ethernet" or "fabric"
948+
* with values "ethernet" or "fabric". "fabric" is higher priority.
934949
* (or) ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA"
935950
* if both above attributes are missing return failure
936951
*/
937-
if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) &&
938-
NULL != transport) {
939-
if( 0 == strcmp( transport, "ethernet") ) {
940-
provider = ethernet;
941-
} else if ( 0 == strcmp( transport, "fabric") ) {
942-
provider = fabric;
943-
}
952+
//if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) {
953+
954+
if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) &&
955+
NULL != comp_attrib) {
956+
comps = opal_argv_split(comp_attrib, ',');
957+
for (i=0; NULL != comps[i] && choice_made == false ; i++) {
958+
if (NULL != strstr(ofi_transports_supported, comps[i])) {
959+
if (0 == strcmp( comps[i], "ethernet")) {
960+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
961+
"%s - Opening conduit using OFI ethernet/sockets provider",
962+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
963+
opal_argv_free(comps);
964+
provider = ethernet;
965+
choose_fabric = false;
966+
choice_made = false; /* continue to see if fabric is requested */
967+
} else if ( 0 == strcmp ( comps[i], "fabric")) {
968+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
969+
"%s - Opening conduit using OFI fabric provider",
970+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
971+
opal_argv_free(comps);
972+
choose_fabric = true;
973+
provider = NULL;
974+
choice_made = true; /* fabric is highest priority so don't check for anymore */
975+
}
976+
}
977+
}
944978
}
945979
/* if from the transport we don't know which provider we want, then check for the ORTE_RML_OFI_PROV_NAME_ATTRIB */
946980
if ( NULL == provider) {
947-
if (orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING) &&
948-
NULL != provider) {
981+
orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING);
982+
}
983+
/* either ethernet-sockets or specific is requested. Proceed to choose that provider */
984+
if ( NULL != provider) {
949985
// loop the orte_rml_ofi.ofi_provs[] and find the provider name that matches
950986
for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) {
951987
cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info;
@@ -954,11 +990,27 @@ int get_ofi_prov_id( opal_list_t *attributes)
954990
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),provider,cur_fi->fabric_attr->prov_name);
955991
if ( strcmp(provider,cur_fi->fabric_attr->prov_name) == 0) {
956992
ofi_prov_id = prov_num;
957-
}
993+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
994+
"%s - Choosing provider %s",
995+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
996+
cur_fi->fabric_attr->prov_name);
997+
}
998+
}
999+
} else if ( choose_fabric ) {
1000+
// "fabric" is requested, choose the first fabric(non-ethernet) provider
1001+
for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) {
1002+
cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info;
1003+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
1004+
"%s -choosing fabric -> comparing %s != %s ",
1005+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ethernet,cur_fi->fabric_attr->prov_name);
1006+
if ( strcmp(ethernet, cur_fi->fabric_attr->prov_name) != 0) {
1007+
ofi_prov_id = prov_num;
1008+
opal_output_verbose(20,orte_rml_base_framework.framework_output,
1009+
"%s - Choosing fabric provider %s",
1010+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_fi->fabric_attr->prov_name);
9581011
}
9591012
}
9601013
}
961-
9621014
opal_output_verbose(20,orte_rml_base_framework.framework_output,
9631015
"%s - get_ofi_prov_id(), returning ofi_prov_id=%d ",
9641016
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ofi_prov_id);
@@ -1076,22 +1128,18 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
10761128
"%s - ORTE_RML_TRANSPORT_TYPE = %s ",
10771129
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp_attrib);
10781130
comps = opal_argv_split(comp_attrib, ',');
1079-
for (i=0; 0 == i; i++) {
1131+
for (i=0; NULL != comps[i]; i++) {
10801132
if (NULL != strstr(ofi_transports_supported, comps[i])) {
10811133
/* we are a candidate, */
10821134
opal_output_verbose(20,orte_rml_base_framework.framework_output,
1083-
"%s - Forcibly returning ofi socket provider for ethernet transport request",
1135+
"%s - Opening conduit using OFI.. ",
10841136
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
10851137
opal_argv_free(comps);
1086-
OBJ_CONSTRUCT(&provider, opal_list_t);
1087-
orte_set_attribute(&provider, ORTE_RML_PROVIDER_ATTRIB,
1088-
ORTE_ATTR_LOCAL, "sockets", OPAL_STRING);
1089-
return make_module(get_ofi_prov_id(&provider));
1138+
return make_module(get_ofi_prov_id(attributes));
10901139
}
10911140
}
10921141
opal_argv_free(comps);
10931142
}
1094-
/* end [Debug] */
10951143

10961144
/* Alternatively, check the attributes to see if we qualify - we only handle
10971145
* "pt2pt" */
@@ -1108,12 +1156,16 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
11081156

11091157
/* Initializer for an orte_rml_ofi_peer_t (presumably its class constructor;
 * the registration is outside this view): no provider name, no endpoint
 * data, and an invalid local provider index. */
static void pr_cons(orte_rml_ofi_peer_t *ptr)
{
    /* local (src) side: no provider selected yet */
    ptr->src_prov_id = RML_OFI_PROV_ID_INVALID;

    /* peer (dest) side: nothing allocated yet */
    ptr->ofi_ep_len = 0;
    ptr->ofi_ep = NULL;
    ptr->ofi_prov_name = NULL;
}
11141164

11151165
/* Cleanup for an orte_rml_ofi_peer_t (presumably its class destructor;
 * the registration is outside this view): releases the provider name and,
 * when a non-zero length was recorded, the endpoint buffer. */
static void pr_des(orte_rml_ofi_peer_t *ptr)
{
    /* free(NULL) is a no-op, so the name needs no explicit guard */
    free(ptr->ofi_prov_name);

    /* the endpoint buffer is only released when its length is non-zero */
    if (ptr->ofi_ep_len > 0) {
        free(ptr->ofi_ep);
    }
}

0 commit comments

Comments
 (0)