Skip to content

Commit 7002535

Browse files
author
Ralph Castain
authored
Merge pull request #3671 from rhc54/topic/ofi
We cannot use OFI to determine when daemons can finalize as we don't …
2 parents 484004b + 919d7fc commit 7002535

File tree

6 files changed

+134
-125
lines changed

6 files changed

+134
-125
lines changed

orte/mca/errmgr/default_hnp/errmgr_default_hnp.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,17 @@ static void hnp_abort(int error_code, char *fmt, ...)
136136
char *outmsg = NULL;
137137
orte_timer_t *timer;
138138

139+
/* only do this once */
140+
if (orte_abnormal_term_ordered) {
141+
return;
142+
}
143+
139144
/* ensure we exit with non-zero status */
140145
ORTE_UPDATE_EXIT_STATUS(error_code);
141146

147+
/* set the aborting flag */
148+
orte_abnormal_term_ordered = true;
149+
142150
/* If there was a message, construct it */
143151
va_start(arglist, fmt);
144152
if (NULL != fmt) {

orte/mca/rml/base/base.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,9 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t);
202202
do { \
203203
orte_rml_recv_t *msg; \
204204
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
205-
"%s Message posted at %s:%d", \
205+
"%s Message posted at %s:%d for tag %d", \
206206
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
207-
__FILE__, __LINE__); \
207+
__FILE__, __LINE__, (t)); \
208208
msg = OBJ_NEW(orte_rml_recv_t); \
209209
msg->sender.jobid = (p)->jobid; \
210210
msg->sender.vpid = (p)->vpid; \

orte/mca/rml/ofi/rml_ofi_component.c

Lines changed: 66 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "opal/mca/base/base.h"
1414
#include "opal/util/argv.h"
15+
#include "opal/util/net.h"
1516
#include "opal/util/output.h"
1617
#include "opal/mca/backtrace/backtrace.h"
1718
#include "opal/mca/event/event.h"
@@ -85,6 +86,7 @@ orte_rml_ofi_module_t orte_rml_ofi = {
8586
/* Local variables */
8687
static bool init_done = false;
8788
static char *ofi_transports_supported = NULL;
89+
static bool ofi_desired = false;
8890

8991
static int
9092
rml_ofi_component_open(void)
@@ -98,6 +100,7 @@ rml_ofi_component_open(void)
98100
orte_rml_ofi.ofi_prov_open_num = 0;
99101
OBJ_CONSTRUCT(&orte_rml_ofi.peers, opal_hash_table_t);
100102
opal_hash_table_init(&orte_rml_ofi.peers, 128);
103+
OBJ_CONSTRUCT(&orte_rml_ofi.recv_msg_queue_list, opal_list_t);
101104

102105
for( uint8_t ofi_prov_id=0; ofi_prov_id < MAX_OFI_PROVIDERS ; ofi_prov_id++) {
103106
orte_rml_ofi.ofi_prov[ofi_prov_id].fabric = NULL;
@@ -116,6 +119,12 @@ rml_ofi_component_open(void)
116119

117120
opal_output_verbose(10,orte_rml_base_framework.framework_output," from %s:%d rml_ofi_component_open()",__FILE__,__LINE__);
118121

122+
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON) {
123+
return ORTE_ERROR;
124+
}
125+
if (!ofi_desired) {
126+
return ORTE_ERROR;
127+
}
119128
return ORTE_SUCCESS;
120129
}
121130

@@ -218,7 +227,7 @@ rml_ofi_component_close(void)
218227
(void **)&value, &node);
219228
while (OPAL_SUCCESS == rc) {
220229
if (NULL != value) {
221-
OBJ_RELEASE(value);
230+
OBJ_RELEASE(value);
222231
}
223232
rc = opal_hash_table_get_next_key_uint64 (&orte_rml_ofi.peers, &key,
224233
(void **) &value, node, &node);
@@ -242,7 +251,16 @@ static int rml_ofi_component_register(void)
242251
OPAL_INFO_LVL_2,
243252
MCA_BASE_VAR_SCOPE_LOCAL,
244253
&ofi_transports_supported);
245-
opal_output(0, "OFI TRANSPORTS %s", ofi_transports_supported);
254+
255+
256+
ofi_desired = false;
257+
mca_base_component_var_register(component, "desired",
258+
"Use OFI for coll conduit",
259+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
260+
OPAL_INFO_LVL_2,
261+
MCA_BASE_VAR_SCOPE_LOCAL,
262+
&ofi_desired);
263+
246264
return ORTE_SUCCESS;
247265
}
248266

@@ -982,7 +1000,6 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
9821000
"%s - Entering rml_ofi_open_conduit()",
9831001
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
9841002

985-
9861003
/* Open all ofi endpoints */
9871004
if (!init_done) {
9881005
rml_ofi_component_init();
@@ -1135,6 +1152,12 @@ static void ofi_set_contact_info (const char *uri)
11351152
return;
11361153
}
11371154

1155+
/* Open all ofi endpoints */
1156+
if (!init_done) {
1157+
rml_ofi_component_init();
1158+
init_done = true;
1159+
}
1160+
11381161
uris = strdup(uri);
11391162
process_uri(uris);
11401163
free(uris);
@@ -1146,10 +1169,10 @@ static void process_uri( char *uri)
11461169
orte_process_name_t peer;
11471170
char *cptr, *ofiuri;
11481171
char **uris=NULL;
1149-
int rc, i=0, tot_reqd = 1, tot_found = 0;
1172+
int rc, i=0, cur_ofi_prov;
11501173
uint64_t ui64;
11511174
orte_rml_ofi_peer_t *pr;
1152-
struct sockaddr_in* ep_sockaddr;
1175+
struct sockaddr_in *ep_sockaddr, *ep_sockaddr2;
11531176

11541177
/* find the first semi-colon in the string */
11551178
cptr = strchr(uri, ';');
@@ -1176,27 +1199,21 @@ static void process_uri( char *uri)
11761199
"%s:OFI set_contact_info peer %s is me",
11771200
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
11781201
ORTE_NAME_PRINT(&peer));
1179-
//skip adding to hashtable for HNP
1180-
if (!ORTE_PROC_IS_HNP) {
1181-
return;
1182-
} else {
1183-
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1184-
"%s:OFI set_contact_info - HNP process so proceeding to add to hashtable",
1185-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
1186-
}
1202+
return;
11871203
}
11881204

11891205
/* split the rest of the uri into component parts */
11901206
uris = opal_argv_split(cptr, ';');
11911207

11921208
/* get the peer object for this process */
11931209
memcpy(&ui64, (char*)&peer, sizeof(uint64_t));
1194-
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
1195-
ui64, (void**)&pr) ||
1210+
pr = NULL;
1211+
if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
1212+
ui64, (void**)&pr)) ||
11961213
NULL == pr) {
11971214
pr = OBJ_NEW(orte_rml_ofi_peer_t);
11981215
/* populate the peer object with the ofi addresses */
1199-
for(i=0; NULL != uris[i] && tot_found < tot_reqd; i++) {
1216+
for(i=0; NULL != uris[i]; i++) {
12001217
ofiuri = strdup(uris[i]);
12011218
if (NULL == ofiuri) {
12021219
opal_output_verbose(2, orte_rml_base_framework.framework_output,
@@ -1211,35 +1228,43 @@ static void process_uri( char *uri)
12111228
ep_sockaddr = malloc( sizeof ( struct sockaddr_in) );
12121229
/* ofiuri for socket provider is of format - ofi-socket:<sin_family,sin_addr,sin_port> */
12131230
convert_to_sockaddr(ofiuri, ep_sockaddr);
1214-
pr->ofi_ep = (void *)ep_sockaddr;
1215-
tot_found++;
1231+
/* see if we have this subnet in our providers - we take
1232+
* the first one that matches (other than loopback) */
1233+
for( cur_ofi_prov=0; cur_ofi_prov < orte_rml_ofi.ofi_prov_open_num ; cur_ofi_prov++ ) {
1234+
ep_sockaddr2 = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name;
1235+
if (opal_net_samenetwork((struct sockaddr*)ep_sockaddr, (struct sockaddr*)ep_sockaddr2, 24)) {
1236+
pr->ofi_ep = (void *)ep_sockaddr;
1237+
if (OPAL_SUCCESS !=
1238+
(rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) {
1239+
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1240+
"%s: ofi peer address insertion failed for peer %s ",
1241+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1242+
ORTE_NAME_PRINT(&peer));
1243+
ORTE_ERROR_LOG(rc);
1244+
}
1245+
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1246+
"%s: ofi peer address inserted for peer %s ",
1247+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1248+
ORTE_NAME_PRINT(&peer));
1249+
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1250+
"%s: ofi sock address length = %zd ",
1251+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1252+
pr->ofi_ep_len);
1253+
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep;
1254+
opal_output_verbose(15,orte_rml_base_framework.framework_output,
1255+
"%s OFI set_name() port = 0x%x, InternetAddr = %s ",
1256+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1257+
ntohs(ep_sockaddr->sin_port),
1258+
inet_ntoa(ep_sockaddr->sin_addr));
1259+
opal_argv_free(uris);
1260+
return;
1261+
}
1262+
}
12161263
}
12171264
free( ofiuri);
12181265
}
1219-
/* if atleast one OFI address is known for peer insert it */
1220-
if( 1 <= tot_found ) {
1221-
if (OPAL_SUCCESS !=
1222-
(rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) {
1223-
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1224-
"%s: ofi peer address insertion failed for peer %s ",
1225-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1226-
ORTE_NAME_PRINT(&peer));
1227-
ORTE_ERROR_LOG(rc);
1228-
}
1229-
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1230-
"%s: ofi peer address inserted for peer %s ",
1231-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1232-
ORTE_NAME_PRINT(&peer));
1233-
opal_output_verbose(15, orte_rml_base_framework.framework_output,
1234-
"%s: ofi sock address length = %zd ",
1235-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1236-
pr->ofi_ep_len);
1237-
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep;
1238-
opal_output_verbose(15,orte_rml_base_framework.framework_output,
1239-
"%s OFI set_name() port = 0x%x, InternetAddr = %s ",
1240-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
1241-
}
12421266
}
1267+
12431268
opal_output_verbose(10,orte_rml_base_framework.framework_output,
12441269
"%s OFI end of set_contact_info()",
12451270
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

orte/mca/rml/ofi/rml_ofi_send.c

Lines changed: 47 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -376,17 +376,13 @@ static void send_msg(int fd, short args, void *cbdata)
376376
uint32_t total_packets;
377377
fi_addr_t dest_fi_addr;
378378
orte_rml_send_t *snd;
379-
orte_rml_recv_t *rcv;
380-
orte_self_send_xfer_t *xfer;
381379
orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t );
382380
uint8_t ofi_prov_id = req->ofi_prov_id;
383381
orte_rml_ofi_send_pkt_t* ofi_msg_pkt;
384382
size_t datalen_per_pkt, hdrsize, data_in_pkt; // the length of data in per packet excluding the header size
385383
orte_rml_ofi_peer_t* pr;
386384
uint64_t ui64;
387385
struct sockaddr_in* ep_sockaddr;
388-
int i, bytes;
389-
char *ptr;
390386

391387
snd = OBJ_NEW(orte_rml_send_t);
392388
snd->dst = *peer;
@@ -408,85 +404,59 @@ static void send_msg(int fd, short args, void *cbdata)
408404
ORTE_NAME_PRINT(peer), tag);
409405

410406

411-
/* get the peer address by doing modex_receive */
407+
/* get the peer address from our internal hash table */
408+
opal_output_verbose(1, orte_rml_base_framework.framework_output,
409+
"%s getting contact info for DAEMON peer %s from internal hash table",
410+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer));
411+
memcpy(&ui64, (char*)peer, sizeof(uint64_t));
412+
if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
413+
ui64, (void**)&pr) || NULL == pr)) {
414+
opal_output_verbose(1, orte_rml_base_framework.framework_output,
415+
"%s rml:ofi: Send failed to get peer OFI contact info ",
416+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
417+
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
418+
ORTE_RML_SEND_COMPLETE(snd);
419+
//OBJ_RELEASE( ofi_send_req);
420+
return;
421+
}
422+
opal_output_verbose(1, orte_rml_base_framework.framework_output,
423+
"%s rml:ofi: OFI peer contact info got from hash table",
424+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
425+
dest_ep_name = pr->ofi_ep;
426+
dest_ep_namelen = pr->ofi_ep_len;
427+
428+
//[Debug] printing additional info of IP
429+
switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format)
430+
{
431+
case FI_SOCKADDR_IN :
432+
/* Address is of type sockaddr_in (IPv4) */
433+
/*[debug] - print the sockaddr - port and s_addr */
434+
ep_sockaddr = (struct sockaddr_in*)dest_ep_name;
435+
opal_output_verbose(1,orte_rml_base_framework.framework_output,
436+
"%s peer %s epnamelen is %lu, port = %d (or) 0x%x, InternetAddr = 0x%s ",
437+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),
438+
(unsigned long)orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port),
439+
ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
440+
/*[end debug]*/
441+
break;
442+
}
443+
//[Debug] end debug
412444
opal_output_verbose(10, orte_rml_base_framework.framework_output,
413-
"%s calling OPAL_MODEX_RECV_STRING ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
414-
if (ORTE_PROC_IS_APP ) {
415-
asprintf(&pmix_key,"%s%d",orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name,ofi_prov_id);
416-
opal_output_verbose(1, orte_rml_base_framework.framework_output,
417-
"%s calling OPAL_MODEX_RECV_STRING for ORTE_PROC_APP peer - %s, key - %s ",
418-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer),pmix_key );
419-
OPAL_MODEX_RECV_STRING(ret, pmix_key, peer , (uint8_t **) &dest_ep_name, &dest_ep_namelen);
420-
opal_output_verbose(10, orte_rml_base_framework.framework_output, "Returned from MODEX_RECV");
421-
opal_output_verbose(50, orte_rml_base_framework.framework_output,
422-
"%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %lu",
423-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret, dest_ep_namelen);
424-
free(pmix_key);
425-
} else {
445+
"%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu",
446+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
447+
ORTE_NAME_PRINT(peer), dest_ep_namelen);
448+
ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL);
449+
if( ret != 1) {
426450
opal_output_verbose(1, orte_rml_base_framework.framework_output,
427-
"%s calling OPAL_MODEX_RECV_STRING for DAEMON peer %s",
428-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer));
429-
memcpy(&ui64, (char*)peer, sizeof(uint64_t));
430-
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
431-
ui64, (void**)&pr) || NULL == pr) {
432-
opal_output_verbose(1, orte_rml_base_framework.framework_output,
433-
"%s rml:ofi: Send failed to get peer OFI contact info ",
434-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
435-
return;
436-
}
437-
opal_output_verbose(1, orte_rml_base_framework.framework_output,
438-
"%s rml:ofi: OFI peer contact info got from hash table",
439-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
440-
dest_ep_name = pr->ofi_ep;
441-
dest_ep_namelen = pr->ofi_ep_len;
442-
ret = OPAL_SUCCESS;
443-
}
444-
if ( OPAL_SUCCESS == ret) {
445-
//[Debug] printing additional info of IP
446-
switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format)
447-
{
448-
case FI_SOCKADDR_IN :
449-
/* Address is of type sockaddr_in (IPv4) */
450-
/*[debug] - print the sockaddr - port and s_addr */
451-
ep_sockaddr = (struct sockaddr_in*)dest_ep_name;
452-
opal_output_verbose(1,orte_rml_base_framework.framework_output,
453-
"%s peer %s epnamelen is %d, port = %d (or) 0x%x, InternetAddr = 0x%s ",
454-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),
455-
orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port),
456-
ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
457-
/*[end debug]*/
458-
break;
459-
}
460-
//[Debug] end debug
461-
opal_output_verbose(10, orte_rml_base_framework.framework_output,
462-
"%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu",
463-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
464-
ORTE_NAME_PRINT(peer), dest_ep_namelen);
465-
ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL);
466-
if( ret != 1) {
467-
opal_output_verbose(1, orte_rml_base_framework.framework_output,
468-
"%s fi_av_insert failed in send_msg() returned %d",
469-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret );
470-
/* call the send-callback fn with error and return, also return failure status */
471-
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
451+
"%s fi_av_insert failed in send_msg() returned %d",
452+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret );
453+
/* call the send-callback fn with error and return, also return failure status */
454+
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
472455

473-
ORTE_RML_SEND_COMPLETE(snd);
474-
475-
return;
476-
}
477-
} else {
478-
479-
opal_output_verbose(1, orte_rml_base_framework.framework_output,
480-
"%s OPAL_MODEX_RECV failed to obtain %s peer ep name ",
481-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
482-
ORTE_NAME_PRINT(peer));
483-
/* call the send-callback fn with error and return, also return failure status */
484-
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
485456
ORTE_RML_SEND_COMPLETE(snd);
486-
//OBJ_RELEASE( ofi_send_req);
457+
487458
return;
488459
}
489-
490460
ofi_send_req->send = snd;
491461
ofi_send_req->completion_count = 1;
492462

@@ -625,7 +595,6 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod,
625595
void* cbdata)
626596
{
627597
orte_rml_recv_t *rcv;
628-
orte_rml_send_t *snd;
629598
int bytes;
630599
orte_self_send_xfer_t *xfer;
631600
int i;
@@ -749,7 +718,6 @@ int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
749718
void* cbdata)
750719
{
751720
orte_rml_recv_t *rcv;
752-
orte_rml_send_t *snd;
753721
orte_self_send_xfer_t *xfer;
754722
ofi_send_request_t *req;
755723
orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod;

0 commit comments

Comments
 (0)