@@ -376,17 +376,13 @@ static void send_msg(int fd, short args, void *cbdata)
376
376
uint32_t total_packets ;
377
377
fi_addr_t dest_fi_addr ;
378
378
orte_rml_send_t * snd ;
379
- orte_rml_recv_t * rcv ;
380
- orte_self_send_xfer_t * xfer ;
381
379
orte_rml_ofi_request_t * ofi_send_req = OBJ_NEW ( orte_rml_ofi_request_t );
382
380
uint8_t ofi_prov_id = req -> ofi_prov_id ;
383
381
orte_rml_ofi_send_pkt_t * ofi_msg_pkt ;
384
382
size_t datalen_per_pkt , hdrsize , data_in_pkt ; // the length of data in per packet excluding the header size
385
383
orte_rml_ofi_peer_t * pr ;
386
384
uint64_t ui64 ;
387
385
struct sockaddr_in * ep_sockaddr ;
388
- int i , bytes ;
389
- char * ptr ;
390
386
391
387
snd = OBJ_NEW (orte_rml_send_t );
392
388
snd -> dst = * peer ;
@@ -408,85 +404,59 @@ static void send_msg(int fd, short args, void *cbdata)
408
404
ORTE_NAME_PRINT (peer ), tag );
409
405
410
406
411
- /* get the peer address by doing modex_receive */
407
+ /* get the peer address from our internal hash table */
408
+ opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
409
+ "%s getting contact info for DAEMON peer %s from internal hash table" ,
410
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), ORTE_NAME_PRINT (peer ));
411
+ memcpy (& ui64 , (char * )peer , sizeof (uint64_t ));
412
+ if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64 (& orte_rml_ofi .peers ,
413
+ ui64 , (void * * )& pr ) || NULL == pr )) {
414
+ opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
415
+ "%s rml:ofi: Send failed to get peer OFI contact info " ,
416
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
417
+ snd -> status = ORTE_ERR_ADDRESSEE_UNKNOWN ;
418
+ ORTE_RML_SEND_COMPLETE (snd );
419
+ //OBJ_RELEASE( ofi_send_req);
420
+ return ;
421
+ }
422
+ opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
423
+ "%s rml:ofi: OFI peer contact info got from hash table" ,
424
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
425
+ dest_ep_name = pr -> ofi_ep ;
426
+ dest_ep_namelen = pr -> ofi_ep_len ;
427
+
428
+ //[Debug] printing additional info of IP
429
+ switch ( orte_rml_ofi .ofi_prov [ofi_prov_id ].fabric_info -> addr_format )
430
+ {
431
+ case FI_SOCKADDR_IN :
432
+ /* Address is of type sockaddr_in (IPv4) */
433
+ /*[debug] - print the sockaddr - port and s_addr */
434
+ ep_sockaddr = (struct sockaddr_in * )dest_ep_name ;
435
+ opal_output_verbose (1 ,orte_rml_base_framework .framework_output ,
436
+ "%s peer %s epnamelen is %lu, port = %d (or) 0x%x, InternetAddr = 0x%s " ,
437
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),ORTE_NAME_PRINT (peer ),
438
+ (unsigned long )orte_rml_ofi .ofi_prov [ofi_prov_id ].epnamelen ,ntohs (ep_sockaddr -> sin_port ),
439
+ ntohs (ep_sockaddr -> sin_port ),inet_ntoa (ep_sockaddr -> sin_addr ));
440
+ /*[end debug]*/
441
+ break ;
442
+ }
443
+ //[Debug] end debug
412
444
opal_output_verbose (10 , orte_rml_base_framework .framework_output ,
413
- "%s calling OPAL_MODEX_RECV_STRING " , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ) );
414
- if (ORTE_PROC_IS_APP ) {
415
- asprintf (& pmix_key ,"%s%d" ,orte_rml_ofi .ofi_prov [ofi_prov_id ].fabric_info -> fabric_attr -> prov_name ,ofi_prov_id );
416
- opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
417
- "%s calling OPAL_MODEX_RECV_STRING for ORTE_PROC_APP peer - %s, key - %s " ,
418
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), ORTE_NAME_PRINT (peer ),pmix_key );
419
- OPAL_MODEX_RECV_STRING (ret , pmix_key , peer , (uint8_t * * ) & dest_ep_name , & dest_ep_namelen );
420
- opal_output_verbose (10 , orte_rml_base_framework .framework_output , "Returned from MODEX_RECV" );
421
- opal_output_verbose (50 , orte_rml_base_framework .framework_output ,
422
- "%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %lu" ,
423
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), ret , dest_ep_namelen );
424
- free (pmix_key );
425
- } else {
445
+ "%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu" ,
446
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
447
+ ORTE_NAME_PRINT (peer ), dest_ep_namelen );
448
+ ret = fi_av_insert (orte_rml_ofi .ofi_prov [ofi_prov_id ].av , dest_ep_name ,1 ,& dest_fi_addr ,0 ,NULL );
449
+ if ( ret != 1 ) {
426
450
opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
427
- "%s calling OPAL_MODEX_RECV_STRING for DAEMON peer %s" ,
428
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ), ORTE_NAME_PRINT (peer ));
429
- memcpy (& ui64 , (char * )peer , sizeof (uint64_t ));
430
- if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (& orte_rml_ofi .peers ,
431
- ui64 , (void * * )& pr ) || NULL == pr ) {
432
- opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
433
- "%s rml:ofi: Send failed to get peer OFI contact info " ,
434
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
435
- return ;
436
- }
437
- opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
438
- "%s rml:ofi: OFI peer contact info got from hash table" ,
439
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
440
- dest_ep_name = pr -> ofi_ep ;
441
- dest_ep_namelen = pr -> ofi_ep_len ;
442
- ret = OPAL_SUCCESS ;
443
- }
444
- if ( OPAL_SUCCESS == ret ) {
445
- //[Debug] printing additional info of IP
446
- switch ( orte_rml_ofi .ofi_prov [ofi_prov_id ].fabric_info -> addr_format )
447
- {
448
- case FI_SOCKADDR_IN :
449
- /* Address is of type sockaddr_in (IPv4) */
450
- /*[debug] - print the sockaddr - port and s_addr */
451
- ep_sockaddr = (struct sockaddr_in * )dest_ep_name ;
452
- opal_output_verbose (1 ,orte_rml_base_framework .framework_output ,
453
- "%s peer %s epnamelen is %d, port = %d (or) 0x%x, InternetAddr = 0x%s " ,
454
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),ORTE_NAME_PRINT (peer ),
455
- orte_rml_ofi .ofi_prov [ofi_prov_id ].epnamelen ,ntohs (ep_sockaddr -> sin_port ),
456
- ntohs (ep_sockaddr -> sin_port ),inet_ntoa (ep_sockaddr -> sin_addr ));
457
- /*[end debug]*/
458
- break ;
459
- }
460
- //[Debug] end debug
461
- opal_output_verbose (10 , orte_rml_base_framework .framework_output ,
462
- "%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu" ,
463
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
464
- ORTE_NAME_PRINT (peer ), dest_ep_namelen );
465
- ret = fi_av_insert (orte_rml_ofi .ofi_prov [ofi_prov_id ].av , dest_ep_name ,1 ,& dest_fi_addr ,0 ,NULL );
466
- if ( ret != 1 ) {
467
- opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
468
- "%s fi_av_insert failed in send_msg() returned %d" ,
469
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),ret );
470
- /* call the send-callback fn with error and return, also return failure status */
471
- snd -> status = ORTE_ERR_ADDRESSEE_UNKNOWN ;
451
+ "%s fi_av_insert failed in send_msg() returned %d" ,
452
+ ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),ret );
453
+ /* call the send-callback fn with error and return, also return failure status */
454
+ snd -> status = ORTE_ERR_ADDRESSEE_UNKNOWN ;
472
455
473
- ORTE_RML_SEND_COMPLETE (snd );
474
-
475
- return ;
476
- }
477
- } else {
478
-
479
- opal_output_verbose (1 , orte_rml_base_framework .framework_output ,
480
- "%s OPAL_MODEX_RECV failed to obtain %s peer ep name " ,
481
- ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
482
- ORTE_NAME_PRINT (peer ));
483
- /* call the send-callback fn with error and return, also return failure status */
484
- snd -> status = ORTE_ERR_ADDRESSEE_UNKNOWN ;
485
456
ORTE_RML_SEND_COMPLETE (snd );
486
- //OBJ_RELEASE( ofi_send_req);
457
+
487
458
return ;
488
459
}
489
-
490
460
ofi_send_req -> send = snd ;
491
461
ofi_send_req -> completion_count = 1 ;
492
462
@@ -625,7 +595,6 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod,
625
595
void * cbdata )
626
596
{
627
597
orte_rml_recv_t * rcv ;
628
- orte_rml_send_t * snd ;
629
598
int bytes ;
630
599
orte_self_send_xfer_t * xfer ;
631
600
int i ;
@@ -749,7 +718,6 @@ int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
749
718
void * cbdata )
750
719
{
751
720
orte_rml_recv_t * rcv ;
752
- orte_rml_send_t * snd ;
753
721
orte_self_send_xfer_t * xfer ;
754
722
ofi_send_request_t * req ;
755
723
orte_rml_ofi_module_t * ofi_mod = (orte_rml_ofi_module_t * )mod ;
0 commit comments