Skip to content

Commit c30a429

Browse files
committed
Btl tcp: Refactoring non-blocking send/receive function
Moving non-blocking send/receive function to btl_tcp will help reusing these function where ever needed. In this case we plan to reuse receive function to retrive magic string to validate established connection is from mpi process. Signed-off-by: Mohan Gandhi <[email protected]>
1 parent af85e48 commit c30a429

File tree

3 files changed

+88
-44
lines changed

3 files changed

+88
-44
lines changed

opal/mca/btl/tcp/btl_tcp.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@
3131
#include "opal/mca/mpool/base/base.h"
3232
#include "opal/mca/mpool/mpool.h"
3333
#include "opal/mca/btl/base/btl_base_error.h"
34+
#include "opal/opal_socket_errno.h"
3435

3536
#include "btl_tcp.h"
3637
#include "btl_tcp_frag.h"
3738
#include "btl_tcp_proc.h"
3839
#include "btl_tcp_endpoint.h"
3940

41+
4042
mca_btl_tcp_module_t mca_btl_tcp_module = {
4143
.super = {
4244
.btl_component = &mca_btl_tcp_component.super,
@@ -531,3 +533,64 @@ void mca_btl_tcp_dump(struct mca_btl_base_module_t* base_btl,
531533
}
532534
#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */
533535
}
536+
537+
538+
/*
539+
* A blocking recv on a non-blocking socket. Used to receive the small
540+
* amount of connection information that identifies the endpoints
541+
* endpoint.
542+
*/
543+
544+
int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size)
545+
{
546+
unsigned char* ptr = (unsigned char*)data;
547+
size_t cnt = 0;
548+
while (cnt < size) {
549+
int retval = recv(sd, ((char *)ptr) + cnt, size - cnt, 0);
550+
/* remote closed connection */
551+
if (0 == retval) {
552+
BTL_ERROR(("remote peer unexpectedly closed connection while I was waiting for blocking message"));
553+
return -1;
554+
}
555+
556+
/* socket is non-blocking so handle errors */
557+
if (retval < 0) {
558+
if (opal_socket_errno != EINTR &&
559+
opal_socket_errno != EAGAIN &&
560+
opal_socket_errno != EWOULDBLOCK) {
561+
BTL_ERROR(("recv(%d) failed: %s (%d)", sd, strerror(opal_socket_errno), opal_socket_errno));
562+
return -1;
563+
}
564+
continue;
565+
}
566+
cnt += retval;
567+
}
568+
return cnt;
569+
}
570+
571+
572+
/*
573+
* A blocking send on a non-blocking socket. Used to send the small
574+
* amount of connection information that identifies the endpoints
575+
* endpoint.
576+
*/
577+
578+
int mca_btl_tcp_send_blocking(int sd, const void* data, size_t size)
579+
{
580+
unsigned char* ptr = (unsigned char*)data;
581+
size_t cnt = 0;
582+
while(cnt < size) {
583+
int retval = send(sd, ((const char *)ptr) + cnt, size - cnt, 0);
584+
if (retval < 0) {
585+
if (opal_socket_errno != EINTR &&
586+
opal_socket_errno != EAGAIN &&
587+
opal_socket_errno != EWOULDBLOCK) {
588+
BTL_ERROR(("send() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno));
589+
return -1;
590+
}
591+
continue;
592+
}
593+
cnt += retval;
594+
}
595+
return cnt;
596+
}

opal/mca/btl/tcp/btl_tcp.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,5 +351,19 @@ mca_btl_tcp_dump(struct mca_btl_base_module_t* btl,
351351
*/
352352
int mca_btl_tcp_ft_event(int state);
353353

354+
/*
355+
* A blocking send on a non-blocking socket. Used to send the small
356+
* amount of connection information that identifies the endpoints
357+
* endpoint.
358+
*/
359+
int mca_btl_tcp_send_blocking(int sd, const void* data, size_t size);
360+
361+
/*
362+
* A blocking recv on a non-blocking socket. Used to receive the small
363+
* amount of connection information that identifies the endpoints
364+
* endpoint.
365+
*/
366+
int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size);
367+
354368
END_C_DECLS
355369
#endif

opal/mca/btl/tcp/btl_tcp_endpoint.c

Lines changed: 11 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -371,31 +371,18 @@ int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tcp
371371

372372

373373
/*
374-
* A blocking send on a non-blocking socket. Used to send the small amount of connection
375-
* information that identifies the endpoints endpoint.
374+
* A blocking send on a non-blocking socket. Used to send the small
375+
* amount of connection information that identifies the endpoints endpoint.
376376
*/
377377
static int
378378
mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
379-
void* data, size_t size)
379+
const void* data, size_t size)
380380
{
381-
unsigned char* ptr = (unsigned char*)data;
382-
size_t cnt = 0;
383-
while(cnt < size) {
384-
int retval = send(btl_endpoint->endpoint_sd, (const char *)ptr+cnt, size-cnt, 0);
385-
if(retval < 0) {
386-
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
387-
BTL_ERROR(("send(%d, %p, %lu/%lu) failed: %s (%d)",
388-
btl_endpoint->endpoint_sd, data, cnt, size,
389-
strerror(opal_socket_errno), opal_socket_errno));
390-
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
391-
mca_btl_tcp_endpoint_close(btl_endpoint);
392-
return -1;
393-
}
394-
continue;
395-
}
396-
cnt += retval;
381+
int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size);
382+
if (ret < 0) {
383+
mca_btl_tcp_endpoint_close(btl_endpoint);
397384
}
398-
return cnt;
385+
return ret;
399386
}
400387

401388

@@ -573,31 +560,11 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint
573560
*/
574561
static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
575562
{
576-
unsigned char* ptr = (unsigned char*)data;
577-
size_t cnt = 0;
578-
while(cnt < size) {
579-
int retval = recv(btl_endpoint->endpoint_sd, (char *)ptr+cnt, size-cnt, 0);
580-
581-
/* remote closed connection */
582-
if(retval == 0) {
583-
mca_btl_tcp_endpoint_close(btl_endpoint);
584-
return cnt;
585-
}
586-
587-
/* socket is non-blocking so handle errors */
588-
if(retval < 0) {
589-
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
590-
BTL_ERROR(("recv(%d, %lu/%lu) failed: %s (%d)",
591-
btl_endpoint->endpoint_sd, cnt, size, strerror(opal_socket_errno), opal_socket_errno));
592-
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
593-
mca_btl_tcp_endpoint_close(btl_endpoint);
594-
return -1;
595-
}
596-
continue;
597-
}
598-
cnt += retval;
563+
int ret = mca_btl_tcp_recv_blocking(btl_endpoint->endpoint_sd, data, size);
564+
if (ret <= 0) {
565+
mca_btl_tcp_endpoint_close(btl_endpoint);
599566
}
600-
return cnt;
567+
return ret;
601568
}
602569

603570

0 commit comments

Comments
 (0)