|
75 | 75 | #include "orte/mca/oob/tcp/oob_tcp_common.h" |
76 | 76 | #include "orte/mca/oob/tcp/oob_tcp_connection.h" |
77 | 77 |
|
| 78 | +#define OOB_SEND_MAX_RETRIES 3 /* limit on would-block results within one send_msg call: each one bumps a shared counter, and once the counter reaches this value send_msg stops jumping back to its retry label and returns ORTE_ERR_RESOURCE_BUSY or ORTE_ERR_WOULD_BLOCK */ |
| 79 | + |
78 | 80 | void mca_oob_tcp_queue_msg(int sd, short args, void *cbdata) |
79 | 81 | { |
80 | 82 | mca_oob_tcp_send_t *snd = (mca_oob_tcp_send_t*)cbdata; |
@@ -105,7 +107,7 @@ void mca_oob_tcp_queue_msg(int sd, short args, void *cbdata) |
105 | 107 | static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) |
106 | 108 | { |
107 | 109 | struct iovec iov[2]; |
108 | | - int iov_count; |
| 110 | + int iov_count, retries = 0; |
109 | 111 | ssize_t remain = msg->sdbytes, rc; |
110 | 112 |
|
111 | 113 | OPAL_TIMING_EVENT((&tm_oob, "to %s %d bytes", |
@@ -146,12 +148,20 @@ static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) |
146 | 148 | * but let the event lib cycle so other messages |
147 | 149 | * can progress while this socket is busy |
148 | 150 | */ |
| 151 | + ++retries; |
| 152 | + if (retries < OOB_SEND_MAX_RETRIES) { |
| 153 | + goto retry; |
| 154 | + } |
149 | 155 | return ORTE_ERR_RESOURCE_BUSY; |
150 | 156 | } else if (opal_socket_errno == EWOULDBLOCK) { |
151 | 157 | /* tell the caller to keep this message on active, |
152 | 158 | * but let the event lib cycle so other messages |
153 | 159 | * can progress while this socket is busy |
154 | 160 | */ |
| 161 | + ++retries; |
| 162 | + if (retries < OOB_SEND_MAX_RETRIES) { |
| 163 | + goto retry; |
| 164 | + } |
155 | 165 | return ORTE_ERR_WOULD_BLOCK; |
156 | 166 | } else { |
157 | 167 | /* we hit an error and cannot progress this message */ |
|
0 commit comments