Skip to content

Commit 056f53c

Browse files
committed
session server UPDATE remove endpoint socket retries
Use CH configuration max attempts instead. Fixes CESNET/netopeer2#1278
1 parent 743ff9f commit 056f53c

File tree

3 files changed

+61
-48
lines changed

3 files changed

+61
-48
lines changed

src/session_client.c

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,12 +1399,18 @@ nc_connect_unix(const char *address, struct ly_ctx *ctx)
13991399
return NULL;
14001400
}
14011401

1402-
/*
1403-
Helper for a non-blocking connect (which is required because of the locking
1404-
concept for e.g. call home settings). For more details see nc_sock_connect().
1402+
/**
1403+
* @brief Try to connect a socket, optionally a pending one from a previous attempt.
1404+
*
1405+
* @param[in] timeout_ms Timeout in ms to wait for the connection to be fully established, -1 to block.
1406+
* @param[in,out] sock_pending Optional previously created socked that was not fully connected yet. If provided and
1407+
* connected, is set to -1.
1408+
* @param[in] res Addrinfo resource to use when creating a new socket.
1409+
* @param[in] ka Keepalives to set.
1410+
* @return Connected socket or -1 on error.
14051411
*/
14061412
static int
1407-
_non_blocking_connect(int timeout, int *sock_pending, struct addrinfo *res, struct nc_keepalives *ka)
1413+
sock_connect(int timeout_ms, int *sock_pending, struct addrinfo *res, struct nc_keepalives *ka)
14081414
{
14091415
int flags, ret, error;
14101416
int sock = -1;
@@ -1453,20 +1459,23 @@ _non_blocking_connect(int timeout, int *sock_pending, struct addrinfo *res, stru
14531459
}
14541460
}
14551461
}
1456-
ts.tv_sec = timeout;
1457-
ts.tv_usec = 0;
14581462

14591463
FD_ZERO(&wset);
14601464
FD_SET(sock, &wset);
14611465

1462-
if ((ret = select(sock + 1, NULL, &wset, NULL, (timeout != -1) ? &ts : NULL)) < 0) {
1466+
/* wait for some data on the socket */
1467+
if (timeout_ms != -1) {
1468+
ts.tv_sec = timeout_ms / 1000;
1469+
ts.tv_usec = (timeout_ms % 1000) * 1000;
1470+
}
1471+
if ((ret = select(sock + 1, NULL, &wset, NULL, (timeout_ms != -1) ? &ts : NULL)) < 0) {
14631472
ERR(NULL, "select() failed (%s).", strerror(errno));
14641473
goto cleanup;
14651474
}
14661475

14671476
if (ret == 0) {
14681477
/* there was a timeout */
1469-
VRB(NULL, "Timed out after %ds (%s).", timeout, strerror(errno));
1478+
VRB(NULL, "Timed out after %d ms (%s).", timeout_ms, strerror(errno));
14701479
if (sock_pending) {
14711480
/* no sock-close, we'll try it again */
14721481
*sock_pending = sock;
@@ -1494,6 +1503,10 @@ _non_blocking_connect(int timeout, int *sock_pending, struct addrinfo *res, stru
14941503
goto cleanup;
14951504
}
14961505

1506+
/* connected */
1507+
if (sock_pending) {
1508+
*sock_pending = -1;
1509+
}
14971510
return sock;
14981511

14991512
cleanup:
@@ -1504,24 +1517,16 @@ _non_blocking_connect(int timeout, int *sock_pending, struct addrinfo *res, stru
15041517
return -1;
15051518
}
15061519

1507-
/* A given timeout value limits the time how long the function blocks. If it has to block
1508-
only for some seconds, a socket connection might not yet have been fully established.
1509-
Therefore the active (pending) socket will be stored in *sock_pending, but the return
1510-
value will be -1. In such a case a subsequent invokation is required, by providing the
1511-
stored sock_pending, again.
1512-
In general, if this function returns -1, when a timeout has been given, this function
1513-
has to be invoked, until it returns a valid socket.
1514-
*/
15151520
int
1516-
nc_sock_connect(const char *host, uint16_t port, int timeout, struct nc_keepalives *ka, int *sock_pending, char **ip_host)
1521+
nc_sock_connect(const char *host, uint16_t port, int timeout_ms, struct nc_keepalives *ka, int *sock_pending, char **ip_host)
15171522
{
15181523
int i, opt;
15191524
int sock = sock_pending ? *sock_pending : -1;
15201525
struct addrinfo hints, *res_list = NULL, *res;
15211526
char *buf, port_s[6]; /* length of string representation of short int */
15221527
void *addr;
15231528

1524-
DBG(NULL, "nc_sock_connect(%s, %u, %d, %d)", host, port, timeout, sock);
1529+
DBG(NULL, "nc_sock_connect(%s, %u, %d, %d)", host, port, timeout_ms, sock);
15251530

15261531
/* no pending socket */
15271532
if (sock == -1) {
@@ -1538,7 +1543,7 @@ nc_sock_connect(const char *host, uint16_t port, int timeout, struct nc_keepaliv
15381543
}
15391544

15401545
for (res = res_list; res != NULL; res = res->ai_next) {
1541-
sock = _non_blocking_connect(timeout, sock_pending, res, ka);
1546+
sock = sock_connect(timeout_ms, sock_pending, res, ka);
15421547
if (sock == -1) {
15431548
if (!sock_pending || (*sock_pending == -1)) {
15441549
/* try the next resource */
@@ -1582,7 +1587,7 @@ nc_sock_connect(const char *host, uint16_t port, int timeout, struct nc_keepaliv
15821587
} else {
15831588
/* try to get a connection with the pending socket */
15841589
assert(sock_pending);
1585-
sock = _non_blocking_connect(timeout, sock_pending, NULL, ka);
1590+
sock = sock_connect(timeout_ms, sock_pending, NULL, ka);
15861591
}
15871592

15881593
return sock;

src/session_p.h

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ struct nc_server_opts {
277277
char *address;
278278
uint16_t port;
279279
int sock_pending;
280-
int sock_retries;
281280
struct nc_keepalives ka;
282281

283282
union {
@@ -350,19 +349,14 @@ struct nc_server_opts {
350349
#define NC_CH_NO_ENDPT_WAIT 1000
351350

352351
/**
353-
* Number of sockets kept waiting to be accepted.
352+
* Timeout in msec for a Call Home socket to establish its connection.
354353
*/
355-
#define NC_REVERSE_QUEUE 5
354+
#define NC_CH_CONNECT_TIMEOUT 500
356355

357356
/**
358-
* Timeout for connecting Call Home socket to a client (s).
359-
*/
360-
#define NC_SOCKET_CH_TIMEOUT 5
361-
362-
/**
363-
* Number of retires of connection Call Home socket to a client.
357+
* Number of sockets kept waiting to be accepted.
364358
*/
365-
#define NC_SOCKET_CH_RETRIES 5
359+
#define NC_REVERSE_QUEUE 5
366360

367361
/**
368362
* @brief Type of the session
@@ -662,13 +656,15 @@ NC_MSG_TYPE nc_handshake_io(struct nc_session *session);
662656
*
663657
* @param[in] host Hostname to connect to.
664658
* @param[in] port Port to connect on.
665-
* @param[in] timeout for blocking the connect+select call (-1 for infinite).
659+
* @param[in] timeout_ms Timeout in ms for blocking the connect + select call (-1 for infinite).
666660
* @param[in] ka Keepalives parameters.
667-
* @param[in,out] sock_pending for exchanging the pending socket, if the blocking timeout was != -1
661+
* @param[in,out] sock_pending Previous pending socket. If set, equal to -1, and the connection is still in progress
662+
* after @p timeout, it is set to the pending socket but -1 is returned. If NULL, the socket is closed on timeout.
668663
* @param[out] ip_host Optional parameter with string IP address of the connected host.
669664
* @return Connected socket or -1 on error.
670665
*/
671-
int nc_sock_connect(const char *host, uint16_t port, int timeout, struct nc_keepalives *ka, int *sock_pending, char **ip_host);
666+
int nc_sock_connect(const char *host, uint16_t port, int timeout_ms, struct nc_keepalives *ka, int *sock_pending,
667+
char **ip_host);
672668

673669
/**
674670
* @brief Accept a new socket connection.

src/session_server.c

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3295,7 +3295,18 @@ nc_server_ch_client_set_max_attempts(const char *client_name, uint8_t max_attemp
32953295
return 0;
32963296
}
32973297

3298-
/* client lock is expected to be held */
3298+
/**
3299+
* @brief Create a connection for an endpoint.
3300+
*
3301+
* Client lock is expected to be held.
3302+
*
3303+
* @param[in] endpt Endpoint to use.
3304+
* @param[in] acquire_ctx_cb Callback for acquiring the libyang context.
3305+
* @param[in] release_ctx_cb Callback for releasing the libyang context.
3306+
* @param[in] ctx_cb_data Context callbacks data.
3307+
* @param[out] session Created NC session.
3308+
* @return NC_MSG values.
3309+
*/
32993310
static NC_MSG_TYPE
33003311
nc_connect_ch_endpt(struct nc_ch_endpt *endpt, nc_server_ch_session_acquire_ctx_cb acquire_ctx_cb,
33013312
nc_server_ch_session_release_ctx_cb release_ctx_cb, void *ctx_cb_data, struct nc_session **session)
@@ -3306,21 +3317,10 @@ nc_connect_ch_endpt(struct nc_ch_endpt *endpt, nc_server_ch_session_acquire_ctx_
33063317
struct timespec ts_cur;
33073318
char *ip_host;
33083319

3309-
sock = nc_sock_connect(endpt->address, endpt->port, NC_SOCKET_CH_TIMEOUT, &endpt->ka, &endpt->sock_pending, &ip_host);
3320+
sock = nc_sock_connect(endpt->address, endpt->port, NC_CH_CONNECT_TIMEOUT, &endpt->ka, &endpt->sock_pending, &ip_host);
33103321
if (sock < 0) {
3311-
if (endpt->sock_pending > -1) {
3312-
++endpt->sock_retries;
3313-
if (endpt->sock_retries == NC_SOCKET_CH_RETRIES) {
3314-
ERR(NULL, "Failed to connect socket %d after %d retries, closing.", endpt->sock_pending, NC_SOCKET_CH_RETRIES);
3315-
close(endpt->sock_pending);
3316-
endpt->sock_pending = -1;
3317-
endpt->sock_retries = 0;
3318-
}
3319-
}
33203322
return NC_MSG_ERROR;
33213323
}
3322-
/* no need to store the socket as pending any longer */
3323-
endpt->sock_pending = -1;
33243324

33253325
/* acquire context */
33263326
ctx = acquire_ctx_cb(ctx_cb_data);
@@ -3548,8 +3548,10 @@ nc_ch_client_thread(void *arg)
35483548
cur_endpt = &client->ch_endpts[0];
35493549
cur_endpt_name = strdup(cur_endpt->name);
35503550

3551-
VRB(NULL, "Call Home client \"%s\" connecting...", data->client_name);
35523551
while (1) {
3552+
if (!cur_attempts) {
3553+
VRB(NULL, "Call Home client \"%s\" endpoint \"%s\" connecting...", data->client_name, cur_endpt_name);
3554+
}
35533555
msgtype = nc_connect_ch_endpt(cur_endpt, data->acquire_ctx_cb, data->release_ctx_cb, data->ctx_cb_data, &session);
35543556

35553557
if (msgtype == NC_MSG_HELLO) {
@@ -3648,18 +3650,28 @@ nc_ch_client_thread(void *arg)
36483650

36493651
if (next_endpt_index >= client->ch_endpt_count) {
36503652
/* endpoint was removed, start with the first one */
3653+
VRB(NULL, "Call Home client \"%s\" endpoint \"%s\" removed.", data->client_name, cur_endpt_name);
36513654
next_endpt_index = 0;
36523655
cur_attempts = 0;
36533656
} else if (cur_attempts == client->max_attempts) {
36543657
/* we have tried to connect to this endpoint enough times */
3658+
VRB(NULL, "Call Home client \"%s\" endpoint \"%s\" failed connection attempt limit %" PRIu8 " reached.",
3659+
data->client_name, cur_endpt_name, client->max_attempts);
3660+
3661+
/* clear a pending socket, if any */
3662+
cur_endpt = &client->ch_endpts[next_endpt_index];
3663+
if (cur_endpt->sock_pending > -1) {
3664+
close(cur_endpt->sock_pending);
3665+
cur_endpt->sock_pending = -1;
3666+
}
3667+
36553668
if (next_endpt_index < client->ch_endpt_count - 1) {
36563669
/* just go to the next endpoint */
36573670
++next_endpt_index;
36583671
} else {
36593672
/* cur_endpoint is the last, start with the first one */
36603673
next_endpt_index = 0;
36613674
}
3662-
36633675
cur_attempts = 0;
36643676
} /* else we keep the current one */
36653677
}

0 commit comments

Comments
 (0)