Skip to content

Commit 3c77d91

Browse files
authored
Merge pull request networkupstools#3303 from jimklimov/issue-3302
upsd: chunk the MAXCONN-sized array processing into several smaller "sysmaxconn"-sized polls as needed
2 parents cc72213 + 7a17b0d commit 3c77d91

File tree

6 files changed

+186
-49
lines changed

6 files changed

+186
-49
lines changed

NEWS.adoc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ For a complete and more detailed list of changes, please refer to the
1212
ChangeLog file (generated for release archives), or to the Git version
1313
control history for "live" codebase.
1414

15-
1615
PLANNED: Release notes for NUT 2.8.5 - what's new since 2.8.4
1716
-------------------------------------------------------------
1817

@@ -296,6 +295,11 @@ https://github.com/networkupstools/nut/milestone/12
296295
comes into play and breaks things. [issue #661]
297296
* Fixed `LISTEN *` handling for `upsd.exe` in NUT for Windows builds.
298297
[PR #3237]
298+
* Extended processing of `MAXCONN` setting to allow larger values than the
299+
operating system allows, by only waiting for that number of Unix sockets
300+
or Windows `HANDLE`s at a time, and then moving on to the next chunk.
301+
The system-provided value can be further limited by the `NUT_SYSMAXCONN_LIMIT`
302+
environment variable (e.g. in tests). [issue #3302]
299303

300304
- `upsdrvctl` tool updates:
301305
* Make use of `setproctag()` and `getproctag()` to report parent/child

common/common.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3778,7 +3778,8 @@ static void vupslog(int priority, const char *fmt, va_list va, int use_strerror)
37783778
{
37793779
int ret, errno_orig = errno;
37803780
#ifdef HAVE_VA_COPY_VARIANT
3781-
size_t bufsize = 128;
3781+
/* Most of our debug messages fit into this */
3782+
size_t bufsize = 256;
37823783
#else
37833784
/* err on the safe(r) side, as re-runs can truncate
37843785
* the output when varargs are re-used */

conf/upsd.conf.sample

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@
121121
# LISTEN address and each client count as one connection. If the server
122122
# runs out of connections, it will no longer accept new incoming client
123123
# connections. Only set this if you know exactly what you're doing.
124+
# Note that on some platforms there may be a smaller number of file descriptors
125+
# or handles that can be polled in one operation; the server would then poll
126+
# several smaller groups until it handles all the connections it tracks.
127+
# With a large number of connections, this may however impact the delays between
128+
# processing loops, and time before an incoming message is seen and processed.
124129

125130
# =======================================================================
126131
# CERTFILE <certificate file>

docs/man/upsd.conf.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,12 @@ This defaults to maximum number allowed on your system. Each UPS, each
150150
`LISTEN` address and each client count as one connection. If the server
151151
runs out of connections, it will no longer accept new incoming client
152152
connections. Only set this if you know exactly what you're doing.
153+
+
154+
Note that on some platforms there may be a smaller number of file descriptors
155+
or handles that can be polled in one operation; the server would then poll
156+
several smaller groups until it handles all the connections it tracks.
157+
With a large number of connections, this may however impact the delays between
158+
processing loops, and time before an incoming message is seen and processed.
153159

154160
*CERTFILE 'certificate file'*::
155161

docs/nut.dict

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
personal_ws-1.1 en 3659 utf-8
1+
personal_ws-1.1 en 3660 utf-8
22
AAC
33
AAS
44
ABI
@@ -1241,6 +1241,7 @@ SX
12411241
SXI
12421242
SXL
12431243
SYMLINKDIR
1244+
SYSMAXCONN
12441245
SafeNet
12451246
Salicru
12461247
Salvia

server/upsd.c

Lines changed: 166 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
2008 Arjen de Korte <adkorte-guest@alioth.debian.org>
66
2011 - 2012 Arnaud Quette <arnaud.quette.free.fr>
77
2019 Eaton (author: Arnaud Quette <ArnaudQuette@eaton.com>)
8-
2020 - 2025 Jim Klimov <jimklimov+nut@gmail.com>
8+
2020 - 2026 Jim Klimov <jimklimov+nut@gmail.com>
99
1010
This program is free software; you can redistribute it and/or modify
1111
it under the terms of the GNU General Public License as published by
@@ -94,8 +94,12 @@ int allow_no_device = 0;
9494
*/
9595
int allow_not_all_listeners = 0;
9696

97-
/* preloaded to {OPEN_MAX} in main, can be overridden via upsd.conf */
97+
/* preloaded to POSIX sysconf(_SC_OPEN_MAX) or WIN32 MAXIMUM_WAIT_OBJECTS in main
98+
* and elsewhere, the run-time value can be overridden via upsd.conf `MAXCONN`
99+
* option (may cause partial waits chunk by chunk, if sysmaxconn is smaller).
100+
*/
98101
nfds_t maxconn = 0;
102+
static nfds_t sysmaxconn = 0;
99103

100104
/* preloaded to STATEPATH in main, can be overridden via upsd.conf */
101105
char *statepath = NULL;
@@ -112,7 +116,7 @@ nut_ctype_t *firstclient = NULL;
112116
/* default is to listen on all local interfaces */
113117
static stype_t *firstaddr = NULL;
114118

115-
static int opt_af = AF_UNSPEC;
119+
static int opt_af = AF_UNSPEC;
116120

117121
typedef enum {
118122
DRIVER = 1,
@@ -1218,22 +1222,68 @@ static void upsd_cleanup(void)
12181222
upsdebugx(1, "%s: finished", __func__);
12191223
}
12201224

1221-
static void poll_reload(void)
1225+
static void update_sysmaxconn(void)
12221226
{
1223-
long ret;
1224-
size_t maxalloc;
1227+
long l;
1228+
char *s = getenv("NUT_SYSMAXCONN_LIMIT");
12251229

12261230
#ifndef WIN32
1227-
ret = sysconf(_SC_OPEN_MAX);
1231+
/* default to system limit (may be overridden in upsd.conf) */
1232+
/* FIXME: Check for overflows (and int size of nfds_t vs. long) - see get_max_pid_t() for example */
1233+
l = sysconf(_SC_OPEN_MAX);
12281234
#else /* WIN32 */
1229-
ret = (long)MAXIMUM_WAIT_OBJECTS;
1235+
/* hard-coded 64 (from ddk/wdm.h or winnt.h) */
1236+
l = (long)MAXIMUM_WAIT_OBJECTS;
12301237
#endif /* WIN32 */
12311238

1232-
if ((intmax_t)ret < (intmax_t)maxconn) {
1239+
if (l < 1) {
1240+
/* TOTHINK: Not fail, but use a conservative fallback number?
1241+
* Can we trust the OS to support any?
1242+
*/
12331243
fatalx(EXIT_FAILURE,
1234-
"Your system limits the maximum number of connections to %ld\n"
1235-
"but you requested %" PRIdMAX ". The server won't start until this\n"
1236-
"problem is resolved.\n", ret, (intmax_t)maxconn);
1244+
"System reported an absurd value %ld as maximum number of connections.\n"
1245+
"The server won't start until this problem is resolved.\n",
1246+
l);
1247+
}
1248+
1249+
/* Note this historically also serves as
1250+
* the initial/default MAXCONN setting
1251+
* (so site/platform-dependent).
1252+
*/
1253+
sysmaxconn = (nfds_t)l;
1254+
if (maxconn < 1) {
1255+
upsdebugx(1, "%s: defaulting maxconn to sysmaxconn: %ld",
1256+
__func__, l);
1257+
maxconn = sysmaxconn;
1258+
}
1259+
1260+
/* Support envvar for NIT or similar tests.
1261+
* Still do not exceed what the OS said.
1262+
*/
1263+
if (s && str_to_long(s, &l, 10)) {
1264+
if (l > 0 && (nfds_t)l < sysmaxconn) {
1265+
upslogx(LOG_INFO, "Adjusting sysmaxconn according to NUT_SYSMAXCONN_LIMIT envvar: %ld", l);
1266+
sysmaxconn = (nfds_t)l;
1267+
} else {
1268+
upslogx(LOG_WARNING, "Adjusting sysmaxconn according to NUT_SYSMAXCONN_LIMIT envvar failed: %ld is out of range. Keeping OS-provided %ld.",
1269+
l, (long)sysmaxconn);
1270+
}
1271+
} /* else nothing to bother about */
1272+
}
1273+
1274+
static void poll_reload(void)
1275+
{
1276+
size_t maxalloc;
1277+
1278+
/* Not likely this would change, but refresh just in case */
1279+
update_sysmaxconn();
1280+
1281+
if ((intmax_t)sysmaxconn < (intmax_t)maxconn) {
1282+
upslogx(LOG_WARNING,
1283+
"Your system limits the maximum number of connections to %" PRIdMAX "\n"
1284+
"but you requested %" PRIdMAX ". The server may handle connections\n"
1285+
"in smaller groups, maybe affecting efficiency and response time.\n",
1286+
(intmax_t)sysmaxconn, (intmax_t)maxconn);
12371287
}
12381288

12391289
if (1 > maxconn) {
@@ -1242,12 +1292,8 @@ static void poll_reload(void)
12421292
"The server won't start until this problem is resolved.\n", (intmax_t)maxconn);
12431293
}
12441294

1245-
#ifndef WIN32
12461295
/* How many items can we stuff into the array? */
12471296
maxalloc = SIZE_MAX / sizeof(void *);
1248-
#else /* WIN32 */
1249-
maxalloc = MAXIMUM_WAIT_OBJECTS;
1250-
#endif /* WIN32 */
12511297
if ((uintmax_t)maxalloc < (uintmax_t)maxconn) {
12521298
fatalx(EXIT_FAILURE,
12531299
"You requested %" PRIdMAX " as maximum number of connections, but we can only allocate %" PRIuSIZE ".\n"
@@ -1497,14 +1543,15 @@ static void mainloop(void)
14971543
nfds_t i;
14981544
#else /* WIN32 */
14991545
DWORD ret;
1500-
pipe_conn_t * conn;
1546+
pipe_conn_t *conn;
1547+
size_t chunk = 0;
15011548
#endif /* WIN32 */
15021549

15031550
size_t nfds_wanted = 0, /* Connections we looked at (some may be invalid) */
15041551
nfds_considered = 0; /* Connections we wanted to poll (but might be over maxconn limit) */
15051552
nfds_t nfds = 0;
15061553
upstype_t *ups;
1507-
nut_ctype_t *client, *cnext;
1554+
nut_ctype_t *client, *cnext;
15081555
stype_t *server;
15091556
time_t now;
15101557

@@ -1644,18 +1691,58 @@ static void mainloop(void)
16441691
upsdebugx(2, "%s: polling %" PRIdMAX " filedescriptors; some stats: "
16451692
"considered %" PRIdMAX " connections, "
16461693
"wanted to actually poll %" PRIdMAX
1647-
" and was constrained by maxconn=%" PRIdMAX,
1694+
" and was constrained by maxconn=%" PRIdMAX
1695+
" and chunked by sysmaxconn=%" PRIdMAX,
16481696
__func__, (intmax_t)nfds, (intmax_t)nfds_considered,
1649-
(intmax_t)nfds_wanted, (intmax_t)maxconn);
1697+
(intmax_t)nfds_wanted, (intmax_t)maxconn, (intmax_t)sysmaxconn);
16501698

1651-
if (nfds_wanted != nfds || nfds_wanted >= maxconn) {
1699+
if (nfds_wanted != nfds || nfds_wanted > maxconn) {
16521700
upslogx(LOG_ERR, "upsd polling %" PRIdMAX " filedescriptors,"
16531701
" but wanted to poll %" PRIdMAX
1654-
" and was constrained by maxconn=%" PRIdMAX,
1702+
" and was constrained by maxconn=%" PRIdMAX
1703+
" (see upsd.conf MAXCONN setting to adjust)",
16551704
(intmax_t)nfds, (intmax_t)nfds_wanted, (intmax_t)maxconn);
16561705
}
16571706

1658-
ret = poll(fds, nfds, 2000);
1707+
if (nfds <= sysmaxconn) {
1708+
ret = poll(fds, nfds, 2000);
1709+
} else {
1710+
/* Chunk it all; try to fit into same 2 sec as above.
1711+
* Note that nfds at the moment may be smaller than
1712+
* maxconn (allocated array size).
1713+
*/
1714+
size_t last_chunk = nfds % sysmaxconn, chunk,
1715+
chunks = nfds / sysmaxconn + (last_chunk ? 1 : 0);
1716+
int poll_TO = 2000 / chunks, tmpret;
1717+
1718+
if (poll_TO < 10)
1719+
poll_TO = 10;
1720+
1721+
upsdebugx(4, "%s: chunked filedescriptor polling via %" PRIuSIZE
1722+
" chunks, last one sized %" PRIuSIZE
1723+
", with timeout of %d msec per chunk",
1724+
__func__, chunks, last_chunk, poll_TO);
1725+
1726+
ret = 0;
1727+
for (chunk = 0; chunk < chunks; chunk++) {
1728+
upsdebugx(5,
1729+
"%s: chunked filedescriptor polling #%" PRIuSIZE
1730+
" of %" PRIuSIZE " chunks, with %d hits so far",
1731+
__func__, chunk, chunks, ret);
1732+
tmpret = poll(&fds[chunk * sysmaxconn],
1733+
(last_chunk && chunk == chunks - 1 ? last_chunk : sysmaxconn),
1734+
poll_TO);
1735+
if (tmpret < 0) {
1736+
upsdebug_with_errno(2,
1737+
"%s: failed during chunked polling, handled %" PRIuSIZE
1738+
" of %" PRIuSIZE " chunks so far, with %d hits",
1739+
__func__, chunk, chunks, ret);
1740+
ret = tmpret;
1741+
break;
1742+
}
1743+
ret += tmpret;
1744+
}
1745+
}
16591746

16601747
if (ret == 0) {
16611748
upsdebugx(2, "%s: no data available", __func__);
@@ -1942,19 +2029,58 @@ static void mainloop(void)
19422029
upsdebugx(2, "%s: wait for %" PRIdMAX " filedescriptors; some stats: "
19432030
"considered %" PRIdMAX " connections, "
19442031
"wanted to actually poll %" PRIdMAX
1945-
" and was constrained by maxconn=%" PRIdMAX,
2032+
" and was constrained by maxconn=%" PRIdMAX
2033+
" and chunked by sysmaxconn=%" PRIdMAX,
19462034
__func__, (intmax_t)nfds, (intmax_t)nfds_considered,
1947-
(intmax_t)nfds_wanted, (intmax_t)maxconn);
2035+
(intmax_t)nfds_wanted, (intmax_t)maxconn, (intmax_t)sysmaxconn);
19482036

1949-
if (nfds_wanted != nfds || nfds_wanted >= maxconn) {
2037+
if (nfds_wanted != nfds || nfds_wanted > maxconn) {
19502038
upslogx(LOG_ERR, "upsd polling %" PRIuMAX " filedescriptors,"
19512039
" but wanted to poll %" PRIuMAX
1952-
" and was constrained by maxconn=%" PRIuMAX,
2040+
" and was constrained by maxconn=%" PRIuMAX
2041+
" (see upsd.conf MAXCONN setting to adjust)",
19532042
(uintmax_t)nfds, (uintmax_t)nfds_wanted, (uintmax_t)maxconn);
19542043
}
19552044

1956-
/* https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitformultipleobjects */
1957-
ret = WaitForMultipleObjects(nfds,fds,FALSE,2000);
2045+
/* https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitformultipleobjects
2046+
* We handle whoever lights up first, one per loop cycle.
2047+
*/
2048+
chunk = 0;
2049+
if (nfds <= sysmaxconn) {
2050+
ret = WaitForMultipleObjects(nfds, fds, FALSE, 2000);
2051+
} else {
2052+
/* Chunk it all; try to fit into same 2 sec as above.
2053+
* Note that nfds at the moment may be smaller than
2054+
* maxconn (allocated array size).
2055+
*/
2056+
size_t last_chunk = nfds % sysmaxconn,
2057+
chunks = nfds / sysmaxconn + (last_chunk ? 1 : 0);
2058+
DWORD poll_TO = 2000 / chunks, tmpret;
2059+
2060+
if (poll_TO < 10)
2061+
poll_TO = 10;
2062+
2063+
upsdebugx(4, "%s: chunked filedescriptor polling via %" PRIuSIZE
2064+
" chunks, last one sized %" PRIuSIZE
2065+
", with timeout of %" PRIi64 " msec per chunk",
2066+
__func__, chunks, last_chunk, poll_TO);
2067+
2068+
ret = WAIT_TIMEOUT;
2069+
for (chunk = 0; chunk < chunks; chunk++) {
2070+
upsdebugx(5,
2071+
"%s: chunked filedescriptor polling #%" PRIuSIZE
2072+
" of %" PRIuSIZE " chunks, with %" PRIu64 " hits so far",
2073+
__func__, chunk, chunks, ret);
2074+
tmpret = WaitForMultipleObjects(
2075+
(last_chunk && chunk == chunks - 1 ? last_chunk : sysmaxconn),
2076+
&fds[chunk * sysmaxconn],
2077+
FALSE, poll_TO);
2078+
if (tmpret != WAIT_TIMEOUT) {
2079+
ret = tmpret;
2080+
break;
2081+
}
2082+
}
2083+
}
19582084

19592085
upsdebugx(6, "%s: wait for filedescriptors done: %" PRIu64, __func__, ret);
19602086

@@ -1981,7 +2107,7 @@ static void mainloop(void)
19812107
#ifdef HAVE_PRAGMA_GCC_DIAGNOSTIC_IGNORED_TAUTOLOGICAL_CONSTANT_OUT_OF_RANGE_COMPARE
19822108
# pragma GCC diagnostic ignored "-Wtautological-constant-out-of-range-compare"
19832109
#endif
1984-
if (ret >= WAIT_ABANDONED_0 && ret <= WAIT_ABANDONED_0 + nfds - 1) {
2110+
if (ret >= WAIT_ABANDONED_0 && ret <= WAIT_ABANDONED_0 + (nfds < sysmaxconn ? nfds : sysmaxconn) - 1) {
19852111
/* One abandoned mutex object that satisfied the wait? */
19862112
ret = ret - WAIT_ABANDONED_0;
19872113
upsdebugx(5, "%s: got abandoned FD array item: %" PRIu64, __func__, nfds, ret);
@@ -1991,7 +2117,7 @@ static void mainloop(void)
19912117
/* Which one handle was triggered this time? */
19922118
/* Note: WAIT_OBJECT_0 may be currently defined as 0,
19932119
* but docs insist on checking and shifting the range */
1994-
ret = ret - WAIT_OBJECT_0;
2120+
ret = ret - WAIT_OBJECT_0 + chunk * sysmaxconn;
19952121
upsdebugx(5, "%s: got event on FD array item: %" PRIu64, __func__, nfds, ret);
19962122
}
19972123
#if (defined HAVE_PRAGMA_GCC_DIAGNOSTIC_PUSH_POP) && ( (defined HAVE_PRAGMA_GCC_DIAGNOSTIC_IGNORED_TYPE_LIMITS) || (defined HAVE_PRAGMA_GCC_DIAGNOSTIC_IGNORED_TAUTOLOGICAL_CONSTANT_OUT_OF_RANGE_COMPARE) )
@@ -2284,14 +2410,14 @@ int main(int argc, char **argv)
22842410
}
22852411

22862412
{ /* scoping */
2287-
char *s = getenv("NUT_DEBUG_LEVEL");
2288-
int l;
2289-
if (s && str_to_int(s, &l, 10)) {
2290-
if (l > 0 && nut_debug_level_args < 1) {
2413+
char *s = getenv("NUT_DEBUG_LEVEL");
2414+
int lvl;
2415+
if (s && str_to_int(s, &lvl, 10)) {
2416+
if (lvl > 0 && nut_debug_level_args < 1) {
22912417
upslogx(LOG_INFO, "Defaulting debug verbosity to NUT_DEBUG_LEVEL=%d "
2292-
"since none was requested by command-line options", l);
2293-
nut_debug_level = l;
2294-
nut_debug_level_args = l;
2418+
"since none was requested by command-line options", lvl);
2419+
nut_debug_level = lvl;
2420+
nut_debug_level_args = lvl;
22952421
} /* else follow -D settings */
22962422
} /* else nothing to bother about */
22972423
}
@@ -2444,14 +2570,8 @@ int main(int argc, char **argv)
24442570
chroot_start(chroot_path);
24452571
}
24462572

2447-
#ifndef WIN32
2448-
/* default to system limit (may be overridden in upsd.conf) */
2449-
/* FIXME: Check for overflows (and int size of nfds_t vs. long) - see get_max_pid_t() for example */
2450-
maxconn = (nfds_t)sysconf(_SC_OPEN_MAX);
2451-
#else /* WIN32 */
2452-
/* hard-coded 64 (from ddk/wdm.h or winnt.h) */
2453-
maxconn = MAXIMUM_WAIT_OBJECTS;
2454-
#endif /* WIN32 */
2573+
/* Also initializes maxconn to what the OS says */
2574+
update_sysmaxconn();
24552575

24562576
/* handle upsd.conf */
24572577
load_upsdconf(0); /* 0 = initial */

0 commit comments

Comments
 (0)