diff --git a/NEWS.adoc b/NEWS.adoc index c30766204a..8998dfaff1 100644 --- a/NEWS.adoc +++ b/NEWS.adoc @@ -242,6 +242,10 @@ https://github.com/networkupstools/nut/milestone/12 - `upsdrvctl` tool updates: * Make use of `setproctag()` and `getproctag()` to report parent/child process names. [#3084] + * The global-level `debug_min` setting in `ups.conf` now also applies + to bumping the debug verbosity of `upsdrvctl` tool. [#3276] + * If we fail to stop a driver by signal (e.g. PID file was not saved), + retry to `INSTCMD driver.exit` it by socket protocol. [#3277] - `upslog` tool updates: * Updated `help()` and failure messages to suggest `-m '*,-'` for logging diff --git a/conf/ups.conf.sample b/conf/ups.conf.sample index d9eabf1853..4d04554001 100644 --- a/conf/ups.conf.sample +++ b/conf/ups.conf.sample @@ -113,9 +113,10 @@ maxretry = 3 # The default is 75 seconds. # # debug_min: OPTIONAL. Specify a minimum debug level for all driver daemons -# (when specified at global level), or for this driver daemon -# (when specified in a driver section), e.g. for troubleshooting -# a deployment. This does not directly impact the foreground or +# and for the upsdrvctl tool (when specified at global level), +# or for this driver daemon (when specified in a driver section), +# e.g. for troubleshooting a deployment without changing init +# scripts. This does not directly impact the foreground or # background running mode. If both the global and driver level # `debug_min` are set, the driver-level setting takes precedence. # Command-line option `-D` can only increase this verbosity level. diff --git a/docs/man/ups.conf.txt b/docs/man/ups.conf.txt index d083b091af..9211df7121 100644 --- a/docs/man/ups.conf.txt +++ b/docs/man/ups.conf.txt @@ -183,8 +183,9 @@ or with device filesystems re-generated by an OS for every reboot. *debug_min* 'INTEGER':: -Optional. Specify a minimum debug level for all driver daemons, e.g. for -troubleshooting a deployment, without impacting foreground or background +Optional. Specify a minimum debug level for all driver daemons and +for the linkman:upsdrvctl[8] tool, e.g. for troubleshooting a deployment +without changing init scripts, without impacting foreground or background running mode directly. Command-line option `-D` can only increase this verbosity level. diff --git a/docs/man/upsdrvctl.txt b/docs/man/upsdrvctl.txt index cdd31deb77..dee0488f47 100644 --- a/docs/man/upsdrvctl.txt +++ b/docs/man/upsdrvctl.txt @@ -81,6 +81,11 @@ global section. *-D*:: Raise the debug level. Use this multiple times for additional details. +Alternatively the value can be set via +debug_min+ in global section of +linkman:ups.conf[5]. Note the global setting applies to both the tool +and a started driver; but another line in a driver's individual section +can override that with a larger or smaller value (including a `debug_min=0` +to disable tracing the driver by default). + Note that this does not preclude the `upsdrvctl` tool from exiting after its job is done (however an explicit *-F* option does). diff --git a/drivers/upsdrvctl.c b/drivers/upsdrvctl.c index 08bbf12e26..8487d7492e 100644 --- a/drivers/upsdrvctl.c +++ b/drivers/upsdrvctl.c @@ -92,6 +92,17 @@ static char *pt_root = NULL, *pt_user = NULL, *pt_cmd = NULL; static int nut_debug_level_passthrough = 0; static int nut_foreground_passthrough = -1; +/* Users can pass a -D[...] option to enable debugging. + * For the service tracing purposes, also the ups.conf + * can define a debug_min value in the global or device + * section, to set the minimal debug level (CLI provided + * value less than that would not have effect, can only + * have more). Finally, it can also be set over socket + * protocol, taking precedence over other inputs. + */ +static int nut_debug_level_args = -1; +static int nut_debug_level_global = -1; + /* Keep track of requested operation (function pointer) */ static void (*command)(const ups_t *) = NULL; @@ -107,6 +118,28 @@ static int signal_flag = 0; static char *signal_flag = NULL; #endif /* WIN32 */ +/* Reduced version of code from drivers/main.c */ +static void assign_debug_level(void) { + /* CLI debug level can not be smaller than debug_min specified + * in ups.conf, and for upsdrvctl tool we care about the global + * value (not any specified for a driver config section). + * Note that a non-zero debug_min does not impact foreground + * running mode. + */ + + /* At minimum, use the verbosity we started with - via CLI + * arguments; but maybe a greater debug_min is set in current + * config file. + */ + nut_debug_level = nut_debug_level_args; + if (nut_debug_level_global > nut_debug_level) { + /* Applying debug_min=%d from ups.conf global section */ + nut_debug_level = nut_debug_level_global; + } + + upsdebugx(1, "debug level for upsdrvctl is '%d'", nut_debug_level); +} + void do_upsconf_args(char *arg_upsname, char *var, char *val) { ups_t *tmp, *last; @@ -141,6 +174,22 @@ void do_upsconf_args(char *arg_upsname, char *var, char *val) } } + /* Allow to specify its minimal debugging level for all drivers - + * or the upsdrvctl tool here - and admins can set more with + * command-line args, but can't set less without changing config. + * Should help debug of services. + */ + if (!strcasecmp(var, "debug_min")) { + int lvl = -1; /* typeof common/common.c: int nut_debug_level */ + if ( str_to_int (val, &lvl, 10) && lvl >= 0 ) { + nut_debug_level_global = lvl; + } else { + upslogx(LOG_INFO, "WARNING : Invalid debug_min value found in ups.conf global settings"); + } + + return; + } + /* ignore anything else - it's probably for main */ return; @@ -388,8 +437,49 @@ static void stop_driver(const ups_t *ups) } if (ret != 0) { + ssize_t udq_ret; + udq_pipe_conn_t *udq_pipe; + upslog_with_errno(LOG_ERR, "Can't open %s either", pidfn); - exec_error++; + + udq_pipe = upsdrvquery_connect_drvname_upsname(ups->driver, ups->upsname); + if (udq_pipe) + upslogx(LOG_ERR, "At least could open the driver socket"); + if (testmode) { + udq_ret = (udq_pipe == NULL ? -1 : 0); + } else { + char buf[LARGEBUF]; + struct timeval tv; + + memset(buf, 0, sizeof(buf)); + /* Post the query and wait for reply */ + /* FIXME: coordinate with pollfreq? */ + tv.tv_sec = 15; + tv.tv_usec = 0; + udq_ret = upsdrvquery_oneshot_conn(udq_pipe, "INSTCMD driver.exit\n", buf, sizeof(buf), &tv); + upsdebugx(1, "%s: upsdrvquery_oneshot_conn() replied to 'exit' (%" PRIiSIZE "): '%s'", __func__, udq_ret, buf); + + if (udq_ret >= 0) { + upsdrvquery_write(udq_pipe, "NOBROADCAST"); + while (upsdrvquery_ping(udq_pipe, &tv, 1000) > 0) { + /* 0 = no reply, -1 = socket error; either way driver deemed dead? */ + upsdebugx(1, "%s: keep waiting for driver exit", __func__); + sleep(1); + } + upsdebugx(1, "%s: final PING did not PONG back", __func__); + } + } + + upslogx(LOG_ERR, "%s to %s the 'exit' command to the driver socket", + (udq_ret < 0) ? "Failed" : "Succeeded", + testmode ? "emulate" : "send"); + + if (udq_ret < 0) { + exec_error++; + } + + upsdrvquery_close(udq_pipe); + return; } } else { @@ -897,7 +987,8 @@ static void status_driver(const ups_t *ups) #ifndef WIN32 snprintf(pidfn, sizeof(pidfn), "%s/%s-%s.pid", altpidpath(), ups->driver, ups->upsname); pidFromFile = parsepidfile(pidfn); - if (pidFromFile >= 0) { /* this method actively reports errors, if any */ + if (pidFromFile >= 0) { + /* this method actively reports errors, if any */ cmdret = sendsignalpid(pidFromFile, 0, ups->driver, 1); /* returns zero for a successfully sent signal */ if (cmdret == 0) @@ -1509,7 +1600,7 @@ int main(int argc, char **argv) exit(EXIT_SUCCESS); case 'D': - nut_debug_level++; + nut_debug_level_args++; break; case 'd': @@ -1607,10 +1698,10 @@ int main(int argc, char **argv) char *s = getenv("NUT_DEBUG_LEVEL"); int l; if (s && str_to_int(s, &l, 10)) { - if (l > 0 && nut_debug_level < 1) { + if (l > 0 && nut_debug_level_args < 1) { upslogx(LOG_INFO, "Defaulting debug verbosity to NUT_DEBUG_LEVEL=%d " "since none was requested by command-line options", l); - nut_debug_level = l; + nut_debug_level_args = l; } /* else follow -D settings */ } /* else nothing to bother about */ } @@ -1660,6 +1751,18 @@ int main(int argc, char **argv) if (!command) fatalx(EXIT_FAILURE, "Error: unrecognized command [%s]", argv[0]); +#ifndef WIN32 + driverpath = xstrdup(DRVPATH); /* set default */ +#else /* WIN32 */ + driverpath = getfullpath2(DRVPATH, PATH_BIN); /* Can get converted to relative path in WIN32 */ +#endif /* WIN32 */ + + atexit(exit_cleanup); + + read_upsconf(1); + + assign_debug_level(); + if (nut_debug_level_passthrough == 0 && (command == &start_driver || command == &shutdown_driver)) { upsdebugx(2, "\n" "If you're not a NUT core developer, chances are that you're told to enable debugging\n" @@ -1673,16 +1776,6 @@ int main(int argc, char **argv) "pass its current debug level to the launched driver, and '-B' keeps it backgrounded.\n"); } -#ifndef WIN32 - driverpath = xstrdup(DRVPATH); /* set default */ -#else /* WIN32 */ - driverpath = getfullpath2(DRVPATH, PATH_BIN); /* Can get converted to relative path in WIN32 */ -#endif /* WIN32 */ - - atexit(exit_cleanup); - - read_upsconf(1); - if (argc == lastarg) { ups_t *tmp = upstable; char tag[SMALLBUF]; diff --git a/drivers/upsdrvquery.c b/drivers/upsdrvquery.c index 0fc348ec5f..f922a5acac 100644 --- a/drivers/upsdrvquery.c +++ b/drivers/upsdrvquery.c @@ -187,8 +187,39 @@ void upsdrvquery_close(udq_pipe_conn_t *conn) { if (VALID_FD(conn->sockfd)) { int nudl = nut_upsdrvquery_debug_level; ssize_t ret; + + /* The connection may be closed on the other side, + * so send() can issue SIGPIPE and by default the + * process crashes. Try to work around that here. + * https://stackoverflow.com/questions/108183/how-to-prevent-sigpipes-or-handle-them-properly + * TODO: Consider mixing send() and MSG_NOSIGNAL + * where available. + */ +#ifndef WIN32 + sigset_t old_state, set; + + /* get the current state */ + sigprocmask(SIG_BLOCK, NULL, &old_state); + + /* add signal_to_block to that existing state */ + set = old_state; + sigaddset(&set, SIGPIPE); + + /* block that signal also */ + sigprocmask(SIG_BLOCK, &set, NULL); +# ifdef SO_NOSIGPIPE + setsockopt(conn->sockfd, SO_NOSIGPIPE); +# endif /* SO_NOSIGPIPE */ +#endif /* !WIN32 */ + upsdebugx(5, "%s: closing driver socket, try to say goodbye", __func__); + ret = upsdrvquery_write(conn, "LOGOUT\n"); + +#ifndef WIN32 + sigprocmask(SIG_BLOCK, &old_state, NULL); +#endif /* !WIN32 */ + if (7 <= ret) { upsdebugx(5, "%s: okay", __func__); #ifdef WIN32 @@ -424,35 +455,29 @@ ssize_t upsdrvquery_write(udq_pipe_conn_t *conn, const char *buf) { return -1; } -ssize_t upsdrvquery_prepare(udq_pipe_conn_t *conn, struct timeval tv) { - struct timeval start, now; - - if (!conn || INVALID_FD(conn->sockfd)) - return -1; - - /* Avoid noise */ - if (upsdrvquery_write(conn, "NOBROADCAST\n") < 0) - goto socket_error; +/* Return 1 if we had a reply, 0 if not, -1 on socket errors */ +ssize_t upsdrvquery_ping(udq_pipe_conn_t *conn, struct timeval *ptv, useconds_t read_interval) { + struct timeval start, now, tv; - if (tv.tv_sec < 1 && tv.tv_usec < 1) { - upsdebugx(5, "%s: proclaiming readiness for tracked commands without flush of server messages", __func__); - return 1; + if (!ptv) { + tv.tv_sec = 5; + tv.tv_usec = 0; + ptv = &tv; } - /* flush incoming, if any */ gettimeofday(&start, NULL); if (upsdrvquery_write(conn, "PING\n") < 0) - goto socket_error; + return -1; - upsdebugx(5, "%s: waiting for a while to flush server messages", __func__); while (1) { - char *buf; - upsdrvquery_read_timeout(conn, tv); + char *buf; + + upsdrvquery_read_timeout(conn, *ptv); gettimeofday(&now, NULL); - if (difftimeval(now, start) > ((double)(tv.tv_sec) + 0.000001 * (double)(tv.tv_usec))) { + if (difftimeval(now, start) > ((double)(ptv->tv_sec) + 0.000001 * (double)(ptv->tv_usec))) { upsdebugx(5, "%s: requested timeout expired", __func__); - break; + return 0; } /* Await a PONG for quick confirmation of achieved quietness @@ -467,7 +492,7 @@ ssize_t upsdrvquery_prepare(udq_pipe_conn_t *conn, struct timeval tv) { while (buf && *buf) { if (!strncmp(buf, "PONG\n", 5)) { upsdebugx(5, "%s: got expected PONG", __func__); - goto finish; + return 1; } buf = strchr(buf, '\n'); if (buf) { @@ -480,15 +505,41 @@ ssize_t upsdrvquery_prepare(udq_pipe_conn_t *conn, struct timeval tv) { } /* Diminishing timeouts for read() */ - tv.tv_usec -= (suseconds_t)(difftimeval(now, start)); - while (tv.tv_usec < 0) { - tv.tv_sec--; - tv.tv_usec = 1000000 + tv.tv_usec; /* Note it is negative */ + ptv->tv_usec -= (suseconds_t)(difftimeval(now, start)); + while (ptv->tv_usec < 0) { + ptv->tv_sec--; + ptv->tv_usec = 1000000 + ptv->tv_usec; /* Note it is negative */ } - if (tv.tv_sec <= 0 && tv.tv_usec <= 0) { + if (ptv->tv_sec <= 0 && ptv->tv_usec <= 0) { upsdebugx(5, "%s: requested timeout expired", __func__); - break; + return 0; } + + if (read_interval) + usleep(read_interval); + } +} + +ssize_t upsdrvquery_prepare(udq_pipe_conn_t *conn, struct timeval tv) { + if (!conn || INVALID_FD(conn->sockfd)) + return -1; + + /* Avoid noise */ + if (upsdrvquery_write(conn, "NOBROADCAST\n") < 0) + goto socket_error; + + if (tv.tv_sec < 1 && tv.tv_usec < 1) { + upsdebugx(5, "%s: proclaiming readiness for tracked commands without flush of server messages", __func__); + return 1; + } + + /* flush incoming, if any */ + upsdebugx(5, "%s: waiting for a while to flush server messages - posting a PING, waiting for PONG", __func__); + switch (upsdrvquery_ping(conn, &tv, 1000)) { + case -1: goto socket_error; + case 0: goto finish; /* No reply - maybe handle somehow? */ + case 1: break; /* Got PONG */ + default: break; } /* Check that we can have a civilized dialog -- diff --git a/drivers/upsdrvquery.h b/drivers/upsdrvquery.h index 8d84a4314e..348a11c9d7 100644 --- a/drivers/upsdrvquery.h +++ b/drivers/upsdrvquery.h @@ -42,6 +42,9 @@ void upsdrvquery_close(udq_pipe_conn_t *conn); ssize_t upsdrvquery_read_timeout(udq_pipe_conn_t *conn, struct timeval tv); ssize_t upsdrvquery_write(udq_pipe_conn_t *conn, const char *buf); +/* Return 1 if we had a reply within timeout specified by *ptv (5 sec if NULL), + * 0 if not, -1 on socket errors */ +ssize_t upsdrvquery_ping(udq_pipe_conn_t *conn, struct timeval *ptv, useconds_t read_interval); ssize_t upsdrvquery_prepare(udq_pipe_conn_t *conn, struct timeval tv); ssize_t upsdrvquery_request(udq_pipe_conn_t *conn, struct timeval tv, const char *query); ssize_t upsdrvquery_restore_broadcast(udq_pipe_conn_t *conn);