Skip to content

Commit 4e887c7

Browse files
committed
lightningd: add option to crash itself after some time.
We have CI runs which time out (after 2 hours). It's not clear why, but we can at least eliminate CLN lockups as the cause. Since pytest disabled the --timeout option on test shutdown, we could be seeing an issue where stopping takes a long time? Signed-off-by: Rusty Russell <[email protected]>
1 parent 9565b3a commit 4e887c7

File tree

4 files changed

+41
-0
lines changed

4 files changed

+41
-0
lines changed

contrib/msggen/msggen/schema.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20552,6 +20552,10 @@
2055220552
"value_int": 3,
2055320553
"source": "cmdline"
2055420554
},
20555+
"dev-crash-after": {
20556+
"value_str": "3600",
20557+
"source": "cmdline"
20558+
},
2055520559
"dev-fail-on-subdaemon-fail": {
2055620560
"set": true,
2055720561
"source": "cmdline"

doc/schemas/lightning-listconfigs.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2678,6 +2678,10 @@
26782678
"value_int": 3,
26792679
"source": "cmdline"
26802680
},
2681+
"dev-crash-after": {
2682+
"value_str": "3600",
2683+
"source": "cmdline"
2684+
},
26812685
"dev-fail-on-subdaemon-fail": {
26822686
"set": true,
26832687
"source": "cmdline"

lightningd/options.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,31 @@ static char *opt_ignore(void *unused)
811811
return NULL;
812812
}
813813

814+
/* Signal handler used for SIGALRM by opt_set_crash_timeout(): terminate
 * the process via abort() as soon as the alarm fires. */
static void handle_alarm(int sig)
{
	/* Which signal arrived is irrelevant; we always abort. */
	(void)sig;
	abort();
}
818+
819+
static char *opt_set_crash_timeout(const char *arg, struct lightningd *ld)
820+
{
821+
struct sigaction act;
822+
u32 time;
823+
char *errstr = opt_set_u32(arg, &time);
824+
if (errstr)
825+
return errstr;
826+
827+
/* In case we're *REALLY* stuck, use alarm() */
828+
memset(&act, 0, sizeof(act));
829+
act.sa_handler = handle_alarm;
830+
act.sa_flags = 0;
831+
832+
if (sigaction(SIGALRM, &act, NULL) != 0)
833+
err(1, "Setting up SIGARLM handler");
834+
835+
alarm(time);
836+
return NULL;
837+
}
838+
814839
static void dev_register_opts(struct lightningd *ld)
815840
{
816841
/* We might want to debug plugins, which are started before normal
@@ -978,6 +1003,10 @@ static void dev_register_opts(struct lightningd *ld)
9781003
opt_set_u32, opt_show_u32,
9791004
&ld->dev_low_prio_anchor_blocks,
9801005
"How many blocks to aim for low-priority anchor closes (default: 2016)");
1006+
clnopt_witharg("--dev-crash-after", OPT_DEV,
1007+
opt_set_crash_timeout, NULL,
1008+
ld,
1009+
"Crash if we are still going after this long.");
9811010
/* This is handled directly in daemon_developer_mode(), so we ignore it here */
9821011
clnopt_noarg("--dev-debug-self", OPT_DEV,
9831012
opt_ignore,
@@ -2219,6 +2248,7 @@ bool is_known_opt_cb_arg(char *(*cb_arg)(const char *, void *))
22192248
|| cb_arg == (void *)opt_add_accept_htlc_tlv
22202249
|| cb_arg == (void *)opt_set_codex32_or_hex
22212250
|| cb_arg == (void *)opt_subd_dev_disconnect
2251+
|| cb_arg == (void *)opt_set_crash_timeout
22222252
|| cb_arg == (void *)opt_add_api_beg
22232253
|| cb_arg == (void *)opt_force_featureset
22242254
|| cb_arg == (void *)opt_force_privkey

tests/fixtures.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ class LightningNode(utils.LightningNode):
1818
def __init__(self, *args, **kwargs):
1919
utils.LightningNode.__init__(self, *args, **kwargs)
2020

21+
# This is a recent innovation, and we don't want to nail pyln-testing to this version.
22+
self.daemon.opts['dev-crash-after'] = 3600
23+
2124
# We have some valgrind suppressions in the `tests/`
2225
# directory, so we can add these to the valgrind configuration
2326
# (not generally true when running pyln-testing, hence why

0 commit comments

Comments
 (0)