From 06a9a0df56ec03e40937a5a36f604a473926c836 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 2 Sep 2025 16:03:03 +0930 Subject: [PATCH 1/8] pyln-testing: catch special CI string so we can have non-BROKEN CI warnings. Signed-off-by: Rusty Russell --- common/status_levels.h | 3 +++ contrib/pyln-testing/pyln/testing/fixtures.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/common/status_levels.h b/common/status_levels.h index 79f7bc57313a..baa04ebb4720 100644 --- a/common/status_levels.h +++ b/common/status_levels.h @@ -20,6 +20,9 @@ enum log_level { }; #define LOG_LEVEL_MAX LOG_BROKEN +/* Things that can happen in real life, but we don't expect under CI. */ +#define CI_UNEXPECTED "That's weird: " + const char *log_level_name(enum log_level level); bool log_level_parse(const char *levelstr, size_t len, enum log_level *level); diff --git a/contrib/pyln-testing/pyln/testing/fixtures.py b/contrib/pyln-testing/pyln/testing/fixtures.py index 215c54022325..4bd062ccc327 100644 --- a/contrib/pyln-testing/pyln/testing/fixtures.py +++ b/contrib/pyln-testing/pyln/testing/fixtures.py @@ -609,7 +609,7 @@ def checkBadGossip(node): def checkBroken(node): node.daemon.logs_catchup() - broken_lines = [l for l in node.daemon.logs if '**BROKEN**' in l] + broken_lines = [l for l in node.daemon.logs if '**BROKEN**' in l or "That's weird: " in l] if node.broken_log: ex = re.compile(node.broken_log) broken_lines = [l for l in broken_lines if not ex.search(l)] From a485ff08af1f0ab31e468ed915ebaaba4dd14daf Mon Sep 17 00:00:00 2001 From: Matt Whitlock Date: Tue, 2 Sep 2025 16:04:03 +0930 Subject: [PATCH 2/8] connectd: demote "Peer did not close, forcing close" to UNUSUAL This message is logged when connectd tries to shut down a peer connection but the transmit buffer remains full for too long, maybe because the peer has crashed or has lost connectivity. Logging this message at the BROKEN level is inappropriate because BROKEN is intended to flag logic errors that imply incorrect code in CLN. The error in question here is actually a runtime error, which does not imply incorrect code (at least on our side), so demote the log message to the UNUSUAL level. (Even this is still probably too severe, as this message is logged rather more frequently than "unusual" would suggest.) Changelog-None Closes: https://github.com/ElementsProject/lightning/issues/5678 --- connectd/multiplex.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/connectd/multiplex.c b/connectd/multiplex.c index d7cbf80e2b59..0d5ac0dc5b97 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -106,8 +106,7 @@ static void maybe_free_peer(struct peer *peer) * not reading, we have to give up. 
*/ static void close_peer_io_timeout(struct peer *peer) { - /* BROKEN means we'll trigger CI if we see it, though it's possible */ - status_peer_broken(&peer->id, "Peer did not close, forcing close"); + status_peer_unusual(&peer->id, CI_UNEXPECTED "Peer did not close, forcing close"); io_close(peer->to_peer); } From d1860e6af1fc25efb124607a707e35a1284c3b89 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 2 Sep 2025 16:05:03 +0930 Subject: [PATCH 3/8] pytest: fix flake in tests/test_bookkeeper.py::test_bookkeeping_missed_chans_leases ``` FAILED tests/test_bookkeeper.py::test_bookkeeping_missed_chans_leases - AssertionError: assert [{'tag': 'channel_open', 'credit_msat': 506268000, 'debit_msat': 0}, {'tag': 'lease_fee', 'credit_msat': 0, 'debit_msat': 6268000}, {'tag': 'invoice', 'credit_msat': 0, 'debit_msat': 11000000}, {'tag': 'onchain_fee', 'credit_msat': 1314000, 'debit_msat': 0}] == [{'tag': 'channel_open', 'credit_msat': 506268000, 'debit_msat': 0}, {'tag': 'lease_fee', 'credit_msat': 0, 'debit_msat': 6268000}, {'tag': 'onchain_fee', 'credit_msat': 1314000, 'debit_msat': 0}, {'tag': 'invoice', 'credit_msat': 0, 'debit_msat': 11000000}] At index 2 diff: {'tag': 'invoice', 'credit_msat': 0, 'debit_msat': 11000000} != {'tag': 'onchain_fee', 'credit_msat': 1314000, 'debit_msat': 0} Full diff: [ { 'credit_msat': 506268000, 'debit_msat': 0, 'tag': 'channel_open', }, { 'credit_msat': 0, 'debit_msat': 6268000, 'tag': 'lease_fee', }, { + 'credit_msat': 0, + 'debit_msat': 11000000, + 'tag': 'invoice', + }, + { 'credit_msat': 1314000, 'debit_msat': 0, 'tag': 'onchain_fee', }, - { - 'credit_msat': 0, - 'debit_msat': 11000000, - 'tag': 'invoice', - }, ] ``` Signed-off-by: Rusty Russell --- tests/test_bookkeeper.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_bookkeeper.py b/tests/test_bookkeeper.py index edb57c0aa04f..1b4d086f9aec 100644 --- a/tests/test_bookkeeper.py +++ b/tests/test_bookkeeper.py @@ -318,8 +318,14 @@ def test_bookkeeping_missed_chans_leases(node_factory, bitcoind): l1.wait_local_channel_active(scid) channel_id = first_channel_id(l1, l2) + # Sigh. bookkeeper sorts events by timestamp. If the invoice event happens + # too close, it can change the order, so sleep here. + time.sleep(2) + + # Send l2 funds via the channel l1.pay(l2, invoice_msat) - l1.daemon.wait_for_log(r'coin movement:.*\'invoice\'') + # Make sure they're completely settled, so accounting correct. + wait_for(lambda: only_one(l1.rpc.listpeerchannels()['channels'])['htlcs'] == []) # Now turn the bookkeeper on and restart l1.stop() From 05df7a40dde0debe8981f0806f71adee987f9f38 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 2 Sep 2025 16:34:01 +0930 Subject: [PATCH 4/8] pytest: fix flake in reckless install timeout. Don't test installing a plugin under valgrind. There's no way to increase reckless' (completely reasonable) 15 seconds timeout, and that can happen under valgrind & CI: ``` def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() r = reckless([f"--network={NETWORK}", "-v", "install", "testpluguv"], dir=node.lightning_dir) > assert r.returncode == 0 E assert 1 == 0 E + where 1 = self.returncode, self.stdout, self.stderr.returncode tests/test_reckless.py:359: AssertionError ... ***RECKLESS STDERR*** config file not found: /tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/regtest/config press [Y] to create one now. 
config file not found: /tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/reckless/regtest-reckless.conf config file not found: /tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/reckless/.sources Traceback (most recent call last): File "/home/runner/work/lightning/lightning/tools/reckless", line 2091, in log.add_result(args.func(target)) File "/home/runner/work/lightning/lightning/tools/reckless", line 1524, in install return _enable_installed(installed, plugin_name) File "/home/runner/work/lightning/lightning/tools/reckless", line 1476, in _enable_installed if enable(installed.name): File "/home/runner/work/lightning/lightning/tools/reckless", line 1647, in enable lightning_cli('plugin', 'start', path) File "/home/runner/work/lightning/lightning/tools/reckless", line 1613, in lightning_cli clncli = run(cmd, stdout=PIPE, stderr=PIPE, check=False, timeout=timeout) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 505, in run stdout, stderr = process.communicate(input, timeout=timeout) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 1154, in communicate stdout, stderr = self._communicate(input, endtime, timeout) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 2022, in _communicate self._check_timeout(endtime, orig_timeout, stdout, stderr) File "/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py", line 1198, in _check_timeout raise TimeoutExpired( subprocess.TimeoutExpired: Command '['/home/runner/work/lightning/lightning/cli/lightning-cli', '--network=regtest', '--lightning-dir=/tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1', 'plugin', 'start', '/tmp/ltests-tui1vmrg/test_reckless_uv_install_1/lightning-1/reckless/testpluguv/testpluguv.py']' timed out after 15 seconds ``` Signed-off-by: Rusty Russell --- tests/test_reckless.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_reckless.py b/tests/test_reckless.py index 64efa6ab6418..10ade3624795 100644 --- a/tests/test_reckless.py +++ b/tests/test_reckless.py @@ -2,7 +2,7 @@ import subprocess from pathlib import PosixPath, Path import socket -from pyln.testing.utils import VALGRIND +from pyln.testing.utils import VALGRIND, SLOW_MACHINE import pytest import os import re @@ -351,6 +351,7 @@ def test_tag_install(node_factory): header = line +@unittest.skipIf(VALGRIND and SLOW_MACHINE, "node too slow for starting plugin under valgrind") def test_reckless_uv_install(node_factory): node = get_reckless_node(node_factory) node.start() From e3d279bb7695fd73eb38169541de45717759d325 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 2 Sep 2025 16:34:23 +0930 Subject: [PATCH 5/8] pytest: fix flake in test_coinmoves_unilateral_htlc_fulfill DER sigs! Normally, the commitment weight is: ``` Anchorspend for local commit tx fee 9751sat (w=722), commit_tx fee 4866sat (w=1284): package feerate 7286 perkw Creating anchor spend for local commit tx 6a0816ca60d499edc70bfb786ebd164fb7a55d234c84d926102f5bd35087fd45: we're paying fee 9751sat ``` But if we're "lucky" the commitment tx is shorter: ``` Anchorspend for local commit tx fee 9744sat (w=722), commit_tx fee 4866sat (w=1283): package feerate 7286 perkw Creating anchor spend for local commit tx acf78532a9448dd62a4e6319a3d2712189a88b6e59abc637260067d60df70782: we're paying fee 9744sat ``` The resulting failure: ``` 2025-08-21T02:30:34.1906751Z > assert moves == expected ... ... 
2025-08-21T02:30:34.1965346Z E { 2025-08-21T02:30:34.1965529Z E 'account_id': 'wallet', 2025-08-21T02:30:34.1965767Z E 'blockheight': 104, 2025-08-21T02:30:34.1965997Z E 'created_index': 6, 2025-08-21T02:30:34.1966229Z E - 'credit_msat': 15579000, 2025-08-21T02:30:34.1966467Z E ? ^^ 2025-08-21T02:30:34.1966698Z E + 'credit_msat': 15586000, 2025-08-21T02:30:34.1966927Z E ? ^^ 2025-08-21T02:30:34.1967150Z E 'debit_msat': 0, 2025-08-21T02:30:34.1967376Z E 'extra_tags': [], 2025-08-21T02:30:34.1967599Z E - 'output_msat': 15579000, 2025-08-21T02:30:34.1967832Z E ? ^^ 2025-08-21T02:30:34.1968061Z E + 'output_msat': 15586000, 2025-08-21T02:30:34.1968294Z E ? ^^ 2025-08-21T02:30:34.1968540Z E 'primary_tag': 'deposit', 2025-08-21T02:30:34.1968908Z E 'utxo': 'acf78532a9448dd62a4e6319a3d2712189a88b6e59abc637260067d60df70782:0', 2025-08-21T02:30:34.1969366Z E }, ``` Signed-off-by: Rusty Russell --- tests/test_coinmoves.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_coinmoves.py b/tests/test_coinmoves.py index 0cdefe83fc73..bf5407fd6685 100644 --- a/tests/test_coinmoves.py +++ b/tests/test_coinmoves.py @@ -1221,6 +1221,11 @@ def test_coinmoves_unilateral_htlc_fulfill(node_factory, bitcoind): line = l1.daemon.is_in_log('Tracking output.*/OUR_HTLC') htlc = int(re.search(r'output [0-9a-f]{64}:([0-9]):', line).group(1)) + # commitment tx weight can vary (DER sigs, FML) and so even though the feerate target + # is fixed, the amount of the child tx we create will vary, hence the change varies. + # So it's usually 15579000, but one in 128 it will be 15586000... + anchor_change_msats = bitcoind.rpc.gettxout(anchor_spend_txid, 0)['value'] * 100_000_000_000 + expected_chain1 += [{'account_id': 'wallet', # Anchor spend from fundchannel change 'blockheight': 104, 'credit_msat': 0, @@ -1232,10 +1237,10 @@ def test_coinmoves_unilateral_htlc_fulfill(node_factory, bitcoind): 'utxo': f"{fundchannel['txid']}:{fundchannel['outnum'] ^ 1}"}, {'account_id': 'wallet', # Change from anchor spend 'blockheight': 104, - 'credit_msat': 15579000, + 'credit_msat': anchor_change_msats, 'debit_msat': 0, 'extra_tags': [], - 'output_msat': 15579000, + 'output_msat': anchor_change_msats, 'primary_tag': 'deposit', 'utxo': f"{anchor_spend_txid}:0"}, {'account_id': fundchannel['channel_id'], From a988505286f5de25903dde4b414ba704deee0983 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 2 Sep 2025 16:34:23 +0930 Subject: [PATCH 6/8] pytest: clean up JSON sql test. 1. Establish a channel with l3; we already have one with l2. 2. Don't bother generating 6 more blocks (fundchannel ensures it's mined). 3. Allow htlcs to be empty: Whitslack reported that happens for him 4. Use only_one() to access where we insist there is only one element in the list. 5. Tighten tests to assert the exact contents, not just test some. 
Signed-off-by: Rusty Russell Fixes: https://github.com/ElementsProject/lightning/issues/8497 --- tests/test_plugin.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index b01005479322..9f1bed283ff2 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -4045,19 +4045,29 @@ def test_sql(node_factory, bitcoind): wait_for(lambda: l3.rpc.sql("SELECT * FROM nodes WHERE alias = '{}'".format(alias))['rows'] != []) # Test json functions - l1.fundchannel(l2) - bitcoind.generate_block(6) - l1.rpc.pay(l2.rpc.invoice(amount_msat=1000000, label='inv1000', description='description 1000 msat')['bolt11']) - ret = l1.rpc.sql("SELECT json_object('peer_id', hex(pc.peer_id), 'alias', alias, 'htlcs'," + scidl1l3, _ = l1.fundchannel(l3) + l1.rpc.pay(l3.rpc.invoice(amount_msat=1000000, label='inv1000', description='description 1000 msat')['bolt11']) + + # Two channels, l1->l3 *may* have an HTLC in flight. + ret = l1.rpc.sql("SELECT json_object('peer_id', hex(pc.peer_id), 'alias', alias, 'scid', short_channel_id, 'htlcs'," " (SELECT json_group_array(json_object('id', hex(id), 'amount_msat', amount_msat))" " FROM peerchannels_htlcs ph WHERE ph.row = pc.rowid)) FROM peerchannels pc JOIN nodes n" " ON pc.peer_id = n.nodeid ORDER BY n.alias, pc.peer_id;") assert len(ret['rows']) == 2 - row1 = json.loads(ret['rows'][0][0]) - row2 = json.loads(ret['rows'][1][0]) - assert row1['peer_id'] == format(l2.info['id'].upper()) - assert len(row2['htlcs']) == 1 - assert row2['htlcs'][0]['amount_msat'] == 1000000 + row1 = json.loads(only_one(ret['rows'][0])) + row2 = json.loads(only_one(ret['rows'][1])) + assert row1 in ({"peer_id": format(l3.info['id'].upper()), + "alias": l3.rpc.getinfo()['alias'], + "scid": scidl1l3, + "htlcs": []}, + {"peer_id": format(l3.info['id'].upper()), + "alias": l3.rpc.getinfo()['alias'], + "scid": scidl1l3, + "htlcs": [{"id": "30", "amount_msat": 1000000}]}) + assert row2 == {"peer_id": format(l2.info['id'].upper()), + "alias": l2.rpc.getinfo()['alias'], + "scid": scid, + "htlcs": []} def test_sql_deprecated(node_factory, bitcoind): From 385e476c618915ca7be1bb4b08f472cef8afba44 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 2 Sep 2025 16:34:23 +0930 Subject: [PATCH 7/8] pytest: fix test_xpay_fake_channeld flake Also, resulting log was huge, so suppress log level (will probably speed test) ``` 2025-09-02T06:01:39.4881086Z > l1.rpc.plugin_start(os.path.join(os.getcwd(), 'plugins/cln-askrene')) 2025-09-02T06:01:39.4881090Z 2025-09-02T06:01:39.4881174Z tests/test_xpay.py:279: ... 2025-09-02T06:01:39.4883193Z > self.sock.connect(str(self.path)) 2025-09-02T06:01:39.4883340Z E ConnectionRefusedError: [Errno 111] Connection refused ... 2025-09-02T06:01:41.7767610Z lightningd-1 2025-09-02T06:01:27.235Z **BROKEN** plugin-cln-xpay: askrene-age failed with {"code":-4, "message":"Plugin terminated before replying to RPC call."} 2025-09-02T06:01:41.7768127Z lightningd-1 2025-09-02T06:01:27.305Z INFO plugin-cln-xpay: Killing plugin: exited during normal operation ``` Signed-off-by: Rusty Russell --- plugins/xpay/xpay.c | 10 ++++++++++ tests/test_xpay.py | 11 +++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/plugins/xpay/xpay.c b/plugins/xpay/xpay.c index 16717ce3d045..4528a9e2d6b7 100644 --- a/plugins/xpay/xpay.c +++ b/plugins/xpay/xpay.c @@ -38,6 +38,8 @@ struct xpay { bool take_over_pay; /* Are we to wait for all parts to complete before returning? 
*/ bool slow_mode; + /* Suppress calls to askrene-age */ + bool dev_no_age; }; static struct xpay *xpay_of(struct plugin *plugin) @@ -2098,6 +2100,10 @@ static struct command_result *age_layer(struct command *timer_cmd, void *unused) static void start_aging_timer(struct plugin *plugin) { + struct xpay *xpay = xpay_of(plugin); + + if (xpay->dev_no_age) + return; notleak(global_timer(plugin, time_from_sec(60), age_layer, NULL)); } @@ -2422,6 +2428,7 @@ int main(int argc, char *argv[]) xpay = tal(NULL, struct xpay); xpay->take_over_pay = false; xpay->slow_mode = false; + xpay->dev_no_age = false; plugin_main(argv, init, take(xpay), PLUGIN_RESTARTABLE, true, NULL, commands, ARRAY_SIZE(commands), @@ -2434,5 +2441,8 @@ int main(int argc, char *argv[]) plugin_option_dynamic("xpay-slow-mode", "bool", "Wait until all parts have completed before returning success or failure", bool_option, bool_jsonfmt, &xpay->slow_mode), + plugin_option_dev("dev-xpay-no-age", "flag", + "Don't call askrene-age", + flag_option, flag_jsonfmt, &xpay->dev_no_age), NULL); } diff --git a/tests/test_xpay.py b/tests/test_xpay.py index ca464c801578..5a822bfbdc15 100644 --- a/tests/test_xpay.py +++ b/tests/test_xpay.py @@ -227,14 +227,21 @@ def test_xpay_fake_channeld(node_factory, bitcoind, chainparams, slow_mode): # l2 will warn l1 about its invalid gossip: ignore. # We throttle l1's gossip to avoid massive log spam. + # Suppress debug and below because logs are huge l1, l2 = node_factory.line_graph(2, # This is in sats, so 1000x amount we send. fundamount=AMOUNT, opts=[{'gossip_store_file': outfile.name, 'subdaemon': 'channeld:../tests/plugins/channeld_fakenet', 'allow_warning': True, - 'dev-throttle-gossip': None}, - {'allow_bad_gossip': True}]) + 'dev-throttle-gossip': None, + 'log-level': 'info', + # xpay gets upset if it's aging when we remove cln-askrene! + 'dev-xpay-no-age': None, + }, + {'allow_bad_gossip': True, + 'log-level': 'info', + }]) # l1 needs to know l2's shaseed for the channel so it can make revocations hsmfile = os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, "hsm_secret") From d26e4903e964f57b39f4edd4d3e57d1fab7e57fd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 4 Sep 2025 13:22:04 +0930 Subject: [PATCH 8/8] pytest: fix broken message in test_even_sendcustommsg. We can stop listening on the incoming peer while we are closing, so we don't notice if they close: ``` ['lightningd-2 2025-09-03T09:48:19.555Z **BROKEN** 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Peer did not close, forcing close', 'lightningd-2 2025-09-03T09:48:22.918Z **BROKEN** 0266e4598d1d3c415f572a8488830b60f7e744ed9235eb0b1ba93283b315c03518-connectd: Peer did not close, forcing close'] =========================== short test summary info ============================ ERROR tests/test_misc.py::test_even_sendcustommsg - ValueError: ``` Signed-off-by: Rusty Russell --- connectd/multiplex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/connectd/multiplex.c b/connectd/multiplex.c index 0d5ac0dc5b97..99137a0f5b9a 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -1082,8 +1082,10 @@ static struct io_plan *write_to_peer(struct io_conn *peer_conn, /* Still nothing to send? */ if (!msg) { /* Draining? We're done when subds are done. */ - if (peer->draining && tal_count(peer->subds) == 0) + if (peer->draining && tal_count(peer->subds) == 0) { + io_wake(&peer->peer_in); return io_sock_shutdown(peer_conn); + } /* If they want us to send gossip, do so now. 
*/
 		if (!peer->draining)
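
With this series, the pytest fixtures treat any log line carrying the CI_UNEXPECTED prefix ("That's weird: ") the same way they treat **BROKEN** lines, and the existing broken_log regex remains the escape hatch for tests that legitimately expect such a message. A minimal sketch of that allowlist pattern, assuming the usual broken_log plumbing in pyln-testing (the test name and the final RPC call are placeholders, not part of this series):

```python
from pyln.testing.fixtures import *  # noqa: F401,F403


def test_forced_close_allowlisted(node_factory):
    # Hypothetical test: checkBroken() matches this regex against both
    # **BROKEN** lines and, after this series, "That's weird: " lines,
    # so connectd's forced-close warning no longer fails the run.
    l1 = node_factory.get_node(broken_log="Peer did not close, forcing close")
    l1.rpc.getinfo()
```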