From 12622df78c738020131127160f249879d4b0455d Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 31 Jul 2025 13:23:37 +0000 Subject: [PATCH 01/13] Correct `test_dup_htlc_onchain_doesnt_fail_on_reload` `test_dup_htlc_onchain_doesnt_fail_on_reload` made reference to `ChainMonitor` persisting `ChannelMonitor`s on each new block, which hasn't been the case in some time. Instead, we update the comment and code to make explicit that it doesn't impact the test. --- lightning/src/ln/payment_tests.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index 4c63bfa4f8e..b421a4b5433 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -14,7 +14,7 @@ use crate::chain::channelmonitor::{ ANTI_REORG_DELAY, HTLC_FAIL_BACK_BUFFER, LATENCY_GRACE_PERIOD_BLOCKS, }; -use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen}; +use crate::chain::{Confirm, Listen}; use crate::events::{ ClosureReason, Event, HTLCHandlingFailureType, PathFailure, PaymentFailureReason, PaymentPurpose, @@ -1310,16 +1310,15 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload( connect_blocks(&nodes[0], ANTI_REORG_DELAY - 2); } - // Now connect the HTLC claim transaction with the ChainMonitor-generated ChannelMonitor update - // returning InProgress. This should cause the claim event to never make its way to the - // ChannelManager. - chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); - + // Now connect the HTLC claim transaction. Note that ChannelMonitors aren't re-persisted on + // each block connection (as the block being reconnected on startup should get us the same + // result). if payment_timeout { connect_blocks(&nodes[0], 1); } else { connect_block(&nodes[0], &claim_block); } + check_added_monitors(&nodes[0], 0); // Note that we skip persisting ChannelMonitors. We should still be generating the payment sent // event without ChannelMonitor persistence. If we reset to a previous state on reload, the block From 5740ce2a76ba1e6d439bf87d5c62225131d17786 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 22:10:09 +0000 Subject: [PATCH 02/13] Make `ChannelMonitor` round-trip tests more robust During testsing, we check that a `ChannelMonitor` will round-trip through serialization exactly. However, we recently added a fix to change a value in `PackageTemplate` on reload to fix some issues in the field in 0.1. This can cause the round-trip tests to fail as a field is modified during read. We fix it here by simply exempting the field from the equality test in the condition where it would be updated on read. We also make the `ChannelMonitor` `PartialEq` trait implementation non-public as weird workarounds like this make clear that such a comparison is a britle API at best. --- lightning/src/chain/channelmonitor.rs | 20 +++++++++--- lightning/src/chain/package.rs | 46 ++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index f72eea850d5..39e5bfca149 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -77,7 +77,7 @@ use crate::util::ser::{ use crate::prelude::*; use crate::io::{self, Error}; -use crate::sync::{LockTestExt, Mutex}; +use crate::sync::Mutex; use core::ops::Deref; use core::{cmp, mem}; @@ -1371,18 +1371,30 @@ macro_rules! 
holder_commitment_htlcs { /// Transaction outputs to watch for on-chain spends. pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>); +// Because we have weird workarounds for `ChannelMonitor` equality checks in `OnchainTxHandler` and +// `PackageTemplate` the equality implementation isn't really fit for public consumption. Instead, +// we only expose it during tests. +#[cfg(any(feature = "_test_utils", test))] impl PartialEq for ChannelMonitor where Signer: PartialEq, { - #[rustfmt::skip] fn eq(&self, other: &Self) -> bool { + use crate::sync::LockTestExt; // We need some kind of total lockorder. Absent a better idea, we sort by position in // memory and take locks in that order (assuming that we can't move within memory while a // lock is held). let ord = ((self as *const _) as usize) < ((other as *const _) as usize); - let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() }; - let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() }; + let a = if ord { + self.inner.unsafe_well_ordered_double_lock_self() + } else { + other.inner.unsafe_well_ordered_double_lock_self() + }; + let b = if ord { + other.inner.unsafe_well_ordered_double_lock_self() + } else { + self.inner.unsafe_well_ordered_double_lock_self() + }; a.eq(&b) } } diff --git a/lightning/src/chain/package.rs b/lightning/src/chain/package.rs index 3de690e492f..bdc5774e598 100644 --- a/lightning/src/chain/package.rs +++ b/lightning/src/chain/package.rs @@ -1093,7 +1093,7 @@ enum PackageMalleability { /// /// As packages are time-sensitive, we fee-bump and rebroadcast them at scheduled intervals. /// Failing to confirm a package translate as a loss of funds for the user. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, Eq)] pub struct PackageTemplate { // List of onchain outputs and solving data to generate satisfying witnesses. inputs: Vec<(BitcoinOutPoint, PackageSolvingData)>, @@ -1122,6 +1122,50 @@ pub struct PackageTemplate { height_timer: u32, } +impl PartialEq for PackageTemplate { + fn eq(&self, o: &Self) -> bool { + if self.inputs != o.inputs + || self.malleability != o.malleability + || self.feerate_previous != o.feerate_previous + || self.height_timer != o.height_timer + { + return false; + } + #[cfg(test)] + { + // In some cases we may reset `counterparty_spendable_height` to zero on reload, which + // can cause our test assertions that ChannelMonitors round-trip exactly to trip. Here + // we allow exactly the same case as we tweak in the `PackageTemplate` `Readable` + // implementation. + if self.counterparty_spendable_height == 0 { + for (_, input) in self.inputs.iter() { + if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput { + htlc, .. + }) = input + { + if !htlc.offered && htlc.cltv_expiry != 0 { + return true; + } + } + } + } + if o.counterparty_spendable_height == 0 { + for (_, input) in o.inputs.iter() { + if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput { + htlc, .. 
+ }) = input + { + if !htlc.offered && htlc.cltv_expiry != 0 { + return true; + } + } + } + } + } + self.counterparty_spendable_height == o.counterparty_spendable_height + } +} + impl PackageTemplate { #[rustfmt::skip] pub(crate) fn can_merge_with(&self, other: &PackageTemplate, cur_height: u32) -> bool { From ac49603e7e01a9d7a1f8af1a1de94a0dee52c5c4 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 22:44:08 +0000 Subject: [PATCH 03/13] Rebuild pending payments list before replaying pending claims/fails On `ChannelManager` reload we rebuild the pending outbound payments list by looking for any missing payments in `ChannelMonitor`s. However, in the same loop over `ChannelMonitor`s, we also re-claim any pending payments which we see we have a payment preimage for. If we send an MPP payment across different chanels, the result may be that we'll iterate the loop, and in each iteration add a pending payment with only one known path, then claim/fail it and remove the pending apyment (at least for the claim case). This may result in spurious extra events, or even both a `PaymentFailed` and `PaymentSent` event on startup for the same payment. --- lightning/src/ln/channelmanager.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index e4614351ee9..5f4807cae51 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -16386,6 +16386,10 @@ where // payments which are still in-flight via their on-chain state. // We only rebuild the pending payments map if we were most recently serialized by // 0.0.102+ + // + // First we rebuild the pending payments, and only once we do so we go through and + // re-claim and re-fail pending payments. This avoids edge-cases around MPP payments + // resulting in redundant actions. for (channel_id, monitor) in args.channel_monitors.iter() { let mut is_channel_closed = false; let counterparty_node_id = monitor.get_counterparty_node_id(); @@ -16424,6 +16428,18 @@ where ); } } + } + } + for (channel_id, monitor) in args.channel_monitors.iter() { + let mut is_channel_closed = false; + let counterparty_node_id = monitor.get_counterparty_node_id(); + if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) { + let mut peer_state_lock = peer_state_mtx.lock().unwrap(); + let peer_state = &mut *peer_state_lock; + is_channel_closed = !peer_state.channel_by_id.contains_key(channel_id); + } + + if is_channel_closed { for (htlc_source, (htlc, preimage_opt)) in monitor.get_all_current_outbound_htlcs() { From 5d398a183245f9f0701d154db1b102924af7224a Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sun, 20 Jul 2025 23:58:51 +0000 Subject: [PATCH 04/13] Store preimages we learned on chain in case of `MonitorEvent` loss `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. 
Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain. In cases where a `MonitorEvent` is lost, this can lead to a lost preimage. Here we fix it by simply tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation. --- lightning/src/chain/channelmonitor.rs | 2 + lightning/src/ln/monitor_tests.rs | 165 ++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 39e5bfca149..61d65e65ca0 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -5787,6 +5787,7 @@ impl ChannelMonitorImpl { on_to_local_output_csv: None, }, }); + self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage); self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { source, payment_preimage: Some(payment_preimage), @@ -5810,6 +5811,7 @@ impl ChannelMonitorImpl { on_to_local_output_csv: None, }, }); + self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage); self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { source, payment_preimage: Some(payment_preimage), diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 11dd13317e0..ba290a379e3 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -12,6 +12,7 @@ //! Further functional tests which test blockchain reorganizations. use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SignerProvider, SpendableOutputDescriptor}; +use crate::chain::Watch; use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS,LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep}; use crate::chain::transaction::OutPoint; use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight}; @@ -3298,3 +3299,167 @@ fn test_claim_event_never_handled() { // `ChannelMonitorUpdate`. check_added_monitors(&nodes[1], 2); } + +fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) { + // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the + // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets + // persisted (i.e. due to a block update) then the node crashes, prior to persisting the + // `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be + // lost. This isn't likely in a sync persist environment, but in an async one this could be an + // issue. 
+ // + // Note that this is only an issue for closed channels - `MonitorEvent`s only inform the + // `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup + // or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes + // completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved + // on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. + // + // Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not + // cause us to lose funds or miss a `PaymentSent` event. + let mut cfg = test_default_channel_config(); + cfg.manually_accept_inbound_channels = true; + cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true; + let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())]; + + let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + let persister; + let new_chain_mon; + let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs); + let node_b_reload; + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + let coinbase_tx = provide_anchor_reserves(&nodes); + + let node_b_id = nodes[1].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2; + let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2; + + let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000); + let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000); + + nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + nodes[2].node.claim_funds(preimage_a); + check_added_monitors(&nodes[2], 1); + expect_payment_claimed!(nodes[2], hash_a, 1_000_000); + nodes[2].node.claim_funds(preimage_b); + check_added_monitors(&nodes[2], 1); + expect_payment_claimed!(nodes[2], hash_b, 1_000_000); + + // Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs. 
+ let message = "Closed".to_owned(); + nodes[2] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone()) + .unwrap(); + check_added_monitors(&nodes[2], 1); + let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message }; + check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000); + check_closed_broadcast!(nodes[2], true); + + handle_bump_events(&nodes[2], true, 0); + let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(cs_commit_tx.len(), 1); + + let message = "Closed".to_owned(); + nodes[1] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone()) + .unwrap(); + check_added_monitors(&nodes[1], 1); + let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message }; + check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000); + check_closed_broadcast!(nodes[1], true); + + handle_bump_events(&nodes[1], true, 0); + let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(bs_commit_tx.len(), 1); + + let selected_commit_tx = if on_counterparty_tx { + &cs_commit_tx[0] + } else { + &bs_commit_tx[0] + }; + + mine_transaction(&nodes[2], selected_commit_tx); + let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first(); + let cs_htlc_claims = if on_counterparty_tx { + assert_eq!(cs_transactions.len(), 0); + + handle_bump_events(&nodes[2], c_replays_commitment, 1); + let mut cs_transactions = + nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + if c_replays_commitment { + assert_eq!(cs_transactions.len(), 3); + check_spends!(cs_transactions[1], cs_transactions[0], coinbase_tx); + } else { + assert_eq!(cs_transactions.len(), 1); + } + vec![cs_transactions.pop().unwrap()] + } else { + assert_eq!(cs_transactions.len(), 1); + cs_transactions + }; + + assert_eq!(cs_htlc_claims.len(), 1); + check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx); + + // Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s + mine_transaction(&nodes[1], selected_commit_tx); + mine_transaction(&nodes[1], &cs_htlc_claims[0]); + + // Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we + // just processed a new block) but the ChannelManager was not. This should be exceedingly rare + // given we have to be connecting a block at the right moment and not manage to get a + // ChannelManager persisted after it does a thing that should immediately precede persistence, + // but with async persist it is more common. + // + // We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the + // latest state. 
+ let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events(); + assert_eq!(mon_events.len(), 1); + assert_eq!(mon_events[0].2.len(), 2); + + let node_ser = nodes[1].node.encode(); + let mon_a_ser = get_monitor!(nodes[1], chan_a).encode(); + let mon_b_ser = get_monitor!(nodes[1], chan_b).encode(); + let mons = &[&mon_a_ser[..], &mon_b_ser[..]]; + reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload); + + let preimage_events = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(preimage_events.len(), 2, "{preimage_events:?}"); + for ev in preimage_events { + match ev { + Event::PaymentSent { payment_hash, .. } => { + assert_eq!(payment_hash, hash_b); + }, + Event::PaymentForwarded { claim_from_onchain_tx, .. } => { + assert!(claim_from_onchain_tx); + }, + _ => panic!("Wrong event {ev:?}"), + } + } + + // After the background events are processed in `get_and_clear_pending_events`, above, node B + // will create the requisite `ChannelMontiorUpdate` for claiming the forwarded payment back. + // The HTLC, however, is added to the holding cell for replay after the peer connects, below. + check_added_monitors(&nodes[1], 1); + + nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); + reconnect_args.pending_cell_htlc_claims = (1, 0); + reconnect_nodes(reconnect_args); + expect_payment_sent(&nodes[0], preimage_a, None, true, true); +} + +#[test] +fn test_lost_preimage_monitor_events() { + do_test_lost_preimage_monitor_events(true); + do_test_lost_preimage_monitor_events(false); +} From a16fe157ecabfbc34440790fac433d5db17f9ad4 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 15 Aug 2025 23:25:32 +0000 Subject: [PATCH 05/13] f sp --- lightning/src/ln/monitor_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index ba290a379e3..1ae311be5ec 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3301,7 +3301,7 @@ fn test_claim_event_never_handled() { } fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) { - // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the + // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets // persisted (i.e. due to a block update) then the node crashes, prior to persisting the // `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be From 943056103b6ccd34e7c2f7b057acb2ffa7ae33f2 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 23:16:16 +0000 Subject: [PATCH 06/13] `rustfmt` and clean up `get_onchain_failed_outbound_htlcs` --- lightning/src/chain/channelmonitor.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 61d65e65ca0..b393163c2cd 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2956,29 +2956,29 @@ impl ChannelMonitor { /// This is similar to [`Self::get_pending_or_resolved_outbound_htlcs`] except it includes /// HTLCs which were resolved on-chain (i.e. where the final HTLC resolution was done by an /// event from this `ChannelMonitor`). 
-	#[rustfmt::skip]
-	pub(crate) fn get_all_current_outbound_htlcs(&self) -> HashMap<HTLCSource, (HTLCOutputInCommitment, Option<PaymentPreimage>)> {
+	pub(crate) fn get_all_current_outbound_htlcs(
+		&self,
+	) -> HashMap<HTLCSource, (HTLCOutputInCommitment, Option<PaymentPreimage>)> {
 		let mut res = new_hash_map();
 		// Just examine the available counterparty commitment transactions. See docs on
 		// `fail_unbroadcast_htlcs`, below, for justification.
 		let us = self.inner.lock().unwrap();
-		macro_rules! walk_counterparty_commitment {
-			($txid: expr) => {
-				if let Some(ref latest_outpoints) = us.funding.counterparty_claimable_outpoints.get($txid) {
-					for &(ref htlc, ref source_option) in latest_outpoints.iter() {
-						if let &Some(ref source) = source_option {
-							res.insert((**source).clone(), (htlc.clone(),
-								us.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).cloned()));
-						}
+		let mut walk_counterparty_commitment = |txid| {
+			if let Some(latest_outpoints) = us.funding.counterparty_claimable_outpoints.get(txid) {
+				for &(ref htlc, ref source_option) in latest_outpoints.iter() {
+					if let &Some(ref source) = source_option {
+						let htlc_id = SentHTLCId::from_source(source);
+						let preimage_opt = us.counterparty_fulfilled_htlcs.get(&htlc_id).cloned();
+						res.insert((**source).clone(), (htlc.clone(), preimage_opt));
 					}
 				}
 			}
-		}
+		};
 		if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
-			walk_counterparty_commitment!(txid);
+			walk_counterparty_commitment(txid);
 		}
 		if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
-			walk_counterparty_commitment!(txid);
+			walk_counterparty_commitment(txid);
 		}
 		res
 	}

From fba2f2d8cabeac8bf87b9a6571082d896eefd741 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Fri, 1 Aug 2025 23:16:30 +0000
Subject: [PATCH 07/13] Re-fail perm-failed HTLCs on startup in case of
 `MonitorEvent` loss

`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable
fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s,
then the `ChannelMonitor` gets persisted (i.e. due to a block update),
then the node crashes prior to persisting the `ChannelManager` again,
the `MonitorEvent` and its effects on the `ChannelManager` will be
lost. This isn't likely in a sync persist environment, but in an async
one this could be an issue.

Note that this is only an issue for closed channels - `MonitorEvent`s
only inform the `ChannelManager` that a channel is closed (which the
`ChannelManager` will learn on startup or when it next tries to advance
the channel state), that `ChannelMonitorUpdate` writes completed (which
the `ChannelManager` will detect on startup), or that HTLCs resolved
on-chain post closure. Of the three, only the last is problematic to
lose prior to a reload.

In a previous commit we handled the case of claimed HTLCs by replaying
payment preimages on startup to avoid `MonitorEvent` loss causing us to
miss an HTLC claim. Here we handle the HTLC-failed case similarly.

Unlike with HTLC claims via preimage, we don't already have replay
logic in `ChannelManager` startup, but it's easy enough to add some.
Luckily, we already track when an HTLC reaches permanently-failed state
in `ChannelMonitor` (i.e. it has `ANTI_REORG_DELAY` confirmations
on-chain on the failing transaction), so all we need to do is add the
ability to query for that and fail them on `ChannelManager` startup.
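To make the shape of that startup replay concrete, here is a minimal,
standalone Rust sketch of the pattern; the types and names are
illustrative only, not LDK API. On reload, instead of trusting that
every `MonitorEvent` was delivered before the crash, we re-derive the
set of claims to re-apply and failures to re-fail from the monitor's
durable record of on-chain resolutions:

use std::collections::HashMap;

/// Illustrative stand-in for an HTLC identifier tracked by a monitor.
type HtlcId = u64;

/// Illustrative stand-in for the on-chain resolution a monitor has durably recorded.
enum Resolution {
    /// Resolved on-chain with a preimage - replay as a payment success.
    Fulfilled([u8; 32]),
    /// Resolved on-chain via the timeout path with enough confirmations -
    /// replay as a payment failure.
    TimedOut,
}

/// On reload, split the monitor's resolved HTLCs into claims to re-apply and
/// failures to re-fail, whether or not the corresponding events were handed
/// to the manager before the crash.
fn replay_on_startup(
    resolved: &HashMap<HtlcId, Resolution>,
) -> (Vec<(HtlcId, [u8; 32])>, Vec<HtlcId>) {
    let mut claims = Vec::new();
    let mut fails = Vec::new();
    for (id, res) in resolved {
        match res {
            Resolution::Fulfilled(preimage) => claims.push((*id, *preimage)),
            Resolution::TimedOut => fails.push(*id),
        }
    }
    (claims, fails)
}

fn main() {
    let mut resolved = HashMap::new();
    resolved.insert(1, Resolution::Fulfilled([0x42; 32]));
    resolved.insert(2, Resolution::TimedOut);
    let (claims, fails) = replay_on_startup(&resolved);
    assert_eq!(claims, vec![(1, [0x42; 32])]);
    assert_eq!(fails, vec![2]);
}

In the actual change below, the query is `get_onchain_failed_outbound_htlcs`
on `ChannelMonitor`, and the failure replay is wired into `ChannelManager`
deserialization alongside the existing preimage replay.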
--- lightning/src/chain/channelmonitor.rs | 69 ++++++++++ lightning/src/ln/channelmanager.rs | 33 +++-- lightning/src/ln/monitor_tests.rs | 177 ++++++++++++++++++++++++++ 3 files changed, 270 insertions(+), 9 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index b393163c2cd..81fd8c3ca85 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2983,6 +2983,75 @@ impl ChannelMonitor { res } + /// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via + /// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine + /// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted. + pub(crate) fn get_onchain_failed_outbound_htlcs( + &self, + ) -> HashMap { + let us = self.inner.lock().unwrap(); + // We're only concerned with the confirmation count of HTLC transactions, and don't + // actually care how many confirmations a commitment transaction may or may not have. Thus, + // we look for either a FundingSpendConfirmation event or a funding_spend_confirmed. + let confirmed_txid = us.funding_spend_confirmed.or_else(|| { + us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| { + if let OnchainEvent::FundingSpendConfirmation { .. } = event.event { + Some(event.txid) + } else { + None + } + }) + }); + + if confirmed_txid.is_none() { + return new_hash_map(); + } + + let mut res = new_hash_map(); + macro_rules! walk_htlcs { + ($holder_commitment: expr, $htlc_iter: expr) => { + for (htlc, source) in $htlc_iter { + let filter = |v: &&IrrevocablyResolvedHTLC| { + v.commitment_tx_output_idx == htlc.transaction_output_index + }; + if let Some(state) = us.htlcs_resolved_on_chain.iter().filter(filter).next() { + if let Some(source) = source { + if state.payment_preimage.is_none() { + res.insert(source.clone(), htlc.clone()); + } + } + } + } + }; + } + + let txid = confirmed_txid.unwrap(); + if Some(txid) == us.funding.current_counterparty_commitment_txid + || Some(txid) == us.funding.prev_counterparty_commitment_txid + { + walk_htlcs!( + false, + us.funding.counterparty_claimable_outpoints.get(&txid).unwrap().iter().filter_map( + |(a, b)| { + if let &Some(ref source) = b { + Some((a, Some(&**source))) + } else { + None + } + } + ) + ); + } else if txid == us.funding.current_holder_commitment_tx.trust().txid() { + walk_htlcs!(true, holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES)); + } else if let Some(prev_commitment_tx) = &us.funding.prev_holder_commitment_tx { + if txid == prev_commitment_tx.trust().txid() { + walk_htlcs!(true, holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap()); + } + } + + res + } + /// Gets the set of outbound HTLCs which are pending resolution in this channel or which were /// resolved with a preimage from our counterparty. 
/// diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 5f4807cae51..5ac458a2d06 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -15739,7 +15739,7 @@ where log_error!(logger, " The ChannelMonitor for channel {} is at counterparty commitment transaction number {} but the ChannelManager is at counterparty commitment transaction number {}.", &channel.context.channel_id(), monitor.get_cur_counterparty_commitment_number(), channel.get_cur_counterparty_commitment_transaction_number()); } - let mut shutdown_result = + let shutdown_result = channel.force_shutdown(ClosureReason::OutdatedChannelManager); if shutdown_result.unbroadcasted_batch_funding_txid.is_some() { return Err(DecodeError::InvalidValue); @@ -15771,7 +15771,10 @@ where }, ); } - failed_htlcs.append(&mut shutdown_result.dropped_outbound_htlcs); + for (source, hash, cp_id, chan_id) in shutdown_result.dropped_outbound_htlcs { + let reason = LocalHTLCFailureReason::ChannelClosed; + failed_htlcs.push((source, hash, cp_id, chan_id, reason)); + } channel_closures.push_back(( events::Event::ChannelClosed { channel_id: channel.context.channel_id(), @@ -15813,6 +15816,7 @@ where *payment_hash, channel.context.get_counterparty_node_id(), channel.context.channel_id(), + LocalHTLCFailureReason::ChannelClosed, )); } } @@ -16537,6 +16541,20 @@ where }, } } + for (htlc_source, htlc) in monitor.get_onchain_failed_outbound_htlcs() { + log_info!( + args.logger, + "Failing HTLC with payment hash {} as it was resolved on-chain.", + htlc.payment_hash + ); + failed_htlcs.push(( + htlc_source, + htlc.payment_hash, + monitor.get_counterparty_node_id(), + monitor.channel_id(), + LocalHTLCFailureReason::OnChainTimeout, + )); + } } // Whether the downstream channel was closed or not, try to re-apply any payment @@ -17217,13 +17235,10 @@ where } } - for htlc_source in failed_htlcs.drain(..) { - let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source; - let failure_reason = LocalHTLCFailureReason::ChannelClosed; - let receiver = HTLCHandlingFailureType::Forward { - node_id: Some(counterparty_node_id), - channel_id, - }; + for htlc_source in failed_htlcs { + let (source, payment_hash, counterparty_id, channel_id, failure_reason) = htlc_source; + let receiver = + HTLCHandlingFailureType::Forward { node_id: Some(counterparty_id), channel_id }; let reason = HTLCFailReason::from_failure_code(failure_reason); channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); } diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 1ae311be5ec..2dcd2d3e385 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3463,3 +3463,180 @@ fn test_lost_preimage_monitor_events() { do_test_lost_preimage_monitor_events(true); do_test_lost_preimage_monitor_events(false); } + +fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool) { + // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the + // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets + // persisted (i.e. due to a block update) then the node crashes, prior to persisting the + // `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be + // lost. This isn't likely in a sync persist environment, but in an async one this could be an + // issue. 
+ // + // Note that this is only an issue for closed channels - `MonitorEvent`s only inform the + // `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup + // or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes + // completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved + // on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. + // + // Here we test that losing `MonitorEvent`s that contain HTLC resolution via timeouts does not + // cause us to lose a `PaymentFailed` event. + let mut cfg = test_default_channel_config(); + cfg.manually_accept_inbound_channels = true; + cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true; + let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())]; + + let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + let persister; + let new_chain_mon; + let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs); + let node_b_reload; + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + provide_anchor_reserves(&nodes); + + let node_a_id = nodes[0].node.get_our_node_id(); + let node_b_id = nodes[1].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2; + let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2; + + // Ensure all nodes are at the same height + let node_max_height = + nodes.iter().map(|node| node.blocks.lock().unwrap().len()).max().unwrap() as u32; + connect_blocks(&nodes[0], node_max_height - nodes[0].best_block_info().1); + connect_blocks(&nodes[1], node_max_height - nodes[1].best_block_info().1); + connect_blocks(&nodes[2], node_max_height - nodes[2].best_block_info().1); + + let (_, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 5_000_000); + let (_, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 5_000_000); + + nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + // Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs. 
+ let message = "Closed".to_owned(); + nodes[2] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone()) + .unwrap(); + check_added_monitors(&nodes[2], 1); + let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message }; + check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000); + check_closed_broadcast!(nodes[2], true); + + handle_bump_events(&nodes[2], true, 0); + let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(cs_commit_tx.len(), 1); + + let message = "Closed".to_owned(); + nodes[1] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone()) + .unwrap(); + check_added_monitors(&nodes[1], 1); + let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message }; + check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000); + check_closed_broadcast!(nodes[1], true); + + handle_bump_events(&nodes[1], true, 0); + let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(bs_commit_tx.len(), 1); + + let selected_commit_tx = if on_counterparty_tx { + &cs_commit_tx[0] + } else { + &bs_commit_tx[0] + }; + + mine_transaction(&nodes[1], selected_commit_tx); + // If the block gets connected first we may re-broadcast B's commitment transaction before + // seeing the C's confirm. + nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clear(); + connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + let mut events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events(); + if on_counterparty_tx { + assert_eq!(events.len(), 1, "{events:?}"); + match events[0] { + Event::SpendableOutputs { .. } => {}, + _ => panic!("Unexpected event {events:?}"), + } + } else { + assert_eq!(events.len(), 0); + } + + connect_blocks(&nodes[1], TEST_FINAL_CLTV - ANTI_REORG_DELAY + 1); + if !on_counterparty_tx { + let mut events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events(); + assert_eq!(events.len(), 1, "{events:?}"); + match events.pop().unwrap() { + Event::BumpTransaction(bump_event) => { + nodes[1].bump_tx_handler.handle_event(&bump_event); + }, + _ => panic!("Unexpected event"), + } + } + let bs_htlc_timeouts = + nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(bs_htlc_timeouts.len(), 1); + + // Now replay the timeouts on node B, which after 6 confirmations should fail the HTLCs via + // `MonitorUpdate`s + mine_transactions(&nodes[1], &bs_htlc_timeouts.iter().collect::>()); + connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + + // Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we + // just processed a new block) but the ChannelManager was not. This should be exceedingly rare + // given we have to be connecting a block at the right moment and not manage to get a + // ChannelManager persisted after it does a thing that should immediately precede persistence, + // but with async persist it is more common. + // + // We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the + // latest state. 
+ let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events(); + assert_eq!(mon_events.len(), 1); + assert_eq!(mon_events[0].2.len(), 2); + + let node_ser = nodes[1].node.encode(); + let mon_a_ser = get_monitor!(nodes[1], chan_a).encode(); + let mon_b_ser = get_monitor!(nodes[1], chan_b).encode(); + let mons = &[&mon_a_ser[..], &mon_b_ser[..]]; + reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload); + + let timeout_events = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(timeout_events.len(), 3, "{timeout_events:?}"); + for ev in timeout_events { + match ev { + Event::PaymentPathFailed { payment_hash, .. } => { + assert_eq!(payment_hash, hash_b); + }, + Event::PaymentFailed { payment_hash, .. } => { + assert_eq!(payment_hash, Some(hash_b)); + }, + Event::HTLCHandlingFailed { prev_channel_id, .. } => { + assert_eq!(prev_channel_id, chan_a); + }, + _ => panic!("Wrong event {ev:?}"), + } + } + + nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); + reconnect_args.pending_cell_htlc_fails = (0, 0); + reconnect_nodes(reconnect_args); + + nodes[1].node.process_pending_htlc_forwards(); + check_added_monitors(&nodes[1], 1); + let bs_fail = get_htlc_update_msgs(&nodes[1], &node_a_id); + nodes[0].node.handle_update_fail_htlc(node_b_id, &bs_fail.update_fail_htlcs[0]); + commitment_signed_dance!(nodes[0], nodes[1], bs_fail.commitment_signed, true, true); + expect_payment_failed!(nodes[0], hash_a, false); +} + +#[test] +fn test_lost_timeout_monitor_events() { + do_test_lost_timeout_monitor_events(true); + do_test_lost_timeout_monitor_events(false); +} From c32ff14e4d4011558425bb1044e6c4494be25f3d Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Tue, 12 Aug 2025 20:46:44 +0000 Subject: [PATCH 08/13] f explicitly handle and test dust --- lightning/src/chain/channelmonitor.rs | 52 ++++++++++++++++----------- lightning/src/ln/monitor_tests.rs | 38 +++++++++++--------- 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 81fd8c3ca85..9760bb59cc4 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2989,32 +2989,21 @@ impl ChannelMonitor { pub(crate) fn get_onchain_failed_outbound_htlcs( &self, ) -> HashMap { - let us = self.inner.lock().unwrap(); - // We're only concerned with the confirmation count of HTLC transactions, and don't - // actually care how many confirmations a commitment transaction may or may not have. Thus, - // we look for either a FundingSpendConfirmation event or a funding_spend_confirmed. - let confirmed_txid = us.funding_spend_confirmed.or_else(|| { - us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| { - if let OnchainEvent::FundingSpendConfirmation { .. } = event.event { - Some(event.txid) - } else { - None - } - }) - }); - - if confirmed_txid.is_none() { - return new_hash_map(); - } - let mut res = new_hash_map(); + let us = self.inner.lock().unwrap(); macro_rules! 
walk_htlcs { ($holder_commitment: expr, $htlc_iter: expr) => { for (htlc, source) in $htlc_iter { let filter = |v: &&IrrevocablyResolvedHTLC| { v.commitment_tx_output_idx == htlc.transaction_output_index }; - if let Some(state) = us.htlcs_resolved_on_chain.iter().filter(filter).next() { + if htlc.transaction_output_index.is_none() { + if let Some(source) = source { + // Dust HTLCs are always implicitly failed once the commitment + // transaction reaches ANTI_REORG_DELAY confirmations. + res.insert(source.clone(), htlc.clone()); + } + } else if let Some(state) = us.htlcs_resolved_on_chain.iter().filter(filter).next() { if let Some(source) = source { if state.payment_preimage.is_none() { res.insert(source.clone(), htlc.clone()); @@ -3025,7 +3014,30 @@ impl ChannelMonitor { }; } - let txid = confirmed_txid.unwrap(); + + // We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment + // transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions + // to have been confirmed. + let confirmed_txid = us.funding_spend_confirmed.or_else(|| { + us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| { + if let OnchainEvent::FundingSpendConfirmation { .. } = event.event { + if event.height <= us.best_block.height - ANTI_REORG_DELAY + 1 { + Some(event.txid) + } else { + None + } + } else { + None + } + }) + }); + + let txid = if let Some(txid) = confirmed_txid { + txid + } else { + return res; + }; + if Some(txid) == us.funding.current_counterparty_commitment_txid || Some(txid) == us.funding.prev_counterparty_commitment_txid { diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 2dcd2d3e385..591c21b1278 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3464,7 +3464,7 @@ fn test_lost_preimage_monitor_events() { do_test_lost_preimage_monitor_events(false); } -fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool) { +fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool, dust_htlcs: bool) { // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets // persisted (i.e. due to a block update) then the node crashes, prior to persisting the @@ -3509,8 +3509,11 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool) { connect_blocks(&nodes[1], node_max_height - nodes[1].best_block_info().1); connect_blocks(&nodes[2], node_max_height - nodes[2].best_block_info().1); - let (_, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 5_000_000); - let (_, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 5_000_000); + send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 25_000_000); + + let amt = if dust_htlcs { 1_000 } else { 10_000_000 }; + let (_, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], amt); + let (_, hash_b, ..) 
= route_payment(&nodes[1], &[&nodes[2]], amt); nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id()); nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id()); @@ -3568,23 +3571,22 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool) { connect_blocks(&nodes[1], TEST_FINAL_CLTV - ANTI_REORG_DELAY + 1); if !on_counterparty_tx { - let mut events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events(); - assert_eq!(events.len(), 1, "{events:?}"); - match events.pop().unwrap() { - Event::BumpTransaction(bump_event) => { - nodes[1].bump_tx_handler.handle_event(&bump_event); - }, - _ => panic!("Unexpected event"), + if !dust_htlcs { + handle_bump_events(&nodes[1], false, 1); } } let bs_htlc_timeouts = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); - assert_eq!(bs_htlc_timeouts.len(), 1); + if dust_htlcs { + assert_eq!(bs_htlc_timeouts.len(), 0); + } else { + assert_eq!(bs_htlc_timeouts.len(), 1); - // Now replay the timeouts on node B, which after 6 confirmations should fail the HTLCs via - // `MonitorUpdate`s - mine_transactions(&nodes[1], &bs_htlc_timeouts.iter().collect::>()); - connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + // Now replay the timeouts on node B, which after 6 confirmations should fail the HTLCs via + // `MonitorUpdate`s + mine_transactions(&nodes[1], &bs_htlc_timeouts.iter().collect::>()); + connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + } // Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we // just processed a new block) but the ChannelManager was not. This should be exceedingly rare @@ -3637,6 +3639,8 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool) { #[test] fn test_lost_timeout_monitor_events() { - do_test_lost_timeout_monitor_events(true); - do_test_lost_timeout_monitor_events(false); + do_test_lost_timeout_monitor_events(true, false); + do_test_lost_timeout_monitor_events(false, false); + do_test_lost_timeout_monitor_events(true, true); + do_test_lost_timeout_monitor_events(false, true); } From 6cd8b7999f239f84f2265fe3f85360511a669970 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 13 Aug 2025 14:29:31 +0000 Subject: [PATCH 09/13] f rustfmt --- lightning/src/chain/channelmonitor.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 9760bb59cc4..81de5522e8d 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -3003,7 +3003,9 @@ impl ChannelMonitor { // transaction reaches ANTI_REORG_DELAY confirmations. res.insert(source.clone(), htlc.clone()); } - } else if let Some(state) = us.htlcs_resolved_on_chain.iter().filter(filter).next() { + } else if let Some(state) = + us.htlcs_resolved_on_chain.iter().filter(filter).next() + { if let Some(source) = source { if state.payment_preimage.is_none() { res.insert(source.clone(), htlc.clone()); @@ -3014,7 +3016,6 @@ impl ChannelMonitor { }; } - // We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment // transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions // to have been confirmed. 
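An aside on the depth check used in `get_onchain_failed_outbound_htlcs`
above: a transaction confirmed at height h has best_height - h + 1
confirmations, so requiring `ANTI_REORG_DELAY` confirmations is the same
as requiring `event.height <= best_block.height - ANTI_REORG_DELAY + 1`,
which is the comparison the patch performs. Below is a small standalone
sketch of that arithmetic; the constant value 6 matches `ANTI_REORG_DELAY`
in LDK, and the helper names are made up for illustration:

/// Matches the value of `ANTI_REORG_DELAY` used by LDK.
const ANTI_REORG_DELAY: u32 = 6;

/// A transaction confirmed at `conf_height` has `best_height - conf_height + 1`
/// confirmations (once `best_height >= conf_height`).
fn confirmations(conf_height: u32, best_height: u32) -> u32 {
    best_height.saturating_sub(conf_height) + 1
}

/// True once the confirming block is buried deeply enough that the resolution
/// is treated as irreversible, i.e. `conf_height <= best_height - ANTI_REORG_DELAY + 1`,
/// written so it cannot underflow at low heights.
fn reached_anti_reorg_depth(conf_height: u32, best_height: u32) -> bool {
    confirmations(conf_height, best_height) >= ANTI_REORG_DELAY
}

fn main() {
    // Confirmed in block 100: the sixth confirmation arrives with block 105.
    assert!(!reached_anti_reorg_depth(100, 104));
    assert!(reached_anti_reorg_depth(100, 105));
}
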
From c355b639e426e64e86bf3f377b7de3107e5f5295 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 13 Aug 2025 21:49:48 +0000 Subject: [PATCH 10/13] f spurious default re-set --- lightning/src/ln/monitor_tests.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 591c21b1278..049429398ce 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3625,9 +3625,7 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool, dust_htlcs: boo nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id()); - let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); - reconnect_args.pending_cell_htlc_fails = (0, 0); - reconnect_nodes(reconnect_args); + reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1])); nodes[1].node.process_pending_htlc_forwards(); check_added_monitors(&nodes[1], 1); From 6074cf52efe79e7aa2a96124b1a1e6f303f6ec5f Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 15 Aug 2025 22:28:44 +0000 Subject: [PATCH 11/13] f handle non-latest transactions confirming correctly and update test --- lightning/src/chain/channelmonitor.rs | 103 +++++++++++++++------- lightning/src/ln/channelmanager.rs | 6 +- lightning/src/ln/monitor_tests.rs | 119 ++++++++++++++++++++------ 3 files changed, 166 insertions(+), 62 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 81de5522e8d..4b3debaeefc 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2986,35 +2986,9 @@ impl ChannelMonitor { /// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via /// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine /// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted. - pub(crate) fn get_onchain_failed_outbound_htlcs( - &self, - ) -> HashMap { + pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap { let mut res = new_hash_map(); let us = self.inner.lock().unwrap(); - macro_rules! walk_htlcs { - ($holder_commitment: expr, $htlc_iter: expr) => { - for (htlc, source) in $htlc_iter { - let filter = |v: &&IrrevocablyResolvedHTLC| { - v.commitment_tx_output_idx == htlc.transaction_output_index - }; - if htlc.transaction_output_index.is_none() { - if let Some(source) = source { - // Dust HTLCs are always implicitly failed once the commitment - // transaction reaches ANTI_REORG_DELAY confirmations. - res.insert(source.clone(), htlc.clone()); - } - } else if let Some(state) = - us.htlcs_resolved_on_chain.iter().filter(filter).next() - { - if let Some(source) = source { - if state.payment_preimage.is_none() { - res.insert(source.clone(), htlc.clone()); - } - } - } - } - }; - } // We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment // transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions @@ -3033,18 +3007,75 @@ impl ChannelMonitor { }) }); - let txid = if let Some(txid) = confirmed_txid { + let confirmed_txid = if let Some(txid) = confirmed_txid { txid } else { return res; }; - if Some(txid) == us.funding.current_counterparty_commitment_txid - || Some(txid) == us.funding.prev_counterparty_commitment_txid + macro_rules! 
walk_htlcs { + ($holder_commitment: expr, $htlc_iter: expr) => { + let mut walk_candidate_htlcs = |htlcs| { + for &(ref candidate_htlc, ref candidate_source) in htlcs { + let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc; + let candidate_source: &Option> = &candidate_source; + + let source: &HTLCSource = if let Some(source) = candidate_source { + &*source + } else { + continue; + }; + let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src); + if let Some((confirmed_htlc, _)) = confirmed { + let filter = |v: &&IrrevocablyResolvedHTLC| { + v.commitment_tx_output_idx + == confirmed_htlc.transaction_output_index + }; + + // The HTLC was included in the confirmed commitment transaction, so we + // need to see if it has been irrevocably failed yet. + if confirmed_htlc.transaction_output_index.is_none() { + // Dust HTLCs are always implicitly failed once the commitment + // transaction reaches ANTI_REORG_DELAY confirmations. + res.insert(source.clone(), confirmed_htlc.payment_hash); + } else if let Some(state) = + us.htlcs_resolved_on_chain.iter().filter(filter).next() + { + if state.payment_preimage.is_none() { + res.insert(source.clone(), confirmed_htlc.payment_hash); + } + } + } else { + // The HTLC was not included in the confirmed commitment transaction, + // which has now reached ANTI_REORG_DELAY confirmations and thus the + // HTLC has been failed. + res.insert(source.clone(), candidate_htlc.payment_hash); + } + } + }; + + // We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see + // `fail_unbroadcast_htlcs` for a description of why). + if let Some(ref txid) = us.funding.current_counterparty_commitment_txid { + if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) { + walk_candidate_htlcs(htlcs); + } + } + if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid { + if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) { + walk_candidate_htlcs(htlcs); + } + } + }; + } + + if Some(confirmed_txid) == us.funding.current_counterparty_commitment_txid + || Some(confirmed_txid) == us.funding.prev_counterparty_commitment_txid { + let htlcs = funding.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap(); walk_htlcs!( false, - us.funding.counterparty_claimable_outpoints.get(&txid).unwrap().iter().filter_map( + htlcs.iter().filter_map( |(a, b)| { if let &Some(ref source) = b { Some((a, Some(&**source))) @@ -3054,12 +3085,18 @@ impl ChannelMonitor { } ) ); - } else if txid == us.funding.current_holder_commitment_tx.trust().txid() { + } else if confirmed_txid == us.funding.current_holder_commitment_tx.trust().txid() { walk_htlcs!(true, holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES)); } else if let Some(prev_commitment_tx) = &us.funding.prev_holder_commitment_tx { - if txid == prev_commitment_tx.trust().txid() { + if confirmed_txid == prev_commitment_tx.trust().txid() { walk_htlcs!(true, holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap()); + } else { + let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[]; + walk_htlcs!(false, htlcs_confirmed.iter()); } + } else { + let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[]; + walk_htlcs!(false, htlcs_confirmed.iter()); } res diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 5ac458a2d06..0a4d8b1cc78 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -16541,15 +16541,15 @@ where }, } } - for (htlc_source, htlc) in 
monitor.get_onchain_failed_outbound_htlcs() { + for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() { log_info!( args.logger, "Failing HTLC with payment hash {} as it was resolved on-chain.", - htlc.payment_hash + payment_hash ); failed_htlcs.push(( htlc_source, - htlc.payment_hash, + payment_hash, monitor.get_counterparty_node_id(), monitor.channel_id(), LocalHTLCFailureReason::OnChainTimeout, diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 049429398ce..adb1fd54cb5 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3464,7 +3464,16 @@ fn test_lost_preimage_monitor_events() { do_test_lost_preimage_monitor_events(false); } -fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool, dust_htlcs: bool) { +#[derive(PartialEq)] +enum CommitmentType { + RevokedCounterparty, + LatestCounterparty, + PreviousCounterparty, + LocalWithoutLastHTLC, + LocalWithLastHTLC, +} + +fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: bool) { // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets // persisted (i.e. due to a block update) then the node crashes, prior to persisting the @@ -3511,9 +3520,37 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool, dust_htlcs: boo send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 25_000_000); + let cs_revoked_commit = get_local_commitment_txn!(nodes[2], chan_b); + assert_eq!(cs_revoked_commit.len(), 1); + let amt = if dust_htlcs { 1_000 } else { 10_000_000 }; let (_, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], amt); - let (_, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], amt); + + let cs_previous_commit = get_local_commitment_txn!(nodes[2], chan_b); + assert_eq!(cs_previous_commit.len(), 1); + + let (route, hash_b, _, payment_secret_b) = + get_route_and_payment_hash!(nodes[1], nodes[2], amt); + let onion = RecipientOnionFields::secret_only(payment_secret_b); + nodes[1].node.send_payment_with_route(route, hash_b, onion, PaymentId(hash_b.0)).unwrap(); + check_added_monitors(&nodes[1], 1); + + let updates = get_htlc_update_msgs(&nodes[1], &node_c_id); + nodes[2].node.handle_update_add_htlc(node_b_id, &updates.update_add_htlcs[0]); + nodes[2].node.handle_commitment_signed_batch_test(node_b_id, &updates.commitment_signed); + check_added_monitors(&nodes[2], 1); + + let (cs_raa, cs_cs) = get_revoke_commit_msgs!(nodes[2], node_b_id); + if confirm_tx == CommitmentType::LocalWithLastHTLC { + // Only deliver the last RAA + CS if we need to update the local commitment with the third + // HTLC. 
+ nodes[1].node.handle_revoke_and_ack(node_c_id, &cs_raa); + check_added_monitors(&nodes[1], 1); + nodes[1].node.handle_commitment_signed_batch_test(node_c_id, &cs_cs); + check_added_monitors(&nodes[1], 1); + + let _bs_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, node_c_id); + } nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id()); nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id()); @@ -3547,44 +3584,68 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool, dust_htlcs: boo let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); assert_eq!(bs_commit_tx.len(), 1); - let selected_commit_tx = if on_counterparty_tx { - &cs_commit_tx[0] - } else { - &bs_commit_tx[0] + let selected_commit_tx = match confirm_tx { + CommitmentType::RevokedCounterparty => &cs_revoked_commit[0], + CommitmentType::PreviousCounterparty => &cs_previous_commit[0], + CommitmentType::LatestCounterparty => &cs_commit_tx[0], + CommitmentType::LocalWithoutLastHTLC|CommitmentType::LocalWithLastHTLC => &bs_commit_tx[0], }; mine_transaction(&nodes[1], selected_commit_tx); // If the block gets connected first we may re-broadcast B's commitment transaction before - // seeing the C's confirm. - nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clear(); + // seeing the C's confirm. In any case, if we confirmed the revoked counterparty commitment + // transaction, we want to go ahead and confirm the spend of it. + let bs_transactions = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + if confirm_tx == CommitmentType::RevokedCounterparty { + assert!(bs_transactions.len() == 1 || bs_transactions.len() == 2); + mine_transaction(&nodes[1], bs_transactions.last().unwrap()); + } else { + assert!(bs_transactions.len() == 1 || bs_transactions.len() == 0); + } + connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); let mut events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events(); - if on_counterparty_tx { - assert_eq!(events.len(), 1, "{events:?}"); - match events[0] { - Event::SpendableOutputs { .. } => {}, - _ => panic!("Unexpected event {events:?}"), - } - } else { - assert_eq!(events.len(), 0); + match confirm_tx { + CommitmentType::LocalWithoutLastHTLC|CommitmentType::LocalWithLastHTLC => { + assert_eq!(events.len(), 0, "{events:?}"); + }, + CommitmentType::PreviousCounterparty|CommitmentType::LatestCounterparty => { + assert_eq!(events.len(), 1, "{events:?}"); + match events[0] { + Event::SpendableOutputs { .. } => {}, + _ => panic!("Unexpected event {events:?}"), + } + }, + CommitmentType::RevokedCounterparty => { + assert_eq!(events.len(), 2, "{events:?}"); + for event in events { + match event { + Event::SpendableOutputs { .. 
} => {},
+                    _ => panic!("Unexpected event {event:?}"),
+                }
+            }
+        },
+    }
 
-    connect_blocks(&nodes[1], TEST_FINAL_CLTV - ANTI_REORG_DELAY + 1);
-    if !on_counterparty_tx {
-        if !dust_htlcs {
-            handle_bump_events(&nodes[1], false, 1);
+    if confirm_tx != CommitmentType::RevokedCounterparty {
+        connect_blocks(&nodes[1], TEST_FINAL_CLTV - ANTI_REORG_DELAY + 1);
+        if confirm_tx == CommitmentType::LocalWithoutLastHTLC || confirm_tx == CommitmentType::LocalWithLastHTLC {
+            if !dust_htlcs {
+                handle_bump_events(&nodes[1], false, 1);
+            }
         }
     }
+
     let bs_htlc_timeouts = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
-    if dust_htlcs {
+    if dust_htlcs || confirm_tx == CommitmentType::RevokedCounterparty {
         assert_eq!(bs_htlc_timeouts.len(), 0);
     } else {
         assert_eq!(bs_htlc_timeouts.len(), 1);
 
         // Now replay the timeouts on node B, which after 6 confirmations should fail the HTLCs via
         // `MonitorUpdate`s
-        mine_transactions(&nodes[1], &bs_htlc_timeouts.iter().collect::<Vec<_>>());
+        mine_transaction(&nodes[1], &bs_htlc_timeouts[0]);
 
         connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1);
     }
 
@@ -3637,8 +3698,14 @@ fn do_test_lost_timeout_monitor_events(on_counterparty_tx: bool, dust_htlcs: boo
 
 #[test]
 fn test_lost_timeout_monitor_events() {
-    do_test_lost_timeout_monitor_events(true, false);
-    do_test_lost_timeout_monitor_events(false, false);
-    do_test_lost_timeout_monitor_events(true, true);
-    do_test_lost_timeout_monitor_events(false, true);
+    do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, false);
+    do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, true);
+    do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, false);
+    do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, true);
+    do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, false);
+    do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, true);
+    do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, false);
+    do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, true);
+    do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, false);
+    do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, true);
 }

From 9c7adaaa701cb1cfcdd5ca8fc20ea011ca87f698 Mon Sep 17 00:00:00 2001
From: Matt Corallo 
Date: Fri, 15 Aug 2025 23:22:34 +0000
Subject: [PATCH 12/13] f update to new funding logic

---
 lightning/src/chain/channelmonitor.rs | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs
index 4b3debaeefc..c29fcbe88b4 100644
--- a/lightning/src/chain/channelmonitor.rs
+++ b/lightning/src/chain/channelmonitor.rs
@@ -3021,7 +3021,7 @@ impl ChannelMonitor {
 
             let candidate_source: &Option> = &candidate_source;
             let source: &HTLCSource = if let Some(source) = candidate_source {
-                &*source
+                source
             } else {
                 continue;
             };
@@ -3069,25 +3069,25 @@ impl ChannelMonitor {
             };
         }
 
-        if Some(confirmed_txid) == us.funding.current_counterparty_commitment_txid
-            || Some(confirmed_txid) == us.funding.prev_counterparty_commitment_txid
+        let funding = get_confirmed_funding_scope!(us);
+
+        if Some(confirmed_txid) == funding.current_counterparty_commitment_txid
+            || Some(confirmed_txid) == funding.prev_counterparty_commitment_txid
         {
             let htlcs = funding.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap();
             walk_htlcs!(
                 false,
-                htlcs.iter().filter_map(
-                    |(a, b)| {
-                        if 
let &Some(ref source) = b { - Some((a, Some(&**source))) - } else { - None - } + htlcs.iter().filter_map(|(a, b)| { + if let &Some(ref source) = b { + Some((a, Some(&**source))) + } else { + None } - ) + }) ); - } else if confirmed_txid == us.funding.current_holder_commitment_tx.trust().txid() { + } else if confirmed_txid == funding.current_holder_commitment_tx.trust().txid() { walk_htlcs!(true, holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES)); - } else if let Some(prev_commitment_tx) = &us.funding.prev_holder_commitment_tx { + } else if let Some(prev_commitment_tx) = &funding.prev_holder_commitment_tx { if confirmed_txid == prev_commitment_tx.trust().txid() { walk_htlcs!(true, holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap()); } else { From 69b281b28592a03fc8fced124e69b20d6d781bb2 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 15 Aug 2025 23:25:35 +0000 Subject: [PATCH 13/13] f sp --- lightning/src/ln/monitor_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index adb1fd54cb5..2c5d0d9e061 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3474,7 +3474,7 @@ enum CommitmentType { } fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: bool) { - // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the + // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets // persisted (i.e. due to a block update) then the node crashes, prior to persisting the // `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be
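The comment above describes the window these tests exercise: a `MonitorEvent` is consumed by the `ChannelManager` in memory, so if the node restarts from an older `ChannelManager` snapshot the event is gone and the HTLC failure has to be re-derived from the `ChannelMonitor` itself (which is what the `get_onchain_failed_outbound_htlcs` loop in the reload path earlier in this series does). The following self-contained sketch shows only that recovery shape; it is not LDK code, and `Monitor`, `Manager`, `onchain_failed_htlcs`, `process_monitor_events`, and `replay_from_monitor` are hypothetical stand-ins.

use std::collections::HashSet;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct PaymentHash([u8; 32]);

// Stands in for a persisted `ChannelMonitor`: its record of HTLCs that failed
// on-chain survives a restart.
struct Monitor {
    onchain_failed_htlcs: Vec<PaymentHash>,
}

// Stands in for the `ChannelManager`: it may come back from an *older*
// persisted snapshot that never saw the monitor's events.
#[derive(Default)]
struct Manager {
    failed_htlcs: HashSet<PaymentHash>,
}

impl Manager {
    // Normal operation: consume the monitor's "events" in memory. If we crash
    // before this state is persisted, the events are not redelivered.
    fn process_monitor_events(&mut self, monitor: &Monitor) {
        for hash in &monitor.onchain_failed_htlcs {
            self.failed_htlcs.insert(*hash);
        }
    }

    // Startup path: rather than trusting possibly-lost events, re-derive the
    // failed HTLCs directly from the durable monitor state.
    fn replay_from_monitor(old_snapshot: Manager, monitor: &Monitor) -> Manager {
        let mut mgr = old_snapshot;
        mgr.process_monitor_events(monitor);
        mgr
    }
}

fn main() {
    let monitor = Monitor { onchain_failed_htlcs: vec![PaymentHash([42; 32])] };

    // Pre-crash, the live manager saw the failure...
    let mut live = Manager::default();
    live.process_monitor_events(&monitor);
    assert!(live.failed_htlcs.contains(&PaymentHash([42; 32])));

    // ...but only a stale manager snapshot (taken before the event) survived.
    let stale_snapshot = Manager::default();

    // Replaying from the monitor on reload recovers the failure anyway.
    let reloaded = Manager::replay_from_monitor(stale_snapshot, &monitor);
    assert!(reloaded.failed_htlcs.contains(&PaymentHash([42; 32])));
}

The only point of the sketch is the ordering: the durable object is re-queried on reload, so losing the in-memory event does not lose the failure itself.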