From 8beea92e82fc55e60307de3f058497558de2a7f2 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 22 Sep 2025 18:28:20 +0000 Subject: [PATCH 01/23] Correct gossip forwarding criteria while doing background sync If we're doing gossip backfill to a peer, we first forward all our `channel_announcement`s and `channel_update`s in SCID-order. While doing so, we don't forward any fresh `channel_update` messages for any channels which we haven't yet backfilled (as we'll eventually send the new update anyway, and it might get rejected without the corresponding `channel_announcement`). Sadly, our comparison for this was the wrong way, so we actually *only* forwarded updates which were for channels we haven't yet backfilled, and dropped updates for channels we already had backfilled. Backport of edc7903f17d57443191de63cdc7d935dc7521edd --- lightning/src/ln/peer_channel_encryptor.rs | 5 ++ lightning/src/ln/peer_handler.rs | 76 +++++++++++++++++++++- lightning/src/util/test_utils.rs | 2 +- 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/lightning/src/ln/peer_channel_encryptor.rs b/lightning/src/ln/peer_channel_encryptor.rs index 00e45afb4d8..6da697a02a1 100644 --- a/lightning/src/ln/peer_channel_encryptor.rs +++ b/lightning/src/ln/peer_channel_encryptor.rs @@ -564,6 +564,11 @@ impl MessageBuf { res[16 + 2..].copy_from_slice(&encoded_msg); Self(res) } + + #[cfg(test)] + pub(crate) fn fetch_encoded_msg_with_type_pfx(&self) -> Vec { + self.0.clone().split_off(16 + 2) + } } #[cfg(test)] diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index 9b649408423..629d6d12b96 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -632,7 +632,7 @@ impl Peer { } match self.sync_status { InitSyncTracker::NoSyncRequested => true, - InitSyncTracker::ChannelsSyncing(i) => i < channel_id, + InitSyncTracker::ChannelsSyncing(i) => channel_id < i, InitSyncTracker::NodesSyncing(_) => true, } } @@ -3294,6 +3294,80 @@ mod tests { assert_eq!(cfgs[1].routing_handler.chan_anns_recvd.load(Ordering::Acquire), 54); } + #[test] + fn test_forward_while_syncing() { + use crate::ln::peer_handler::tests::test_utils::get_dummy_channel_update; + + // Test forwarding new channel announcements while we're doing syncing. + let cfgs = create_peermgr_cfgs(2); + cfgs[0].routing_handler.request_full_sync.store(true, Ordering::Release); + cfgs[1].routing_handler.request_full_sync.store(true, Ordering::Release); + cfgs[0].routing_handler.announcement_available_for_sync.store(true, Ordering::Release); + cfgs[1].routing_handler.announcement_available_for_sync.store(true, Ordering::Release); + let peers = create_network(2, &cfgs); + + let (mut fd_a, mut fd_b) = establish_connection(&peers[0], &peers[1]); + + // Iterate a handful of times to exchange some messages + for _ in 0..150 { + peers[1].process_events(); + let a_read_data = fd_b.outbound_data.lock().unwrap().split_off(0); + assert!(!a_read_data.is_empty()); + + peers[0].read_event(&mut fd_a, &a_read_data).unwrap(); + peers[0].process_events(); + + let b_read_data = fd_a.outbound_data.lock().unwrap().split_off(0); + assert!(!b_read_data.is_empty()); + peers[1].read_event(&mut fd_b, &b_read_data).unwrap(); + + peers[0].process_events(); + assert_eq!( + fd_a.outbound_data.lock().unwrap().len(), + 0, + "Until A receives data, it shouldn't send more messages" + ); + } + + // Forward one more gossip backfill message but don't flush it so that we can examine the + // unencrypted message for broadcasts. 
+ fd_b.hang_writes.store(true, Ordering::Relaxed); + peers[1].process_events(); + + { + let peer_lock = peers[1].peers.read().unwrap(); + let peer = peer_lock.get(&fd_b).unwrap().lock().unwrap(); + assert_eq!(peer.pending_outbound_buffer.len(), 1); + assert_eq!(peer.gossip_broadcast_buffer.len(), 0); + } + + // At this point we should have sent channel announcements up to roughly SCID 150. Now + // build an updated update for SCID 100 and SCID 5000 and make sure only the one for SCID + // 100 gets forwarded + let msg_100 = get_dummy_channel_update(100); + let msg_ev_100 = MessageSendEvent::BroadcastChannelUpdate { msg: msg_100.clone() }; + + let msg_5000 = get_dummy_channel_update(5000); + let msg_ev_5000 = MessageSendEvent::BroadcastChannelUpdate { msg: msg_5000 }; + + fd_a.hang_writes.store(true, Ordering::Relaxed); + + cfgs[1].routing_handler.pending_events.lock().unwrap().push(msg_ev_100); + cfgs[1].routing_handler.pending_events.lock().unwrap().push(msg_ev_5000); + peers[1].process_events(); + + { + let peer_lock = peers[1].peers.read().unwrap(); + let peer = peer_lock.get(&fd_b).unwrap().lock().unwrap(); + assert_eq!(peer.pending_outbound_buffer.len(), 1); + assert_eq!(peer.gossip_broadcast_buffer.len(), 1); + + let pending_msg = &peer.gossip_broadcast_buffer[0]; + let expected = encode_msg!(&msg_100); + assert_eq!(expected, pending_msg.fetch_encoded_msg_with_type_pfx()); + } + } + #[test] fn test_handshake_timeout() { // Tests that we time out a peer still waiting on handshake completion after a full timer diff --git a/lightning/src/util/test_utils.rs b/lightning/src/util/test_utils.rs index 317b46a02ef..30e0df6401b 100644 --- a/lightning/src/util/test_utils.rs +++ b/lightning/src/util/test_utils.rs @@ -993,7 +993,7 @@ fn get_dummy_channel_announcement(short_chan_id: u64) -> msgs::ChannelAnnounceme } } -fn get_dummy_channel_update(short_chan_id: u64) -> msgs::ChannelUpdate { +pub fn get_dummy_channel_update(short_chan_id: u64) -> msgs::ChannelUpdate { use bitcoin::secp256k1::ffi::Signature as FFISignature; let network = Network::Testnet; msgs::ChannelUpdate { From 9501d82c4526fcd0f732882ce2a02860d83df508 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 18 Sep 2025 01:28:41 +0000 Subject: [PATCH 02/23] Immediately archive `ChannelMonitor`s for inbound unfuned channels If a peer opens a channel to us, but never actually broadcasts the funding transaction, we'll still keep a `ChannelMonitor` around for the channel. While we maybe shouldn't do this either, when the channel ultimately times out 2016 blocks later, we should at least immediately archive the `ChannelMonitor`, which we do here. 
Fixes #3384 Backport of 744664e88f45f85d60471e29951571db43b8badf Addressed nontrivial conflicts in: * lightning/src/chain/channelmonitor.rs due to splicing changes in `Balance` creation upstream and trivial ones in: * lightning/src/ln/functional_tests.rs --- lightning/src/chain/channelmonitor.rs | 54 +++++++++++++++++++++------ lightning/src/ln/functional_tests.rs | 26 ++++++++++++- 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 9522d06c638..fdc77ec7eaa 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2041,6 +2041,16 @@ impl ChannelMonitor { let current_height = self.current_best_block().height; let mut inner = self.inner.lock().unwrap(); + if inner.is_closed_without_updates() + && is_all_funds_claimed + && !inner.funding_spend_seen + { + // We closed the channel without ever advancing it and didn't have any funds in it. + // We should immediately archive this monitor as there's nothing for us to ever do with + // it. + return (true, false); + } + if is_all_funds_claimed && !inner.funding_spend_seen { debug_assert!(false, "We should see funding spend by the time a monitor clears out"); is_all_funds_claimed = false; @@ -2500,18 +2510,28 @@ impl ChannelMonitor { } } } - res.push(Balance::ClaimableOnChannelClose { - amount_satoshis: us.current_holder_commitment_tx.to_self_value_sat + claimable_inbound_htlc_value_sat, - transaction_fee_satoshis: if us.holder_pays_commitment_tx_fee.unwrap_or(true) { - chan_utils::commit_tx_fee_sat( - us.current_holder_commitment_tx.feerate_per_kw, nondust_htlc_count, - us.onchain_tx_handler.channel_type_features()) - } else { 0 }, - outbound_payment_htlc_rounded_msat, - outbound_forwarded_htlc_rounded_msat, - inbound_claiming_htlc_rounded_msat, - inbound_htlc_rounded_msat, - }); + // Only push a primary balance if either the channel isn't closed or we've advanced the + // channel state machine at least once (implying there are multiple previous commitment + // transactions) or we actually have a balance. + // Avoiding including a `Balance` if none of these are true allows us to prune monitors + // for chanels that were opened inbound to us but where the funding transaction never + // confirmed at all. + let amount_satoshis = + us.current_holder_commitment_tx.to_self_value_sat + claimable_inbound_htlc_value_sat; + if !us.is_closed_without_updates() || amount_satoshis != 0 { + res.push(Balance::ClaimableOnChannelClose { + amount_satoshis, + transaction_fee_satoshis: if us.holder_pays_commitment_tx_fee.unwrap_or(true) { + chan_utils::commit_tx_fee_sat( + us.current_holder_commitment_tx.feerate_per_kw, nondust_htlc_count, + us.onchain_tx_handler.channel_type_features()) + } else { 0 }, + outbound_payment_htlc_rounded_msat, + outbound_forwarded_htlc_rounded_msat, + inbound_claiming_htlc_rounded_msat, + inbound_htlc_rounded_msat, + }); + } } res @@ -3291,6 +3311,16 @@ impl ChannelMonitorImpl { } else { ret } } + /// Returns true if the channel has been closed (i.e. no further updates are allowed) and no + /// commitment state updates ever happened. 
+ fn is_closed_without_updates(&self) -> bool { + let mut commitment_not_advanced = + self.current_counterparty_commitment_number == INITIAL_COMMITMENT_NUMBER; + commitment_not_advanced &= + self.current_holder_commitment_number == INITIAL_COMMITMENT_NUMBER; + (self.holder_tx_signed || self.lockdown_from_offchain) && commitment_not_advanced + } + fn no_further_updates_allowed(&self) -> bool { self.funding_spend_seen || self.lockdown_from_offchain || self.holder_tx_signed } diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 2cbf04a40ff..74c2c15337a 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -8306,7 +8306,11 @@ fn test_channel_conf_timeout() { let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); let nodes = create_network(2, &node_cfgs, &node_chanmgrs); - let _funding_tx = create_chan_between_nodes_with_value_init(&nodes[0], &nodes[1], 1_000_000, 100_000); + let funding_tx = create_chan_between_nodes_with_value_init(&nodes[0], &nodes[1], 1_000_000, 100_000); + + // Inbound channels which haven't advanced state at all and never were funded will generate + // claimable `Balance`s until they're closed. + assert!(!nodes[1].chain_monitor.chain_monitor.get_claimable_balances(&[]).is_empty()); // The outbound node should wait forever for confirmation: // This matches `channel::FUNDING_CONF_DEADLINE_BLOCKS` and BOLT 2's suggested timeout, thus is @@ -8319,6 +8323,10 @@ fn test_channel_conf_timeout() { check_added_monitors!(nodes[1], 0); assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty()); + nodes[1].chain_monitor.chain_monitor.archive_fully_resolved_channel_monitors(); + assert_eq!(nodes[1].chain_monitor.chain_monitor.list_monitors().len(), 1); + assert!(!nodes[1].chain_monitor.chain_monitor.get_claimable_balances(&[]).is_empty()); + connect_blocks(&nodes[1], 1); check_added_monitors!(nodes[1], 1); check_closed_event!(nodes[1], 1, ClosureReason::FundingTimedOut, [nodes[0].node.get_our_node_id()], 1000000); @@ -8331,6 +8339,22 @@ fn test_channel_conf_timeout() { }, _ => panic!("Unexpected event"), } + + // Once an inbound never-confirmed channel is closed, it will no longer generate any claimable + // `Balance`s. + assert!(nodes[1].chain_monitor.chain_monitor.get_claimable_balances(&[]).is_empty()); + + // Once the funding times out the monitor should be immediately archived. + nodes[1].chain_monitor.chain_monitor.archive_fully_resolved_channel_monitors(); + assert_eq!(nodes[1].chain_monitor.chain_monitor.list_monitors().len(), 0); + assert!(nodes[1].chain_monitor.chain_monitor.get_claimable_balances(&[]).is_empty()); + + // Remove the corresponding outputs and transactions the chain source is + // watching. This is to make sure the `Drop` function assertions pass. + nodes[1].chain_source.remove_watched_txn_and_outputs( + OutPoint { txid: funding_tx.compute_txid(), index: 0 }, + funding_tx.output[0].script_pubkey.clone(), + ); } #[test] From 50e42fc2060b16e6e9e5738ce35f129c0a995520 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 17 Sep 2025 23:22:46 +0000 Subject: [PATCH 03/23] Speed up `remove_stale_channels_and_tracking` nontrivially During startup, the lightning protocol forces us to fetch a ton of gossip for channels where there is a `channel_update` in only one direction. We then have to wait around a while until we can prune the crap cause we don't know when the gossip sync has completed. 
Sadly, doing a large prune via `remove_stale_channels_and_tracking` is somewhat slow. Removing a large portion of our graph currently takes a bit more than 7.5 seconds on an i9-14900K, which can ultimately ~hang a node with a few less GHz ~forever. The bulk of this time is in our `IndexedMap` removals, where we walk the entire `keys` `Vec` to remove the entry, then shift it down after removing. Here we shift to a bulk removal model when removing channels, doing a single `Vec` iterate + shift. This reduces the same test to around 1.38 seconds on the same hardware. Backport of cc028f4cb1c5e1f06e92b122e21fccee80d269ff without conflicts. --- lightning/src/routing/gossip.rs | 17 ++++++++--------- lightning/src/util/indexed_map.rs | 12 ++++++++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/lightning/src/routing/gossip.rs b/lightning/src/routing/gossip.rs index 7eb82722134..c9aaf155194 100644 --- a/lightning/src/routing/gossip.rs +++ b/lightning/src/routing/gossip.rs @@ -2351,9 +2351,7 @@ where return; } let min_time_unix: u32 = (current_time_unix - STALE_CHANNEL_UPDATE_AGE_LIMIT_SECS) as u32; - // Sadly BTreeMap::retain was only stabilized in 1.53 so we can't switch to it for some - // time. - let mut scids_to_remove = Vec::new(); + let mut scids_to_remove = new_hash_set(); for (scid, info) in channels.unordered_iter_mut() { if info.one_to_two.is_some() && info.one_to_two.as_ref().unwrap().last_update < min_time_unix @@ -2377,18 +2375,19 @@ where if announcement_received_timestamp < min_time_unix as u64 { log_gossip!(self.logger, "Removing channel {} because both directional updates are missing and its announcement timestamp {} being below {}", scid, announcement_received_timestamp, min_time_unix); - scids_to_remove.push(*scid); + scids_to_remove.insert(*scid); } } } if !scids_to_remove.is_empty() { let mut nodes = self.nodes.write().unwrap(); - for scid in scids_to_remove { - let info = channels - .remove(&scid) - .expect("We just accessed this scid, it should be present"); + let mut removed_channels_lck = self.removed_channels.lock().unwrap(); + + let channels_removed_bulk = channels.remove_fetch_bulk(&scids_to_remove); + removed_channels_lck.reserve(channels_removed_bulk.len()); + for (scid, info) in channels_removed_bulk { self.remove_channel_in_nodes(&mut nodes, &info, scid); - self.removed_channels.lock().unwrap().insert(scid, Some(current_time_unix)); + removed_channels_lck.insert(scid, Some(current_time_unix)); } } diff --git a/lightning/src/util/indexed_map.rs b/lightning/src/util/indexed_map.rs index 34860e3d68a..83551512177 100644 --- a/lightning/src/util/indexed_map.rs +++ b/lightning/src/util/indexed_map.rs @@ -72,6 +72,18 @@ impl IndexedMap { ret } + /// Removes elements with the given `keys` in bulk, returning the set of removed elements. + pub fn remove_fetch_bulk(&mut self, keys: &HashSet) -> Vec<(K, V)> { + let mut res = Vec::with_capacity(keys.len()); + for key in keys.iter() { + if let Some((k, v)) = self.map.remove_entry(key) { + res.push((k, v)); + } + } + self.keys.retain(|k| !keys.contains(k)); + res + } + /// Inserts the given `key`/`value` pair into the map, returning the element that was /// previously stored at the given `key`, if one exists. 
pub fn insert(&mut self, key: K, value: V) -> Option { From 560f17c385576ef2fdfb5a0bb552478d66b238ba Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 17 Sep 2025 23:42:37 +0000 Subject: [PATCH 04/23] Speed up `remove_stale_channels_and_tracking` nontrivially During startup, the lightning protocol forces us to fetch a ton of gossip for channels where there is a `channel_update` in only one direction. We then have to wait around a while until we can prune the crap cause we don't know when the gossip sync has completed. Sadly, doing a large prune via `remove_stale_channels_and_tracking` is somewhat slow. Removing a large portion of our graph currently takes a bit more than 7.5 seconds on an i9-14900K, which can ultimately ~hang a node with a few less GHz ~forever. The bulk of this time is in our `IndexedMap` removals, where we walk the entire `keys` `Vec` to remove the entry, then shift it down after removing. In the previous commit we shifted to a bulk removal model for channels, here we do the same for nodes. This reduces the same test to around 340 milliseconds on the same hardware. Backport of 28b526acd5eba89dd2f38b692bcff6cead982c22 Resolved trivial `use` conflicts in: * lightning/src/routing/gossip.rs --- lightning/src/routing/gossip.rs | 25 ++++++++++++++++++++----- lightning/src/util/indexed_map.rs | 13 +++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lightning/src/routing/gossip.rs b/lightning/src/routing/gossip.rs index c9aaf155194..63f8e706133 100644 --- a/lightning/src/routing/gossip.rs +++ b/lightning/src/routing/gossip.rs @@ -36,7 +36,9 @@ use crate::ln::msgs::{ use crate::ln::types::ChannelId; use crate::routing::utxo::{self, UtxoLookup, UtxoResolver}; use crate::types::features::{ChannelFeatures, InitFeatures, NodeFeatures}; -use crate::util::indexed_map::{Entry as IndexedMapEntry, IndexedMap}; +use crate::util::indexed_map::{ + Entry as IndexedMapEntry, IndexedMap, OccupiedEntry as IndexedMapOccupiedEntry, +}; use crate::util::logger::{Level, Logger}; use crate::util::scid_utils::{block_from_scid, scid_from_parts, MAX_SCID_BLOCK}; use crate::util::ser::{MaybeReadable, Readable, ReadableArgs, RequiredWrapper, Writeable, Writer}; @@ -2384,11 +2386,15 @@ where let mut removed_channels_lck = self.removed_channels.lock().unwrap(); let channels_removed_bulk = channels.remove_fetch_bulk(&scids_to_remove); - removed_channels_lck.reserve(channels_removed_bulk.len()); + self.removed_node_counters.lock().unwrap().reserve(channels_removed_bulk.len()); + let mut nodes_to_remove = hash_set_with_capacity(channels_removed_bulk.len()); for (scid, info) in channels_removed_bulk { - self.remove_channel_in_nodes(&mut nodes, &info, scid); + self.remove_channel_in_nodes_callback(&mut nodes, &info, scid, |e| { + nodes_to_remove.insert(*e.key()); + }); removed_channels_lck.insert(scid, Some(current_time_unix)); } + nodes.remove_bulk(&nodes_to_remove); } let should_keep_tracking = |time: &mut Option| { @@ -2627,8 +2633,9 @@ where Ok(()) } - fn remove_channel_in_nodes( + fn remove_channel_in_nodes_callback)>( &self, nodes: &mut IndexedMap, chan: &ChannelInfo, short_channel_id: u64, + mut remove_node: RM, ) { macro_rules! 
remove_from_node { ($node_id: expr) => { @@ -2636,7 +2643,7 @@ where entry.get_mut().channels.retain(|chan_id| short_channel_id != *chan_id); if entry.get().channels.is_empty() { self.removed_node_counters.lock().unwrap().push(entry.get().node_counter); - entry.remove_entry(); + remove_node(entry); } } else { panic!( @@ -2649,6 +2656,14 @@ where remove_from_node!(chan.node_one); remove_from_node!(chan.node_two); } + + fn remove_channel_in_nodes( + &self, nodes: &mut IndexedMap, chan: &ChannelInfo, short_channel_id: u64, + ) { + self.remove_channel_in_nodes_callback(nodes, chan, short_channel_id, |e| { + e.remove_entry(); + }); + } } impl ReadOnlyNetworkGraph<'_> { diff --git a/lightning/src/util/indexed_map.rs b/lightning/src/util/indexed_map.rs index 83551512177..b97f9dfc119 100644 --- a/lightning/src/util/indexed_map.rs +++ b/lightning/src/util/indexed_map.rs @@ -84,6 +84,14 @@ impl IndexedMap { res } + /// Removes elements with the given `keys` in bulk. + pub fn remove_bulk(&mut self, keys: &HashSet) { + for key in keys.iter() { + self.map.remove(key); + } + self.keys.retain(|k| !keys.contains(k)); + } + /// Inserts the given `key`/`value` pair into the map, returning the element that was /// previously stored at the given `key`, if one exists. pub fn insert(&mut self, key: K, value: V) -> Option { @@ -222,6 +230,11 @@ impl<'a, K: Hash + Ord, V> OccupiedEntry<'a, K, V> { res } + /// Get a reference to the key at the position described by this entry. + pub fn key(&self) -> &K { + self.underlying_entry.key() + } + /// Get a reference to the value at the position described by this entry. pub fn get(&self) -> &V { self.underlying_entry.get() From 6aa7d875aaa214542718d80293a900293967cf3a Mon Sep 17 00:00:00 2001 From: Valentine Wallace Date: Wed, 17 Sep 2025 18:17:23 -0400 Subject: [PATCH 05/23] Don't auto-fail offers payments pre-HTLC lock in Previously, we had a bug that particularly affected async payments where if an outbound payment was in the state {Static}InvoiceReceived and there was a call to process_pending_htlc_forwards, the payment would be automatically abandoned. We would behave correctly and avoid abandoning if the payment was awaiting an invoice, but not if the payment had an invoice but the HTLCs weren't yet locked in. Backport of ebb8e79f80187071fd36475c48bd6b7c0502db37 Resolved trivial conflicts in: * lightning/src/ln/outbound_payment.rs and removed changes in * lightning/src/ln/async_payments_tests.rs as it doesn't exist in this branch. --- lightning/src/ln/outbound_payment.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lightning/src/ln/outbound_payment.rs b/lightning/src/ln/outbound_payment.rs index 1719b47dab3..5e12cc86a39 100644 --- a/lightning/src/ln/outbound_payment.rs +++ b/lightning/src/ln/outbound_payment.rs @@ -182,9 +182,13 @@ impl PendingOutboundPayment { params.insert_previously_failed_blinded_path(blinded_tail); } } - fn is_awaiting_invoice(&self) -> bool { + // Used for payments to BOLT 12 offers where we are either waiting for an invoice or have an + // invoice but have not locked in HTLCs for the payment yet. + fn is_pre_htlc_lock_in(&self) -> bool { match self { - PendingOutboundPayment::AwaitingInvoice { .. } => true, + PendingOutboundPayment::AwaitingInvoice { .. } + | PendingOutboundPayment::InvoiceReceived { .. } + | PendingOutboundPayment::StaticInvoiceReceived { .. 
} => true, _ => false, } } @@ -1128,7 +1132,7 @@ impl OutboundPayments { let mut outbounds = self.pending_outbound_payments.lock().unwrap(); outbounds.retain(|pmt_id, pmt| { let mut retain = true; - if !pmt.is_auto_retryable_now() && pmt.remaining_parts() == 0 && !pmt.is_awaiting_invoice() { + if !pmt.is_auto_retryable_now() && pmt.remaining_parts() == 0 && !pmt.is_pre_htlc_lock_in() { pmt.mark_abandoned(PaymentFailureReason::RetriesExhausted); if let PendingOutboundPayment::Abandoned { payment_hash, reason, .. } = pmt { pending_events.lock().unwrap().push_back((events::Event::PaymentFailed { @@ -1147,7 +1151,7 @@ impl OutboundPayments { let outbounds = self.pending_outbound_payments.lock().unwrap(); outbounds.iter().any(|(_, pmt)| !pmt.is_auto_retryable_now() && pmt.remaining_parts() == 0 && !pmt.is_fulfilled() && - !pmt.is_awaiting_invoice()) + !pmt.is_pre_htlc_lock_in()) } fn find_initial_route( From 1ba558e042bd1386b9f39ae0148c84b6f4e26bb9 Mon Sep 17 00:00:00 2001 From: shaavan Date: Fri, 3 Jan 2025 19:10:23 +0530 Subject: [PATCH 06/23] Introduce get_and_clear_pending_raa_blockers Note: The `actions_blocking_raa_monitor_updates` list may contain stale entries in the form of `(channel_id, [])`, which do not represent actual dangling actions. To handle this, stale entries are ignored when accumulating pending actions before clearing them. This ensures that the logic focuses only on relevant actions and avoids unnecessary accumulation of already processed data. Backport of 86a0109a7aab9f95408b36546996f233d134e944 --- lightning/src/ln/channelmanager.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index a9e14f17f99..c8adef8a7bc 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -10640,6 +10640,29 @@ where self.pending_outbound_payments.clear_pending_payments() } + #[cfg(any(test, feature = "_test_utils"))] + pub(crate) fn get_and_clear_pending_raa_blockers( + &self, + ) -> Vec<(ChannelId, Vec)> { + let per_peer_state = self.per_peer_state.read().unwrap(); + let mut pending_blockers = Vec::new(); + + for (_peer_pubkey, peer_state_mutex) in per_peer_state.iter() { + let mut peer_state = peer_state_mutex.lock().unwrap(); + + for (chan_id, actions) in peer_state.actions_blocking_raa_monitor_updates.iter() { + // Only collect the non-empty actions into `pending_blockers`. + if !actions.is_empty() { + pending_blockers.push((chan_id.clone(), actions.clone())); + } + } + + peer_state.actions_blocking_raa_monitor_updates.clear(); + } + + pending_blockers + } + /// When something which was blocking a channel from updating its [`ChannelMonitor`] (e.g. an /// [`Event`] being handled) completes, this should be called to restore the channel to normal /// operation. It will double-check that nothing *else* is also blocking the same channel from From 6b9003391bf8e8398afd5b9739df7e3fbe625375 Mon Sep 17 00:00:00 2001 From: shaavan Date: Fri, 3 Jan 2025 19:50:19 +0530 Subject: [PATCH 07/23] Introduce RAA Blocker check in Node::drop() Co-authored by: Matt Corallo <649246+TheBlueMatt@users.noreply.github.com> Backport of 45aa824f7c1c6ae61b75767d0bf8547558981a5a as a prerequisite to backporting 583a9a3621896b6e7c09fe3d27b1c68bcc0f187b. 
`use` conflicts resolved in: * lightning/src/ln/chanmon_update_fail_tests.rs --- lightning/src/ln/chanmon_update_fail_tests.rs | 79 ++++++++++++++----- lightning/src/ln/functional_test_utils.rs | 5 ++ 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 657e089d293..50a41533020 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -3361,22 +3361,25 @@ fn do_test_durable_preimages_on_closed_channel(close_chans_before_reload: bool, reconnect_nodes(reconnect_args); - // Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending - // `PaymentForwarded` event will finally be released. - let (outpoint, ab_update_id, _) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone(); - nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, ab_update_id); - - // If the A<->B channel was closed before we reload, we'll replay the claim against it on - // reload, causing the `PaymentForwarded` event to get replayed. - let evs = nodes[1].node.get_and_clear_pending_events(); - assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }); - for ev in evs { - if let Event::PaymentForwarded { .. } = ev { } - else { - panic!(); - } + } + + // Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending + // `PaymentForwarded` event will finally be released. + let (outpoint, ab_update_id, _) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone(); + nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, ab_update_id); + + // If the A<->B channel was closed before we reload, we'll replay the claim against it on + // reload, causing the `PaymentForwarded` event to get replayed. + let evs = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }); + for ev in evs { + if let Event::PaymentForwarded { .. } = ev { } + else { + panic!(); } + } + if !close_chans_before_reload || close_only_a { // Once we call `process_pending_events` the final `ChannelMonitor` for the B<->C channel // will fly, removing the payment preimage from it. check_added_monitors(&nodes[1], 1); @@ -3597,8 +3600,11 @@ fn do_test_glacial_peer_cant_hang(hold_chan_a: bool) { let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]); let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); - create_announced_chan_between_nodes(&nodes, 0, 1); - create_announced_chan_between_nodes(&nodes, 1, 2); + let node_a_id = nodes[0].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2; + let _chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2; // Route a payment from A, through B, to C, then claim it on C. Replay the // `update_fulfill_htlc` twice on B to check that B doesn't hang. @@ -3610,7 +3616,7 @@ fn do_test_glacial_peer_cant_hang(hold_chan_a: bool) { let cs_updates = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id()); if hold_chan_a { - // The first update will be on the A <-> B channel, which we allow to complete. + // The first update will be on the A <-> B channel, which we optionally allow to complete. 
chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); } nodes[1].node.handle_update_fulfill_htlc(nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]); @@ -3637,14 +3643,51 @@ fn do_test_glacial_peer_cant_hang(hold_chan_a: bool) { assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); - let (route, payment_hash_2, _, payment_secret_2) = get_route_and_payment_hash!(&nodes[1], nodes[2], 1_000_000); + let (route, payment_hash_2, payment_preimage_2, payment_secret_2) = + get_route_and_payment_hash!(&nodes[1], nodes[2], 1_000_000); + // With the A<->B preimage persistence not yet complete, the B<->C channel is stuck + // waiting. nodes[1].node.send_payment_with_route(route, payment_hash_2, RecipientOnionFields::secret_only(payment_secret_2), PaymentId(payment_hash_2.0)).unwrap(); check_added_monitors(&nodes[1], 0); assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + + // ...but once we complete the A<->B channel preimage persistence, the B<->C channel + // unlocks and we send both peers commitment updates. + let (outpoint, ab_update_id, _) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone(); + assert!(nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, ab_update_id).is_ok()); + + let mut msg_events = nodes[1].node.get_and_clear_pending_msg_events(); + assert_eq!(msg_events.len(), 2); + check_added_monitors(&nodes[1], 2); + + let mut c_update = msg_events.iter() + .filter(|ev| matches!(ev, MessageSendEvent::UpdateHTLCs { node_id, .. } if *node_id == node_c_id)) + .cloned().collect::>(); + let a_filtermap = |ev| if let MessageSendEvent::UpdateHTLCs { node_id, updates } = ev { + if node_id == node_a_id { + Some(updates) + } else { + None + } + } else { + None + }; + let a_update = msg_events.drain(..).filter_map(|ev| a_filtermap(ev)).collect::>(); + + assert_eq!(a_update.len(), 1); + assert_eq!(c_update.len(), 1); + + nodes[0].node.handle_update_fulfill_htlc(nodes[1].node.get_our_node_id(), &a_update[0].update_fulfill_htlcs[0]); + commitment_signed_dance!(nodes[0], nodes[1], a_update[0].commitment_signed, false); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); + expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], Some(1000), false, false); + + pass_along_path(&nodes[1], &[&nodes[2]], 1_000_000, payment_hash_2, Some(payment_secret_2), c_update.pop().unwrap(), true, None); + claim_payment(&nodes[1], &[&nodes[2]], payment_preimage_2); } } diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index 420978ad5fc..c4cb7bcebb0 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -674,6 +674,11 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> { panic!("Had {} excess added monitors on node {}", added_monitors.len(), self.logger.id); } + let raa_blockers = self.node.get_and_clear_pending_raa_blockers(); + if !raa_blockers.is_empty() { + panic!( "Had excess RAA blockers on node {}: {:?}", self.logger.id, raa_blockers); + } + // Check that if we serialize the network graph, we can deserialize it again. 
let network_graph = { let mut w = test_utils::TestVecWriter(Vec::new()); From 1accc19091d18e0d7214a680cd3edb6ec309ebd2 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 14 Jul 2025 20:31:22 +0000 Subject: [PATCH 08/23] Ensure partial MPP claims continue to blocks channels on restart In 9cc6e08965c8d326b16a3dd9af9e33308f066770, we started using the `RAAMonitorUpdateBlockingAction` logic to block RAAs which may remove a preimage from one `ChannelMonitor` if it isn't durably stored in another that is a part of the same MPP claim. Then, in 254b78fd3553c977a00f9b81cf829e81427b0a04, when we claimed a payment, if we saw that the HTLC was already claimed in the channel, we'd simply drop the new RAA blocker. This can happen on reload when replaying MPP claims. However, just because an HTLC is no longer present in `ChannelManager`'s `Channel`, doesn't mean that the `ChannelMonitorUpdate` which stored the preimage actually made it durably into the `ChannelMonitor` on disk. We could begin an MPP payment, have one channel get the preimage durably into its `ChannelMonitor`, then step forward another update with the peer. Then, we could reload, causing the MPP claims to be replayed across all chanels, leading to the RAA blocker(s) being dropped and all channels being unlocked. Finally, if the first channel managed to step forward a further update with its peer before the (now-replayed) `ChannelMonitorUpdate`s for all MPP parts make it to disk we could theoretically lose the preimage. This is, of course, a somewhat comically unlikely scenario, but I had an old note to expand the test and it turned up the issue, so we might as well fix it. Backport of 583a9a3621896b6e7c09fe3d27b1c68bcc0f187b Resolved conflicts in: * lightning/src/ln/chanmon_update_fail_tests.rs * lightning/src/ln/channelmanager.rs --- lightning/src/ln/chanmon_update_fail_tests.rs | 119 +++++++++++++++++- lightning/src/ln/channelmanager.rs | 35 +++++- 2 files changed, 145 insertions(+), 9 deletions(-) diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 50a41533020..84a56a1fa73 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -3697,13 +3697,17 @@ fn test_glacial_peer_cant_hang() { do_test_glacial_peer_cant_hang(true); } -#[test] -fn test_partial_claim_mon_update_compl_actions() { +fn do_test_partial_claim_mon_update_compl_actions(reload_a: bool, reload_b: bool) { // Test that if we have an MPP claim that we ensure the preimage for the claim is retained in // all the `ChannelMonitor`s until the preimage reaches every `ChannelMonitor` for a channel // which was a part of the MPP. 
let chanmon_cfgs = create_chanmon_cfgs(4); let node_cfgs = create_node_cfgs(4, &chanmon_cfgs); + + let (persister, persister_2, persister_3); + let (new_chain_mon, new_chain_mon_2, new_chain_mon_3); + let (nodes_3_reload, nodes_3_reload_2, nodes_3_reload_3); + let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]); let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs); @@ -3725,6 +3729,8 @@ fn test_partial_claim_mon_update_compl_actions() { route.paths[1].hops[1].short_channel_id = chan_4_scid; send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[3]], &[&nodes[2], &nodes[3]]], 200_000, payment_hash, payment_secret); + // Store the monitor for channel 4 without the preimage to use on reload + let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode(); // Claim along both paths, but only complete one of the two monitor updates. chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); @@ -3736,7 +3742,13 @@ fn test_partial_claim_mon_update_compl_actions() { // Complete the 1<->3 monitor update and play the commitment_signed dance forward until it // blocks. nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_3_id); - expect_payment_claimed!(&nodes[3], payment_hash, 200_000); + let payment_claimed = nodes[3].node.get_and_clear_pending_events(); + assert_eq!(payment_claimed.len(), 1, "{payment_claimed:?}"); + if let Event::PaymentClaimed { payment_hash: ev_hash, .. } = &payment_claimed[0] { + assert_eq!(*ev_hash, payment_hash); + } else { + panic!("{payment_claimed:?}"); + } let updates = get_htlc_update_msgs(&nodes[3], &nodes[1].node.get_our_node_id()); nodes[1].node.handle_update_fulfill_htlc(nodes[3].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]); @@ -3755,15 +3767,41 @@ fn test_partial_claim_mon_update_compl_actions() { check_added_monitors(&nodes[3], 0); assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty()); + if reload_a { + // After a reload (with the monitor not yet fully updated), the RAA should still be blocked + // waiting until the monitor update completes. + let node_ser = nodes[3].node.encode(); + let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode(); + let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]]; + reload_node!(nodes[3], &node_ser, mons, persister, new_chain_mon, nodes_3_reload); + // The final update to channel 4 should be replayed. + persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty()); + check_added_monitors(&nodes[3], 1); + + // Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on + // restart. + let second_payment_claimed = nodes[3].node.get_and_clear_pending_events(); + assert_eq!(payment_claimed, second_payment_claimed); + + nodes[1].node.peer_disconnected(nodes[3].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[3].node.get_our_node_id()); + reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3])); + reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3])); + + assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty()); + } + // Now double-check that the preimage is still in the 1<->3 channel and complete the pending // monitor update, allowing node 3 to claim the payment on the 2<->3 channel. 
This also // unblocks the 1<->3 channel, allowing node 3 to release the two blocked monitor updates and // respond to the final commitment_signed. assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash)); + assert!(nodes[3].node.get_and_clear_pending_events().is_empty()); nodes[3].chain_monitor.complete_sole_pending_chan_update(&chan_4_id); let mut ds_msgs = nodes[3].node.get_and_clear_pending_msg_events(); - assert_eq!(ds_msgs.len(), 2); + assert_eq!(ds_msgs.len(), 2, "{ds_msgs:?}"); check_added_monitors(&nodes[3], 2); match remove_first_msg_event_to_node(&nodes[1].node.get_our_node_id(), &mut ds_msgs) { @@ -3804,14 +3842,87 @@ fn test_partial_claim_mon_update_compl_actions() { assert!(get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash)); assert!(get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash)); + if reload_b { + // Ensure that the channel pause logic doesn't accidentally get restarted after a second + // reload once the HTLCs for the first payment have been removed and the monitors + // completed. + let node_ser = nodes[3].node.encode(); + let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode(); + let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode(); + let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]]; + reload_node!(nodes[3], &node_ser, mons, persister_2, new_chain_mon_2, nodes_3_reload_2); + check_added_monitors(&nodes[3], 0); + + nodes[1].node.peer_disconnected(nodes[3].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[3].node.get_our_node_id()); + reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3])); + reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3])); + + assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty()); + + // Because the HTLCs aren't yet cleared, the PaymentClaimed event will be replayed on + // restart. + let third_payment_claimed = nodes[3].node.get_and_clear_pending_events(); + assert_eq!(payment_claimed, third_payment_claimed); + } + send_payment(&nodes[1], &[&nodes[3]], 100_000); assert!(!get_monitor!(nodes[3], chan_3_id).get_stored_preimages().contains_key(&payment_hash)); + if reload_b { + // Ensure that the channel pause logic doesn't accidentally get restarted after a second + // reload once the HTLCs for the first payment have been removed and the monitors + // completed, even if only one of the two monitors still knows about the first payment. + let node_ser = nodes[3].node.encode(); + let chan_3_monitor_serialized = get_monitor!(nodes[3], chan_3_id).encode(); + let chan_4_monitor_serialized = get_monitor!(nodes[3], chan_4_id).encode(); + let mons = &[&chan_3_monitor_serialized[..], &chan_4_monitor_serialized[..]]; + reload_node!(nodes[3], &node_ser, mons, persister_3, new_chain_mon_3, nodes_3_reload_3); + check_added_monitors(&nodes[3], 0); + + nodes[1].node.peer_disconnected(nodes[3].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[3].node.get_our_node_id()); + reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[3])); + reconnect_nodes(ReconnectArgs::new(&nodes[2], &nodes[3])); + + assert!(nodes[3].node.get_and_clear_pending_msg_events().is_empty()); + + // Because the HTLCs aren't yet cleared, the PaymentClaimed events for both payments will + // be replayed on restart. + // Use this as an opportunity to check the payment_ids are unique. 
+ let mut events = nodes[3].node.get_and_clear_pending_events(); + assert_eq!(events.len(), 2); + events.retain(|ev| *ev != payment_claimed[0]); + assert_eq!(events.len(), 1); + if let Event::PaymentClaimed { payment_id: original_payment_id, .. } = &payment_claimed[0] { + assert!(original_payment_id.is_some()); + if let Event::PaymentClaimed { amount_msat, payment_id, .. } = &events[0] { + assert!(payment_id.is_some()); + assert_ne!(original_payment_id, payment_id); + assert_eq!(*amount_msat, 100_000); + } else { + panic!("{events:?}"); + } + } else { + panic!("{events:?}"); + } + + send_payment(&nodes[1], &[&nodes[3]], 100_000); + } + send_payment(&nodes[2], &[&nodes[3]], 100_000); assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash)); } +#[test] +fn test_partial_claim_mon_update_compl_actions() { + do_test_partial_claim_mon_update_compl_actions(true, true); + do_test_partial_claim_mon_update_compl_actions(true, false); + do_test_partial_claim_mon_update_compl_actions(false, true); + do_test_partial_claim_mon_update_compl_actions(false, false); +} + #[test] fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() { // One of the last features for async persistence we implemented was the correct blocking of diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index c8adef8a7bc..bf6085da56c 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -7155,12 +7155,21 @@ where // payment claim from a `ChannelMonitor`. In some cases (MPP or // if the HTLC was only recently removed) we make such claims // after an HTLC has been removed from a channel entirely, and - // thus the RAA blocker has long since completed. + // thus the RAA blocker may have long since completed. // - // In any other case, the RAA blocker must still be present and - // blocking RAAs. - debug_assert!(during_init || - peer_state.actions_blocking_raa_monitor_updates.get(&chan_id).unwrap().contains(&raa_blocker)); + // However, its possible that the `ChannelMonitorUpdate` containing + // the preimage never completed and is still pending. In that case, + // we need to re-add the RAA blocker, which we do here. Handling + // the post-update action, below, will remove it again. + // + // In any other case (i.e. not during startup), the RAA blocker + // must still be present and blocking RAAs. + let actions = &mut peer_state.actions_blocking_raa_monitor_updates; + let actions_list = actions.entry(chan_id).or_insert_with(Vec::new); + if !actions_list.contains(&raa_blocker) { + debug_assert!(during_init); + actions_list.push(raa_blocker); + } } let action = if let Some(action) = action_opt { action @@ -7168,6 +7177,22 @@ where return; }; + // If there are monitor updates in flight, we may be in the case + // described above, replaying a claim on startup which needs an RAA + // blocker to remain blocked. Thus, in such a case we simply push the + // post-update action to the blocked list and move on. + // In any case, we should err on the side of caution and not process + // the post-update action no matter the situation. 
+ let in_flight_mons = peer_state.in_flight_monitor_updates.get(&prev_hop.funding_txo); + if in_flight_mons.map(|mons| !mons.is_empty()).unwrap_or(false) { + peer_state + .monitor_update_blocked_actions + .entry(chan_id) + .or_insert_with(Vec::new) + .push(action); + return; + } + mem::drop(peer_state_opt); log_trace!(logger, "Completing monitor update completion action for channel {} as claim was redundant: {:?}", From f06a962fe5806531560b698b7738bcf44c008127 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 4 Aug 2025 12:20:39 +0000 Subject: [PATCH 09/23] Marginally simplify types for req'd `counterparty_node_id` in claim In 0.1 we started requiring `counterparty_node_id` to be filled in in various previous-hop datastructures when claiming HTLCs. While we can't switch `HTLCSource`'s `HTLCPreviousHopData::counterparty_node_id` to required (as it might cause us to fail to read old `ChannelMonitor`s which still hold `HTLCSource`s we no longer need to claim), we can at least start requiring the field in `PendingAddHTLCInfo` and `HTLCClaimSource`. This simplifies `claim_mpp_part` marginally. Backport of f624047c517f92316a012ed67e36b07550d5c265 Addressed substantial conflicts in: * lightning/src/ln/channelmanager.rs --- lightning/src/ln/channelmanager.rs | 99 +++++++++++++++++------------- 1 file changed, 57 insertions(+), 42 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index bf6085da56c..c3ddeb9c4c1 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -346,7 +346,7 @@ pub(super) struct PendingAddHTLCInfo { // Note that this may be an outbound SCID alias for the associated channel. prev_short_channel_id: u64, prev_htlc_id: u64, - prev_counterparty_node_id: Option, + prev_counterparty_node_id: PublicKey, prev_channel_id: ChannelId, prev_funding_outpoint: OutPoint, prev_user_channel_id: u128, @@ -1190,7 +1190,7 @@ impl_writeable_tlv_based_enum!(EventCompletionAction, /// drop this and merge the two, however doing so may break upgrades for nodes which have pending /// forwarded payments. struct HTLCClaimSource { - counterparty_node_id: Option, + counterparty_node_id: PublicKey, funding_txo: OutPoint, channel_id: ChannelId, htlc_id: u64, @@ -1199,7 +1199,7 @@ struct HTLCClaimSource { impl From<&MPPClaimHTLCSource> for HTLCClaimSource { fn from(o: &MPPClaimHTLCSource) -> HTLCClaimSource { HTLCClaimSource { - counterparty_node_id: Some(o.counterparty_node_id), + counterparty_node_id: o.counterparty_node_id, funding_txo: o.funding_txo, channel_id: o.channel_id, htlc_id: o.htlc_id, @@ -5610,7 +5610,7 @@ where user_channel_id: Some(payment.prev_user_channel_id), outpoint: payment.prev_funding_outpoint, channel_id: payment.prev_channel_id, - counterparty_node_id: payment.prev_counterparty_node_id, + counterparty_node_id: Some(payment.prev_counterparty_node_id), htlc_id: payment.prev_htlc_id, incoming_packet_shared_secret: payment.forward_info.incoming_shared_secret, phantom_shared_secret: None, @@ -5735,7 +5735,7 @@ where // Process all of the forwards and failures for the channel in which the HTLCs were // proposed to as a batch. 
let pending_forwards = ( - incoming_scid, Some(incoming_counterparty_node_id), incoming_funding_txo, + incoming_scid, incoming_counterparty_node_id, incoming_funding_txo, incoming_channel_id, incoming_user_channel_id, htlc_forwards.drain(..).collect() ); self.forward_htlcs_without_forward_event(&mut [pending_forwards]); @@ -5771,7 +5771,7 @@ where let mut new_events = VecDeque::new(); let mut failed_forwards = Vec::new(); - let mut phantom_receives: Vec<(u64, Option, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)> = Vec::new(); + let mut phantom_receives: Vec<(u64, PublicKey, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)> = Vec::new(); { let mut forward_htlcs = new_hash_map(); mem::swap(&mut forward_htlcs, &mut self.forward_htlcs.lock().unwrap()); @@ -5801,7 +5801,7 @@ where user_channel_id: Some(prev_user_channel_id), channel_id: prev_channel_id, outpoint: prev_funding_outpoint, - counterparty_node_id: prev_counterparty_node_id, + counterparty_node_id: Some(prev_counterparty_node_id), htlc_id: prev_htlc_id, incoming_packet_shared_secret: incoming_shared_secret, phantom_shared_secret: $phantom_ss, @@ -5923,7 +5923,7 @@ where let htlc_source = HTLCSource::PreviousHopData(HTLCPreviousHopData { short_channel_id: prev_short_channel_id, user_channel_id: Some(prev_user_channel_id), - counterparty_node_id: prev_counterparty_node_id, + counterparty_node_id: Some(prev_counterparty_node_id), channel_id: prev_channel_id, outpoint: prev_funding_outpoint, htlc_id: prev_htlc_id, @@ -6098,7 +6098,7 @@ where prev_hop: HTLCPreviousHopData { short_channel_id: prev_short_channel_id, user_channel_id: Some(prev_user_channel_id), - counterparty_node_id: prev_counterparty_node_id, + counterparty_node_id: Some(prev_counterparty_node_id), channel_id: prev_channel_id, outpoint: prev_funding_outpoint, htlc_id: prev_htlc_id, @@ -7085,6 +7085,14 @@ where let short_to_chan_info = self.short_to_chan_info.read().unwrap(); short_to_chan_info.get(&prev_hop.short_channel_id).map(|(cp_id, _)| *cp_id) }); + let counterparty_node_id = if let Some(node_id) = counterparty_node_id { + node_id + } else { + let payment_hash: PaymentHash = payment_preimage.into(); + panic!( + "Prior to upgrading to LDK 0.1, all pending HTLCs forwarded by LDK 0.0.123 or before must be resolved. It appears at least the HTLC with payment_hash {payment_hash} (preimage {payment_preimage}) was not resolved. Please downgrade to LDK 0.0.125 and resolve the HTLC prior to upgrading.", + ); + }; let htlc_source = HTLCClaimSource { counterparty_node_id, @@ -7119,16 +7127,13 @@ where const MISSING_MON_ERROR: &'static str = "If we're going to claim an HTLC against a channel, we should always have *some* state for the channel, even if just the latest ChannelMonitor update_id. This failure indicates we need to claim an HTLC from a channel for which we did not have a ChannelMonitor at startup and didn't create one while running."; - // Note here that `peer_state_opt` is always `Some` if `prev_hop.counterparty_node_id` is - // `Some`. This is relied on in the closed-channel case below. 
- let mut peer_state_opt = prev_hop.counterparty_node_id.as_ref().map( - |counterparty_node_id| per_peer_state.get(counterparty_node_id) - .map(|peer_mutex| peer_mutex.lock().unwrap()) - .expect(MISSING_MON_ERROR) - ); + let mut peer_state_lock = per_peer_state + .get(&prev_hop.counterparty_node_id) + .map(|peer_mutex| peer_mutex.lock().unwrap()) + .expect(MISSING_MON_ERROR); - if let Some(peer_state_lock) = peer_state_opt.as_mut() { - let peer_state = &mut **peer_state_lock; + { + let peer_state = &mut *peer_state_lock; if let hash_map::Entry::Occupied(mut chan_phase_entry) = peer_state.channel_by_id.entry(chan_id) { if let ChannelPhase::Funded(chan) = chan_phase_entry.get_mut() { let logger = WithChannelContext::from(&self.logger, &chan.context, None); @@ -7145,8 +7150,15 @@ where if let Some(raa_blocker) = raa_blocker_opt { peer_state.actions_blocking_raa_monitor_updates.entry(chan_id).or_insert_with(Vec::new).push(raa_blocker); } - handle_new_monitor_update!(self, prev_hop.funding_txo, monitor_update, peer_state_opt, - peer_state, per_peer_state, chan); + handle_new_monitor_update!( + self, + prev_hop.funding_txo, + monitor_update, + peer_state_lock, + peer_state, + per_peer_state, + chan + ); } UpdateFulfillCommitFetch::DuplicateClaim {} => { let (action_opt, raa_blocker_opt) = completion_action(None, true); @@ -7193,7 +7205,7 @@ where return; } - mem::drop(peer_state_opt); + mem::drop(peer_state_lock); log_trace!(logger, "Completing monitor update completion action for channel {} as claim was redundant: {:?}", chan_id, action); @@ -7239,16 +7251,7 @@ where } } - if prev_hop.counterparty_node_id.is_none() { - let payment_hash: PaymentHash = payment_preimage.into(); - panic!( - "Prior to upgrading to LDK 0.1, all pending HTLCs forwarded by LDK 0.0.123 or before must be resolved. It appears at least the HTLC with payment_hash {} (preimage {}) was not resolved. Please downgrade to LDK 0.0.125 and resolve the HTLC prior to upgrading.", - payment_hash, - payment_preimage, - ); - } - let counterparty_node_id = prev_hop.counterparty_node_id.expect("Checked immediately above"); - let mut peer_state = peer_state_opt.expect("peer_state_opt is always Some when the counterparty_node_id is Some"); + let peer_state = &mut *peer_state_lock; let update_id = if let Some(latest_update_id) = peer_state.closed_channel_monitor_update_ids.get_mut(&chan_id) { *latest_update_id = latest_update_id.saturating_add(1); @@ -7263,7 +7266,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ let preimage_update = ChannelMonitorUpdate { update_id, - counterparty_node_id: Some(counterparty_node_id), + counterparty_node_id: Some(prev_hop.counterparty_node_id), updates: vec![ChannelMonitorUpdateStep::PaymentPreimage { payment_preimage, payment_info, @@ -7288,7 +7291,12 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ // Given the fact that we're in a bit of a weird edge case, its worth hashing the preimage // to include the `payment_hash` in the log metadata here. let payment_hash = payment_preimage.into(); - let logger = WithContext::from(&self.logger, Some(counterparty_node_id), Some(chan_id), Some(payment_hash)); + let logger = WithContext::from( + &self.logger, + Some(prev_hop.counterparty_node_id), + Some(chan_id), + Some(payment_hash), + ); if let Some(action) = action_opt { log_trace!(logger, "Tracking monitor update completion action for closed channel {}: {:?}", @@ -7297,8 +7305,15 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ } handle_new_monitor_update!( - self, prev_hop.funding_txo, preimage_update, peer_state, peer_state, per_peer_state, - counterparty_node_id, chan_id, POST_CHANNEL_CLOSE + self, + prev_hop.funding_txo, + preimage_update, + peer_state_lock, + peer_state, + per_peer_state, + prev_hop.counterparty_node_id, + chan_id, + POST_CHANNEL_CLOSE ); } @@ -7514,7 +7529,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ funding_broadcastable: Option, channel_ready: Option, announcement_sigs: Option, tx_signatures: Option - ) -> (Option<(u64, Option, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)>, Option<(u64, Vec)>) { + ) -> (Option<(u64, PublicKey, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)>, Option<(u64, Vec)>) { let logger = WithChannelContext::from(&self.logger, &channel.context, None); log_trace!(logger, "Handling channel resumption for channel {} with {} RAA, {} commitment update, {} pending forwards, {} pending update_add_htlcs, {}broadcasting funding, {} channel ready, {} announcement, {} tx_signatures", &channel.context.channel_id(), @@ -7533,7 +7548,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ let mut htlc_forwards = None; if !pending_forwards.is_empty() { htlc_forwards = Some(( - short_channel_id, Some(channel.context.get_counterparty_node_id()), + short_channel_id, channel.context.get_counterparty_node_id(), channel.context.get_funding_txo().unwrap(), channel.context.channel_id(), channel.context.get_user_id(), pending_forwards )); @@ -8982,13 +8997,13 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ } #[inline] - fn forward_htlcs(&self, per_source_pending_forwards: &mut [(u64, Option, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)]) { + fn forward_htlcs(&self, per_source_pending_forwards: &mut [(u64, PublicKey, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)]) { let push_forward_event = self.forward_htlcs_without_forward_event(per_source_pending_forwards); if push_forward_event { self.push_pending_forwards_ev() } } #[inline] - fn forward_htlcs_without_forward_event(&self, per_source_pending_forwards: &mut [(u64, Option, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)]) -> bool { + fn forward_htlcs_without_forward_event(&self, per_source_pending_forwards: &mut [(u64, PublicKey, OutPoint, ChannelId, u128, Vec<(PendingHTLCInfo, u64)>)]) -> bool { let mut push_forward_event = false; for &mut (prev_short_channel_id, prev_counterparty_node_id, prev_funding_outpoint, prev_channel_id, prev_user_channel_id, ref mut pending_forwards) in per_source_pending_forwards { let mut new_intercept_events = VecDeque::new(); @@ -9039,7 +9054,7 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ let htlc_source = HTLCSource::PreviousHopData(HTLCPreviousHopData { short_channel_id: prev_short_channel_id, user_channel_id: Some(prev_user_channel_id), - counterparty_node_id: prev_counterparty_node_id, + counterparty_node_id: Some(prev_counterparty_node_id), outpoint: prev_funding_outpoint, channel_id: prev_channel_id, htlc_id: prev_htlc_id, @@ -11229,7 +11244,7 @@ where htlc_id: htlc.prev_htlc_id, incoming_packet_shared_secret: htlc.forward_info.incoming_shared_secret, phantom_shared_secret: None, - counterparty_node_id: htlc.prev_counterparty_node_id, + counterparty_node_id: Some(htlc.prev_counterparty_node_id), outpoint: htlc.prev_funding_outpoint, channel_id: htlc.prev_channel_id, blinded_failure: htlc.forward_info.routing.blinded_failure(), @@ -12745,7 +12760,7 @@ impl_writeable_tlv_based!(PendingAddHTLCInfo, { // Note that by the time we get past the required read for type 6 above, prev_funding_outpoint will be // filled in, so we can safely unwrap it here. (7, prev_channel_id, (default_value, ChannelId::v1_from_funding_outpoint(prev_funding_outpoint.0.unwrap()))), - (9, prev_counterparty_node_id, option), + (9, prev_counterparty_node_id, required), }); impl Writeable for HTLCForwardInfo { From 3968d5e25b89164a21296462483d9958a712a4da Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 4 Aug 2025 12:37:22 +0000 Subject: [PATCH 10/23] Add a `counterparty_node_id` to `ClaimedHTLC` in claimed events When we claim a payment, `Event::PaymentClaimed` contains a list of the HTLCs we claimed from as `ClaimedHTLC` objects. While they include a `channel_id` the pyment came to us over, in theory `channel_id`s aren't guaranteed to be unique (though in practice they are in all opened channels aside from 0conf ones with a malicious counterparty). Further, our APIs often require passing both the `counterparty_node_id` and the `channel_id` to do things to chanels. Thus, here we add the missing `counterparty_node-id` to `ClaimedHTLC`. Backport of 7c735d978b339c0aa382ff5d4884cf5be827c86e --- lightning/src/events/mod.rs | 6 ++++++ lightning/src/ln/channelmanager.rs | 1 + 2 files changed, 7 insertions(+) diff --git a/lightning/src/events/mod.rs b/lightning/src/events/mod.rs index 5bc446f9724..5b759c9b9f7 100644 --- a/lightning/src/events/mod.rs +++ b/lightning/src/events/mod.rs @@ -229,6 +229,11 @@ impl_writeable_tlv_based_enum_legacy!(PaymentPurpose, /// Information about an HTLC that is part of a payment that can be claimed. #[derive(Clone, Debug, PartialEq, Eq)] pub struct ClaimedHTLC { + /// The counterparty of the channel. + /// + /// This value will always be `None` for objects serialized with LDK versions prior to 0.2 and + /// `Some` otherwise. + pub counterparty_node_id: Option, /// The `channel_id` of the channel over which the HTLC was received. pub channel_id: ChannelId, /// The `user_channel_id` of the channel over which the HTLC was received. 
This is the value @@ -259,6 +264,7 @@ impl_writeable_tlv_based!(ClaimedHTLC, { (0, channel_id, required), (1, counterparty_skimmed_fee_msat, (default_value, 0u64)), (2, user_channel_id, required), + (3, counterparty_node_id, option), (4, cltv_expiry, required), (6, value_msat, required), }); diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index c3ddeb9c4c1..cab7da2cf8f 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -435,6 +435,7 @@ struct ClaimableHTLC { impl From<&ClaimableHTLC> for events::ClaimedHTLC { fn from(val: &ClaimableHTLC) -> Self { events::ClaimedHTLC { + counterparty_node_id: val.prev_hop.counterparty_node_id, channel_id: val.prev_hop.channel_id, user_channel_id: val.prev_hop.user_channel_id.unwrap_or(0), cltv_expiry: val.cltv_expiry, From 9199a5fb153bc03c3e57b2e9482d13a751630516 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 4 Aug 2025 14:29:02 +0000 Subject: [PATCH 11/23] Stop using RAA-unblocking post event action chan funding outpoints Historically we indexed channels by `(counterparty_node_id, funding outpoint)` in several pipelines, especially the `ChannelMonitorUpdate` pipeline. This ended up complexifying quite a few things as we always needed to store the full `(counterparty_node_id, funding outpoint, channel_id)` tuple to ensure we can always access a channel no matter its state. Over time we want to move to only the `(counterparty_node_id, channel_id)` tuple as *the* channel index, especially as we move towards V2 channels that have a globally-unique `channel_id` anyway. Here we take one small step towards this, avoiding using the channel funding outpoint in the `EventCompletionAction` pipeline. Backport of 10df89dd341ecc6bf4d4206e74e26653bd81e096 Resolved conflicts in: * lightning/src/ln/channel.rs * lightning/src/ln/channelmanager.rs --- lightning/src/ln/channel.rs | 2 +- lightning/src/ln/channelmanager.rs | 119 ++++++++++++++++------------- 2 files changed, 65 insertions(+), 56 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 16803a45bfd..840d11a36db 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -4392,7 +4392,7 @@ impl Channel where Ok((closing_transaction, total_fee_satoshis)) } - fn funding_outpoint(&self) -> OutPoint { + pub fn funding_outpoint(&self) -> OutPoint { self.context.channel_transaction_parameters.funding_outpoint.unwrap() } diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index cab7da2cf8f..546758b659c 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1134,7 +1134,6 @@ pub(crate) enum MonitorUpdateCompletionAction { /// stored for later processing. FreeOtherChannelImmediately { downstream_counterparty_node_id: PublicKey, - downstream_funding_outpoint: OutPoint, blocking_action: RAAMonitorUpdateBlockingAction, downstream_channel_id: ChannelId, }, @@ -1149,11 +1148,8 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, // *immediately*. However, for simplicity we implement read/write here. (1, FreeOtherChannelImmediately) => { (0, downstream_counterparty_node_id, required), - (2, downstream_funding_outpoint, required), (4, blocking_action, upgradable_required), - // Note that by the time we get past the required read above, downstream_funding_outpoint will be - // filled in, so we can safely unwrap it here. 
- (5, downstream_channel_id, (default_value, ChannelId::v1_from_funding_outpoint(downstream_funding_outpoint.0.unwrap()))), + (5, downstream_channel_id, required), }, (2, EmitEventAndFreeOtherChannel) => { (0, event, upgradable_required), @@ -1170,17 +1166,21 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, pub(crate) enum EventCompletionAction { ReleaseRAAChannelMonitorUpdate { counterparty_node_id: PublicKey, - channel_funding_outpoint: OutPoint, + // Was required until LDK 0.2. Always filled in as `Some`. + channel_funding_outpoint: Option, channel_id: ChannelId, }, } impl_writeable_tlv_based_enum!(EventCompletionAction, (0, ReleaseRAAChannelMonitorUpdate) => { - (0, channel_funding_outpoint, required), + (0, channel_funding_outpoint, option), (2, counterparty_node_id, required), - // Note that by the time we get past the required read above, channel_funding_outpoint will be - // filled in, so we can safely unwrap it here. - (3, channel_id, (default_value, ChannelId::v1_from_funding_outpoint(channel_funding_outpoint.0.unwrap()))), + (3, channel_id, (default_value, { + if channel_funding_outpoint.is_none() { + Err(DecodeError::InvalidValue)? + } + ChannelId::v1_from_funding_outpoint(channel_funding_outpoint.unwrap()) + })), } ); @@ -1210,8 +1210,8 @@ impl From<&MPPClaimHTLCSource> for HTLCClaimSource { #[derive(Debug)] pub(crate) struct PendingMPPClaim { - channels_without_preimage: Vec<(PublicKey, OutPoint, ChannelId)>, - channels_with_preimage: Vec<(PublicKey, OutPoint, ChannelId)>, + channels_without_preimage: Vec<(PublicKey, ChannelId)>, + channels_with_preimage: Vec<(PublicKey, ChannelId)>, } #[derive(Clone, Debug, Hash, PartialEq, Eq)] @@ -7022,7 +7022,7 @@ where let pending_mpp_claim_ptr_opt = if sources.len() > 1 { let mut channels_without_preimage = Vec::with_capacity(mpp_parts.len()); for part in mpp_parts.iter() { - let chan = (part.counterparty_node_id, part.funding_txo, part.channel_id); + let chan = (part.counterparty_node_id, part.channel_id); if !channels_without_preimage.contains(&chan) { channels_without_preimage.push(chan); } @@ -7212,9 +7212,10 @@ where chan_id, action); if let MonitorUpdateCompletionAction::FreeOtherChannelImmediately { downstream_counterparty_node_id: node_id, - downstream_funding_outpoint: _, - blocking_action: blocker, downstream_channel_id: channel_id, - } = action { + blocking_action: blocker, + downstream_channel_id: channel_id, + } = action + { if let Some(peer_state_mtx) = per_peer_state.get(&node_id) { let mut peer_state = peer_state_mtx.lock().unwrap(); if let Some(blockers) = peer_state @@ -7335,7 +7336,8 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ debug_assert_eq!(pubkey, path.hops[0].pubkey); } let ev_completion_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate { - channel_funding_outpoint: next_channel_outpoint, channel_id: next_channel_id, + channel_funding_outpoint: Some(next_channel_outpoint), + channel_id: next_channel_id, counterparty_node_id: path.hops[0].pubkey, }; self.pending_outbound_payments.claim_htlc(payment_id, payment_preimage, @@ -7376,7 +7378,6 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ if let Some(other_chan) = chan_to_release { (Some(MonitorUpdateCompletionAction::FreeOtherChannelImmediately { downstream_counterparty_node_id: other_chan.counterparty_node_id, - downstream_funding_outpoint: other_chan.funding_txo, downstream_channel_id: other_chan.channel_id, blocking_action: other_chan.blocking_action, }), None) @@ -7436,17 +7437,17 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ if *pending_claim == claim_ptr { let mut pending_claim_state_lock = pending_claim.0.lock().unwrap(); let pending_claim_state = &mut *pending_claim_state_lock; - pending_claim_state.channels_without_preimage.retain(|(cp, op, cid)| { + pending_claim_state.channels_without_preimage.retain(|(cp, cid)| { let this_claim = *cp == counterparty_node_id && *cid == chan_id; if this_claim { - pending_claim_state.channels_with_preimage.push((*cp, *op, *cid)); + pending_claim_state.channels_with_preimage.push((*cp, *cid)); false } else { true } }); if pending_claim_state.channels_without_preimage.is_empty() { - for (cp, op, cid) in pending_claim_state.channels_with_preimage.iter() { - let freed_chan = (*cp, *op, *cid, blocker.clone()); + for (cp, cid) in pending_claim_state.channels_with_preimage.iter() { + let freed_chan = (*cp, *cid, blocker.clone()); freed_channels.push(freed_chan); } } @@ -7498,17 +7499,17 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ self.pending_events.lock().unwrap().push_back((event, None)); if let Some(unblocked) = downstream_counterparty_and_funding_outpoint { self.handle_monitor_update_release( - unblocked.counterparty_node_id, unblocked.funding_txo, - unblocked.channel_id, Some(unblocked.blocking_action), + unblocked.counterparty_node_id, + unblocked.channel_id, + Some(unblocked.blocking_action), ); } }, MonitorUpdateCompletionAction::FreeOtherChannelImmediately { - downstream_counterparty_node_id, downstream_funding_outpoint, downstream_channel_id, blocking_action, + downstream_counterparty_node_id, downstream_channel_id, blocking_action, } => { self.handle_monitor_update_release( downstream_counterparty_node_id, - downstream_funding_outpoint, downstream_channel_id, Some(blocking_action), ); @@ -7516,8 +7517,8 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ } } - for (node_id, funding_outpoint, channel_id, blocker) in freed_channels { - self.handle_monitor_update_release(node_id, funding_outpoint, channel_id, Some(blocker)); + for (node_id, channel_id, blocker) in freed_channels { + self.handle_monitor_update_release(node_id, channel_id, Some(blocker)); } } @@ -9122,16 +9123,20 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ /// the [`ChannelMonitorUpdate`] in question. 
fn raa_monitor_updates_held(&self, actions_blocking_raa_monitor_updates: &BTreeMap>, - channel_funding_outpoint: OutPoint, channel_id: ChannelId, counterparty_node_id: PublicKey + channel_id: ChannelId, counterparty_node_id: PublicKey, ) -> bool { actions_blocking_raa_monitor_updates .get(&channel_id).map(|v| !v.is_empty()).unwrap_or(false) || self.pending_events.lock().unwrap().iter().any(|(_, action)| { - action == &Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate { - channel_funding_outpoint, - channel_id, - counterparty_node_id, - }) + if let Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate { + channel_funding_outpoint: _, + channel_id: ev_channel_id, + counterparty_node_id: ev_counterparty_node_id + }) = action { + *ev_channel_id == channel_id && *ev_counterparty_node_id == counterparty_node_id + } else { + false + } }) } @@ -9144,10 +9149,12 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ let mut peer_state_lck = peer_state_mtx.lock().unwrap(); let peer_state = &mut *peer_state_lck; - if let Some(chan) = peer_state.channel_by_id.get(&channel_id) { - return self.raa_monitor_updates_held(&peer_state.actions_blocking_raa_monitor_updates, - chan.context().get_funding_txo().unwrap(), channel_id, counterparty_node_id); - } + assert!(peer_state.channel_by_id.contains_key(&channel_id)); + return self.raa_monitor_updates_held( + &peer_state.actions_blocking_raa_monitor_updates, + channel_id, + counterparty_node_id, + ); } false } @@ -9166,11 +9173,9 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ if let ChannelPhase::Funded(chan) = chan_phase_entry.get_mut() { let logger = WithChannelContext::from(&self.logger, &chan.context, None); let funding_txo_opt = chan.context.get_funding_txo(); - let mon_update_blocked = if let Some(funding_txo) = funding_txo_opt { - self.raa_monitor_updates_held( - &peer_state.actions_blocking_raa_monitor_updates, funding_txo, msg.channel_id, - *counterparty_node_id) - } else { false }; + let mon_update_blocked = self.raa_monitor_updates_held( + &peer_state.actions_blocking_raa_monitor_updates, msg.channel_id, + *counterparty_node_id); let (htlcs_to_fail, monitor_update_opt) = try_chan_phase_entry!(self, peer_state, chan.revoke_and_ack(&msg, &self.fee_estimator, &&logger, mon_update_blocked), chan_phase_entry); if let Some(monitor_update) = monitor_update_opt { @@ -10708,10 +10713,10 @@ where /// [`Event`] being handled) completes, this should be called to restore the channel to normal /// operation. It will double-check that nothing *else* is also blocking the same channel from /// making progress and then let any blocked [`ChannelMonitorUpdate`]s fly. - fn handle_monitor_update_release(&self, counterparty_node_id: PublicKey, - channel_funding_outpoint: OutPoint, channel_id: ChannelId, - mut completed_blocker: Option) { - + fn handle_monitor_update_release( + &self, counterparty_node_id: PublicKey, channel_id: ChannelId, + mut completed_blocker: Option + ) { let logger = WithContext::from( &self.logger, Some(counterparty_node_id), Some(channel_id), None ); @@ -10730,7 +10735,7 @@ where } if self.raa_monitor_updates_held(&peer_state.actions_blocking_raa_monitor_updates, - channel_funding_outpoint, channel_id, counterparty_node_id) { + channel_id, counterparty_node_id) { // Check that, while holding the peer lock, we don't have anything else // blocking monitor updates for this channel. If we do, release the monitor // update(s) when those blockers complete. 
@@ -10742,7 +10747,7 @@ where if let hash_map::Entry::Occupied(mut chan_phase_entry) = peer_state.channel_by_id.entry( channel_id) { if let ChannelPhase::Funded(chan) = chan_phase_entry.get_mut() { - debug_assert_eq!(chan.context.get_funding_txo().unwrap(), channel_funding_outpoint); + let channel_funding_outpoint = chan.funding_outpoint(); if let Some((monitor_update, further_update_exists)) = chan.unblock_next_blocked_monitor_update() { log_debug!(logger, "Unlocking monitor updating for channel {} and updating monitor", channel_id); @@ -10772,10 +10777,12 @@ where for action in actions { match action { EventCompletionAction::ReleaseRAAChannelMonitorUpdate { - channel_funding_outpoint, channel_id, counterparty_node_id + channel_funding_outpoint: _, + channel_id, + counterparty_node_id, } => { - self.handle_monitor_update_release(counterparty_node_id, channel_funding_outpoint, channel_id, None); - } + self.handle_monitor_update_release(counterparty_node_id, channel_id, None); + }, } } } @@ -13908,7 +13915,7 @@ where // `ChannelMonitor` is removed. let compl_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate { - channel_funding_outpoint: monitor.get_funding_txo().0, + channel_funding_outpoint: Some(monitor.get_funding_txo().0), channel_id: monitor.channel_id(), counterparty_node_id: path.hops[0].pubkey, }; @@ -14351,8 +14358,10 @@ where } } - let mut channels_without_preimage = payment_claim.mpp_parts.iter() - .map(|htlc_info| (htlc_info.counterparty_node_id, htlc_info.funding_txo, htlc_info.channel_id)) + let mut channels_without_preimage = payment_claim + .mpp_parts + .iter() + .map(|htlc_info| (htlc_info.counterparty_node_id, htlc_info.channel_id)) .collect::>(); // If we have multiple MPP parts which were received over the same channel, // we only track it once as once we get a preimage durably in the From b23dc9a2d18f3d4403b08e64dc689622e9e1aa73 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 4 Aug 2025 14:27:37 +0000 Subject: [PATCH 12/23] Block RAA `ChannelMonitorUpdate`s on `PaymentClaimed` events We added the ability to block `ChannelMonitorUpdate`s on receipt of an RAA in order to avoid dropping a payment preimage from a channel that created a `PaymentSent` event in 9ede794e8e8559f1b2b386c1c57372094fc92fd4. We did not at the time use the same infrastructure for `PaymentClaimed` events, but really should have. While a `PaymentClaimed` event may seem a bit less critical than a `PaymentSent` event (it doesn't contain a payment preimage that the user needs to make sure they store for proof of payment), its still important for users to ensure their payment tracking logic is always correct. Here we take the (relatively straightforward) action of setting a `EventCompletionAction` to block RAA monitor updates on channels which created a `PaymentClaimed` event. Note that we only block one random channel from an MPP paymnet, not all of them, as any single channel should provide enough information for us to recreate the `PaymentClaimed` event on restart. Backport of a80c855b2128c09e7371dfb9daf241252194bda2 Resolved conflicts in: * lightning/src/ln/async_signer_tests.rs due to changes only being on tests which don't exist on this branch, * lightning/src/ln/chanmon_update_fail_tests.rs * lightning/src/ln/channelmanager.rs * lightning/src/ln/quiescence_tests.rs due to file not being present in this branch. Resolved silent conflicts in: * lightning/src/ln/monitor_tests.rs due to slightly different test APIs. 
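As a rough standalone sketch of the shape of this mechanism (the types and names below are illustrative placeholders, not LDK's actual API): when the `PaymentClaimed` event is queued, one of the claimed HTLCs' inbound channels is marked as blocked, and only handling the event releases that block, so the RAA `ChannelMonitorUpdate` that would eventually drop the claim state cannot run first.

use std::collections::HashSet;

type NodeId = [u8; 33];
type ChannelId = [u8; 32];

/// Completion action attached to a queued event; applied once the user has
/// handled the event. (Placeholder for the event-completion-action concept.)
#[derive(Clone)]
struct ReleaseRaaMonitorUpdate {
    counterparty_node_id: NodeId,
    channel_id: ChannelId,
}

#[derive(Default)]
struct Manager {
    /// Queued user events, each optionally paired with a completion action.
    pending_events: Vec<(String, Option<ReleaseRaaMonitorUpdate>)>,
    /// Channels whose RAA-triggered monitor updates are currently held back.
    blocked_channels: HashSet<(NodeId, ChannelId)>,
}

impl Manager {
    /// Queue a `PaymentClaimed` event and block one of the claimed HTLCs'
    /// inbound channels until the event has been handled.
    fn queue_payment_claimed(&mut self, durable_channel: Option<(NodeId, ChannelId)>) {
        let action = durable_channel.map(|(node_id, channel_id)| {
            self.blocked_channels.insert((node_id, channel_id));
            ReleaseRaaMonitorUpdate { counterparty_node_id: node_id, channel_id }
        });
        self.pending_events.push(("PaymentClaimed".to_owned(), action));
    }

    /// Whether an RAA-triggered monitor update for this channel may proceed.
    fn raa_monitor_update_allowed(&self, node_id: NodeId, channel_id: ChannelId) -> bool {
        !self.blocked_channels.contains(&(node_id, channel_id))
    }

    /// Called after the user has processed all queued events; completion
    /// actions release the corresponding blocks.
    fn events_handled(&mut self) {
        for (_event, action) in self.pending_events.drain(..) {
            if let Some(act) = action {
                self.blocked_channels.remove(&(act.counterparty_node_id, act.channel_id));
            }
        }
    }
}

Blocking a single channel suffices because regenerating the event on restart only needs one channel's monitor to still hold the relevant claim state.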
--- lightning/src/ln/chanmon_update_fail_tests.rs | 23 ++++++- lightning/src/ln/channelmanager.rs | 62 +++++++++++++++---- lightning/src/ln/monitor_tests.rs | 62 +++++++++++++++++++ 3 files changed, 134 insertions(+), 13 deletions(-) diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 84a56a1fa73..2e06876bbad 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -4118,6 +4118,23 @@ fn test_single_channel_multiple_mpp() { // `update_fulfill_htlc`/`commitment_signed` pair to pass to our counterparty. do_a_write.send(()).unwrap(); + let event_node: &'static TestChannelManager<'static, 'static> = + unsafe { std::mem::transmute(nodes[8].node as &TestChannelManager) }; + let thrd_event = std::thread::spawn(move || { + let mut have_event = false; + while !have_event { + let mut events = event_node.get_and_clear_pending_events(); + assert!(events.len() == 1 || events.len() == 0); + if events.len() == 1 { + if let Event::PaymentClaimed { .. } = events[0] { + } else { + panic!("Unexpected event {events:?}"); + } + have_event = true; + } + } + }); + // Then fetch the `update_fulfill_htlc`/`commitment_signed`. Note that the // `get_and_clear_pending_msg_events` will immediately hang trying to take a peer lock which // `claim_funds` is holding. Thus, we release a second write after a small sleep in the @@ -4137,7 +4154,11 @@ fn test_single_channel_multiple_mpp() { }); block_thrd2.store(false, Ordering::Release); let first_updates = get_htlc_update_msgs(&nodes[8], &nodes[7].node.get_our_node_id()); + + // Thread 2 could unblock first, or it could get blocked waiting on us to process a + // `PaymentClaimed` event. Either way, wait until both have finished. thrd2.join().unwrap(); + thrd_event.join().unwrap(); // Disconnect node 6 from all its peers so it doesn't bother to fail the HTLCs back nodes[7].node.peer_disconnected(nodes[1].node.get_our_node_id()); @@ -4183,8 +4204,6 @@ fn test_single_channel_multiple_mpp() { thrd4.join().unwrap(); thrd.join().unwrap(); - expect_payment_claimed!(nodes[8], payment_hash, 50_000_000); - // At the end, we should have 7 ChannelMonitorUpdates - 6 for HTLC claims, and one for the // above `revoke_and_ack`. check_added_monitors(&nodes[8], 7); diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 546758b659c..c1e21c5c62b 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -861,9 +861,19 @@ struct ClaimingPayment { sender_intended_value: Option, onion_fields: Option, payment_id: Option, + /// When we claim and generate a [`Event::PaymentClaimed`], we want to block any + /// payment-preimage-removing RAA [`ChannelMonitorUpdate`]s until the [`Event::PaymentClaimed`] + /// is handled, ensuring we can regenerate the event on restart. We pick a random channel to + /// block and store it here. + /// + /// Note that once we disallow downgrades to 0.1 we should be able to simply use + /// [`Self::htlcs`] to generate this rather than storing it here (as we won't need the funding + /// outpoint), allowing us to remove this field. 
+ durable_preimage_channel: Option<(OutPoint, PublicKey, ChannelId)>, } impl_writeable_tlv_based!(ClaimingPayment, { (0, amount_msat, required), + (1, durable_preimage_channel, option), (2, payment_purpose, required), (4, receiver_node_id, required), (5, htlcs, optional_vec), @@ -999,6 +1009,16 @@ impl ClaimablePayments { .or_insert_with(|| { let htlcs = payment.htlcs.iter().map(events::ClaimedHTLC::from).collect(); let sender_intended_value = payment.htlcs.first().map(|htlc| htlc.total_msat); + // Pick an "arbitrary" channel to block RAAs on until the `PaymentSent` + // event is processed, specifically the last channel to get claimed. + let durable_preimage_channel = payment.htlcs.last().map_or(None, |htlc| { + if let Some(node_id) = htlc.prev_hop.counterparty_node_id { + Some((htlc.prev_hop.outpoint, node_id, htlc.prev_hop.channel_id)) + } else { + None + } + }); + debug_assert!(durable_preimage_channel.is_some()); ClaimingPayment { amount_msat: payment.htlcs.iter().map(|source| source.value).sum(), payment_purpose: payment.purpose, @@ -1007,6 +1027,7 @@ impl ClaimablePayments { sender_intended_value, onion_fields: payment.onion_fields, payment_id: Some(payment_id), + durable_preimage_channel, } }).clone(); @@ -7471,6 +7492,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ sender_intended_value: sender_intended_total_msat, onion_fields, payment_id, + durable_preimage_channel, }) = payment { let event = events::Event::PaymentClaimed { payment_hash, @@ -7482,7 +7504,18 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ onion_fields, payment_id, }; - let event_action = (event, None); + let action = if let Some((outpoint, counterparty_node_id, channel_id)) + = durable_preimage_channel + { + Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate { + channel_funding_outpoint: Some(outpoint), + counterparty_node_id, + channel_id, + }) + } else { + None + }; + let event_action = (event, action); let mut pending_events = self.pending_events.lock().unwrap(); // If we're replaying a claim on startup we may end up duplicating an event // that's already in our queue, so check before we push another one. The @@ -14489,16 +14522,23 @@ where } let mut pending_events = channel_manager.pending_events.lock().unwrap(); let payment_id = payment.inbound_payment_id(&inbound_payment_id_secret.unwrap()); - pending_events.push_back((events::Event::PaymentClaimed { - receiver_node_id, - payment_hash, - purpose: payment.purpose, - amount_msat: claimable_amt_msat, - htlcs: payment.htlcs.iter().map(events::ClaimedHTLC::from).collect(), - sender_intended_total_msat: payment.htlcs.first().map(|htlc| htlc.total_msat), - onion_fields: payment.onion_fields, - payment_id: Some(payment_id), - }, None)); + pending_events.push_back(( + events::Event::PaymentClaimed { + receiver_node_id, + payment_hash, + purpose: payment.purpose, + amount_msat: claimable_amt_msat, + htlcs: payment.htlcs.iter().map(events::ClaimedHTLC::from).collect(), + sender_intended_total_msat: payment.htlcs.first().map(|htlc| htlc.total_msat), + onion_fields: payment.onion_fields, + payment_id: Some(payment_id), + }, + // Note that we don't bother adding a EventCompletionAction here to + // ensure the `PaymentClaimed` event is durable processed as this + // should only be hit for particularly old channels and we don't have + // enough information to generate such an action. 
+ None, + )); } } } diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index d105d69edd2..472057d4a05 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3200,3 +3200,65 @@ fn test_update_replay_panics() { monitor.update_monitor(&updates[2], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); monitor.update_monitor(&updates[3], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); } + +#[test] +fn test_claim_event_never_handled() { + // When a payment is claimed, the `ChannelMonitorUpdate` containing the payment preimage goes + // out and when it completes the `PaymentClaimed` event is generated. If the channel then + // progresses forward a few steps, the payment preimage will then eventually be removed from + // the channel. By that point, we have to make sure that the `PaymentClaimed` event has been + // handled (which ensures the user has maked the payment received). + // Otherwise, it is possible that, on restart, we load with a stale `ChannelManager` which + // doesn't have the `PaymentClaimed` event and it needs to rebuild it from the + // `ChannelMonitor`'s payment information and preimage. + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let persister; + let new_chain_mon; + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let nodes_1_reload; + let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs); + + let node_a_id = nodes[0].node.get_our_node_id(); + let node_b_id = nodes[1].node.get_our_node_id(); + + let init_node_ser = nodes[1].node.encode(); + + let chan = create_announced_chan_between_nodes(&nodes, 0, 1); + + // Send the payment we'll ultimately test the PaymentClaimed event for. + let (preimage_a, payment_hash_a, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000); + + nodes[1].node.claim_funds(preimage_a); + check_added_monitors(&nodes[1], 1); + + let mut updates = get_htlc_update_msgs(&nodes[1], &node_a_id); + nodes[0].node.handle_update_fulfill_htlc(node_b_id, &updates.update_fulfill_htlcs[0]); + expect_payment_sent(&nodes[0], preimage_a, None, false, false); + + nodes[0].node.handle_commitment_signed(node_b_id, &updates.commitment_signed); + check_added_monitors(&nodes[0], 1); + + // Once the `PaymentClaimed` event is generated, further RAA `ChannelMonitorUpdate`s will be + // blocked until it is handled, ensuring we never get far enough to remove the preimage. + let (raa, cs) = get_revoke_commit_msgs(&nodes[0], &node_b_id); + nodes[1].node.handle_revoke_and_ack(node_a_id, &raa); + nodes[1].node.handle_commitment_signed(node_a_id, &cs); + check_added_monitors(&nodes[1], 0); + + // The last RAA here should be blocked waiting on us to handle the PaymentClaimed event before + // continuing. Otherwise, we'd be able to make enough progress that the payment preimage is + // removed from node A's `ChannelMonitor`. This leaves us unable to make further progress. + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + + // Finally, reload node B with an empty `ChannelManager` and check that we get the + // `PaymentClaimed` event. 
+ let chan_0_monitor_serialized = get_monitor!(nodes[1], chan.2).encode(); + let mons = &[&chan_0_monitor_serialized[..]]; + reload_node!(nodes[1], &init_node_ser, mons, persister, new_chain_mon, nodes_1_reload); + + expect_payment_claimed!(nodes[1], payment_hash_a, 1_000_000); + // The reload logic spuriously generates a redundant payment preimage-containing + // `ChannelMonitorUpdate`. + check_added_monitors(&nodes[1], 2); +} From eced3fbfb66c7fbcc43e772418e46d2136490b58 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 31 Jul 2025 13:23:37 +0000 Subject: [PATCH 13/23] Correct `test_dup_htlc_onchain_doesnt_fail_on_reload` `test_dup_htlc_onchain_doesnt_fail_on_reload` made reference to `ChainMonitor` persisting `ChannelMonitor`s on each new block, which hasn't been the case in some time. Instead, we update the comment and code to make explicit that it doesn't impact the test. Backport of 0a6c3fb4c3f82f369442afcfb78a46049395c39a Resolved `use` conflicts in: * lightning/src/ln/payment_tests.rs --- lightning/src/ln/payment_tests.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index 348cace949d..12f7a639908 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -11,7 +11,7 @@ //! serialization ordering between ChannelManager/ChannelMonitors and ensuring we can still retry //! payments thereafter. -use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen}; +use crate::chain::{Confirm, Listen}; use crate::chain::channelmonitor::{ANTI_REORG_DELAY, HTLC_FAIL_BACK_BUFFER, LATENCY_GRACE_PERIOD_BLOCKS}; use crate::sign::EntropySource; use crate::events::{ClosureReason, Event, HTLCDestination, MessageSendEvent, MessageSendEventsProvider, PathFailure, PaymentFailureReason, PaymentPurpose}; @@ -1086,16 +1086,15 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bo connect_blocks(&nodes[0], ANTI_REORG_DELAY - 2); } - // Now connect the HTLC claim transaction with the ChainMonitor-generated ChannelMonitor update - // returning InProgress. This should cause the claim event to never make its way to the - // ChannelManager. - chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); - + // Now connect the HTLC claim transaction. Note that ChannelMonitors aren't re-persisted on + // each block connection (as the block being reconnected on startup should get us the same + // result). if payment_timeout { connect_blocks(&nodes[0], 1); } else { connect_block(&nodes[0], &claim_block); } + check_added_monitors(&nodes[0], 0); // Note that we skip persisting ChannelMonitors. We should still be generating the payment sent // event without ChannelMonitor persistence. If we reset to a previous state on reload, the block From 2b141a49c425c113c8ca0f1ffc7913efd444dda6 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 22:10:09 +0000 Subject: [PATCH 14/23] Make `ChannelMonitor` round-trip tests more robust During testsing, we check that a `ChannelMonitor` will round-trip through serialization exactly. However, we recently added a fix to change a value in `PackageTemplate` on reload to fix some issues in the field in 0.1. This can cause the round-trip tests to fail as a field is modified during read. We fix it here by simply exempting the field from the equality test in the condition where it would be updated on read. 
We also make the `ChannelMonitor` `PartialEq` trait implementation non-public as weird workarounds like this make clear that such a comparison is a britle API at best. Backport of a8ec966177345442343714ecfd42dea6f4155519 Resolved `use` and `rustfmt` conflicts in: * lightning/src/chain/channelmonitor.rs In the upstream version of this commit, the `PartialEq` implementation for `ChannelMonitor` was made test-only however to avoid breaking a public API we do not do so here. However, the changes to `PartialEq` for `PackageTemplate` are `test`-only, so it shouldn't result in any behavioral change (not that the marginal `PartialEq` changes are likely to impact downstream crates in any case). --- lightning/src/chain/channelmonitor.rs | 15 +++++++-- lightning/src/chain/package.rs | 46 ++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index fdc77ec7eaa..5a8a7a535e5 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -59,7 +59,7 @@ use crate::prelude::*; use core::{cmp, mem}; use crate::io::{self, Error}; use core::ops::Deref; -use crate::sync::{Mutex, LockTestExt}; +use crate::sync::Mutex; /// An update generated by the underlying channel itself which contains some new information the /// [`ChannelMonitor`] should be made aware of. @@ -1040,12 +1040,21 @@ pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>); impl PartialEq for ChannelMonitor where Signer: PartialEq { fn eq(&self, other: &Self) -> bool { + use crate::sync::LockTestExt; // We need some kind of total lockorder. Absent a better idea, we sort by position in // memory and take locks in that order (assuming that we can't move within memory while a // lock is held). let ord = ((self as *const _) as usize) < ((other as *const _) as usize); - let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() }; - let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() }; + let a = if ord { + self.inner.unsafe_well_ordered_double_lock_self() + } else { + other.inner.unsafe_well_ordered_double_lock_self() + }; + let b = if ord { + other.inner.unsafe_well_ordered_double_lock_self() + } else { + self.inner.unsafe_well_ordered_double_lock_self() + }; a.eq(&b) } } diff --git a/lightning/src/chain/package.rs b/lightning/src/chain/package.rs index 9fe16915be4..f4ef2f28d53 100644 --- a/lightning/src/chain/package.rs +++ b/lightning/src/chain/package.rs @@ -820,7 +820,7 @@ enum PackageMalleability { /// /// As packages are time-sensitive, we fee-bump and rebroadcast them at scheduled intervals. /// Failing to confirm a package translate as a loss of funds for the user. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, Eq)] pub struct PackageTemplate { // List of onchain outputs and solving data to generate satisfying witnesses. 
inputs: Vec<(BitcoinOutPoint, PackageSolvingData)>, @@ -849,6 +849,50 @@ pub struct PackageTemplate { height_timer: u32, } +impl PartialEq for PackageTemplate { + fn eq(&self, o: &Self) -> bool { + if self.inputs != o.inputs + || self.malleability != o.malleability + || self.feerate_previous != o.feerate_previous + || self.height_timer != o.height_timer + { + return false; + } + #[cfg(test)] + { + // In some cases we may reset `counterparty_spendable_height` to zero on reload, which + // can cause our test assertions that ChannelMonitors round-trip exactly to trip. Here + // we allow exactly the same case as we tweak in the `PackageTemplate` `Readable` + // implementation. + if self.counterparty_spendable_height == 0 { + for (_, input) in self.inputs.iter() { + if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput { + htlc, .. + }) = input + { + if !htlc.offered && htlc.cltv_expiry != 0 { + return true; + } + } + } + } + if o.counterparty_spendable_height == 0 { + for (_, input) in o.inputs.iter() { + if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput { + htlc, .. + }) = input + { + if !htlc.offered && htlc.cltv_expiry != 0 { + return true; + } + } + } + } + } + self.counterparty_spendable_height == o.counterparty_spendable_height + } +} + impl PackageTemplate { pub(crate) fn can_merge_with(&self, other: &PackageTemplate, cur_height: u32) -> bool { match (self.malleability, other.malleability) { From 52035cb60f2589995576c57a64a5393b11036adc Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sun, 5 Oct 2025 10:30:40 +0000 Subject: [PATCH 15/23] Rebuild pending payments list before replaying pending claims/fails On `ChannelManager` reload we rebuild the pending outbound payments list by looking for any missing payments in `ChannelMonitor`s. However, in the same loop over `ChannelMonitor`s, we also re-claim any pending payments which we see we have a payment preimage for. If we send an MPP payment across different chanels, the result may be that we'll iterate the loop, and in each iteration add a pending payment with only one known path, then claim/fail it and remove the pending apyment (at least for the claim case). This may result in spurious extra events, or even both a `PaymentFailed` and `PaymentSent` event on startup for the same payment. Backport of 8106dbfd662b2cebd193bbd8942cbb07b417227f Resolved substantial conflcits in: * lightning/src/ln/channelmanager.rs by simply rewriting the patch. Note that the `is_channel_closed` variable used in the upstream version of this commit replaced simply checking if the `outpoint_to_peer` map had an entry for the channel's funding outpoint. Note that the next upstream commit in this series (3239d6714f3ac83ffdd66b6885fac98789224747) is unnecessary on this branch as we use `outpoint_to_peer` rather than `per_peer_state` to detect channel closure here. --- lightning/src/ln/channelmanager.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index c1e21c5c62b..7b6563404d6 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13867,8 +13867,12 @@ where // payments which are still in-flight via their on-chain state. // We only rebuild the pending payments map if we were most recently serialized by // 0.0.102+ + // + // First we rebuild all pending payments, then separately re-claim and re-fail pending + // payments. This avoids edge-cases around MPP payments resulting in redundant actions. 
for (_, monitor) in args.channel_monitors.iter() { let counterparty_opt = outpoint_to_peer.get(&monitor.get_funding_txo().0); + // outpoint_to_peer missing the funding outpoint implies the channel is closed if counterparty_opt.is_none() { for (htlc_source, (htlc, _)) in monitor.get_pending_or_resolved_outbound_htlcs() { let logger = WithChannelMonitor::from(&args.logger, monitor, Some(htlc.payment_hash)); @@ -13885,6 +13889,11 @@ where ); } } + } + } + for (_, monitor) in args.channel_monitors.iter() { + let counterparty_opt = outpoint_to_peer.get(&monitor.get_funding_txo().0); + if counterparty_opt.is_none() { for (htlc_source, (htlc, preimage_opt)) in monitor.get_all_current_outbound_htlcs() { let logger = WithChannelMonitor::from(&args.logger, monitor, Some(htlc.payment_hash)); match htlc_source { From 60dce06b6d4b147cc80b260af9bf60122bb8846d Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sun, 20 Jul 2025 23:58:51 +0000 Subject: [PATCH 16/23] Store preimages we learned on chain in case of `MonitorEvent` loss `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain. In cases where a `MonitorEvent` is lost, this can lead to a lost preimage. Here we fix it by simply tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation. Backport of 543cc85e7f4f8ca6fb04f05d775c07c18c35559b Resolved conflicts in * lightning/src/ln/monitor_tests.rs due to trivial changes upstream as well as changes to upstream bump events and commitment announcement logic. 
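To illustrate why recording the preimage in the monitor is sufficient, here is a minimal standalone sketch of the startup replay this relies on (simplified placeholder types, not LDK's actual signatures): any preimage a closed channel's monitor has recorded, whether learned from an off-chain update or from an on-chain claim, is re-applied when the manager is loaded, so a dropped `MonitorEvent` cannot lose it.

use std::collections::HashMap;

type PaymentHash = [u8; 32];
type PaymentPreimage = [u8; 32];

/// Stand-in for a ChannelMonitor of a closed channel.
struct ClosedMonitor {
    /// Preimages recorded for outbound HTLCs, keyed by an HTLC identifier.
    /// With this change, preimages learned from on-chain claims land here too.
    fulfilled_htlcs: HashMap<u64, (PaymentHash, PaymentPreimage)>,
}

/// Stand-in for the payment state the manager rebuilds on load.
#[derive(Default)]
struct PaymentState {
    claimed: HashMap<PaymentHash, PaymentPreimage>,
}

impl PaymentState {
    /// Replay every recorded preimage on startup. Re-claiming is idempotent:
    /// if the MonitorEvent was delivered before the crash this is a no-op,
    /// otherwise it recovers the claim that would have been lost.
    fn replay_from_monitors(&mut self, monitors: &[ClosedMonitor]) {
        for monitor in monitors {
            for (hash, preimage) in monitor.fulfilled_htlcs.values() {
                self.claimed.entry(*hash).or_insert(*preimage);
            }
        }
    }
}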
--- lightning/src/chain/channelmonitor.rs | 2 + lightning/src/ln/monitor_tests.rs | 161 ++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 5a8a7a535e5..6106f46fc13 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -4695,6 +4695,7 @@ impl ChannelMonitorImpl { on_to_local_output_csv: None, }, }); + self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage); self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { source, payment_preimage: Some(payment_preimage), @@ -4718,6 +4719,7 @@ impl ChannelMonitorImpl { on_to_local_output_csv: None, }, }); + self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage); self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate { source, payment_preimage: Some(payment_preimage), diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 472057d4a05..7bc54726791 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -10,6 +10,7 @@ //! Further functional tests which test blockchain reorganizations. use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SpendableOutputDescriptor}; +use crate::chain::Watch; use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS,LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep}; use crate::chain::transaction::OutPoint; use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight}; @@ -3262,3 +3263,163 @@ fn test_claim_event_never_handled() { // `ChannelMonitorUpdate`. check_added_monitors(&nodes[1], 2); } + +fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) { + // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the + // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets + // persisted (i.e. due to a block update) then the node crashes, prior to persisting the + // `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be + // lost. This isn't likely in a sync persist environment, but in an async one this could be an + // issue. + // + // Note that this is only an issue for closed channels - `MonitorEvent`s only inform the + // `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup + // or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes + // completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved + // on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. + // + // Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not + // cause us to lose funds or miss a `PaymentSent` event. 
+ let mut cfg = test_default_channel_config(); + cfg.manually_accept_inbound_channels = true; + cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true; + let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())]; + + let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + let persister; + let new_chain_mon; + let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs); + let node_b_reload; + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + let coinbase_tx = provide_anchor_reserves(&nodes); + + let node_b_id = nodes[1].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2; + let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2; + + let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000); + let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000); + + nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + nodes[2].node.claim_funds(preimage_a); + check_added_monitors(&nodes[2], 1); + expect_payment_claimed!(nodes[2], hash_a, 1_000_000); + nodes[2].node.claim_funds(preimage_b); + check_added_monitors(&nodes[2], 1); + expect_payment_claimed!(nodes[2], hash_b, 1_000_000); + + // Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs. + let message = "Closed".to_owned(); + nodes[2] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message) + .unwrap(); + check_added_monitors(&nodes[2], 1); + let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }; + check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000); + check_closed_broadcast!(nodes[2], true); + + let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(cs_commit_tx.len(), 1); + + let message = "Closed".to_owned(); + nodes[1] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message) + .unwrap(); + check_added_monitors(&nodes[1], 1); + let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }; + check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000); + check_closed_broadcast!(nodes[1], true); + + let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(bs_commit_tx.len(), 1); + + let selected_commit_tx = if on_counterparty_tx { + &cs_commit_tx[0] + } else { + &bs_commit_tx[0] + }; + + mine_transaction(&nodes[2], selected_commit_tx); + let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first(); + let cs_htlc_claims = if on_counterparty_tx { + handle_bump_htlc_event(&nodes[2], 1); + let mut cs_transactions = + nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(cs_transactions.len(), 1); + vec![cs_transactions.pop().unwrap()] + } else { + if c_replays_commitment { + assert_eq!(cs_transactions.len(), 2, "{cs_transactions:?}"); + vec![cs_transactions.pop().unwrap()] + } else { + assert_eq!(cs_transactions.len(), 1, "{cs_transactions:?}"); + cs_transactions + } + }; + + assert_eq!(cs_htlc_claims.len(), 1); + check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx); 
+ + // Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s + mine_transaction(&nodes[1], selected_commit_tx); + mine_transaction(&nodes[1], &cs_htlc_claims[0]); + + // Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we + // just processed a new block) but the ChannelManager was not. This should be exceedingly rare + // given we have to be connecting a block at the right moment and not manage to get a + // ChannelManager persisted after it does a thing that should immediately precede persistence, + // but with async persist it is more common. + // + // We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the + // latest state. + let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events(); + assert_eq!(mon_events.len(), 1); + assert_eq!(mon_events[0].2.len(), 2); + + let node_ser = nodes[1].node.encode(); + let mon_a_ser = get_monitor!(nodes[1], chan_a).encode(); + let mon_b_ser = get_monitor!(nodes[1], chan_b).encode(); + let mons = &[&mon_a_ser[..], &mon_b_ser[..]]; + reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload); + + let preimage_events = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(preimage_events.len(), 2, "{preimage_events:?}"); + for ev in preimage_events { + match ev { + Event::PaymentSent { payment_hash, .. } => { + assert_eq!(payment_hash, hash_b); + }, + Event::PaymentForwarded { claim_from_onchain_tx, .. } => { + assert!(claim_from_onchain_tx); + }, + _ => panic!("Wrong event {ev:?}"), + } + } + + // After the background events are processed in `get_and_clear_pending_events`, above, node B + // will create the requisite `ChannelMontiorUpdate` for claiming the forwarded payment back. + // The HTLC, however, is added to the holding cell for replay after the peer connects, below. + check_added_monitors(&nodes[1], 1); + + nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); + reconnect_args.pending_cell_htlc_claims = (1, 0); + reconnect_nodes(reconnect_args); + expect_payment_sent(&nodes[0], preimage_a, None, true, true); +} + +#[test] +fn test_lost_preimage_monitor_events() { + do_test_lost_preimage_monitor_events(true); + do_test_lost_preimage_monitor_events(false); +} From 4b4aad2dbc240698c3d7d29d0a43c54c0dea0a4e Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 23:16:16 +0000 Subject: [PATCH 17/23] `rustfmt` and clean up `get_onchain_failed_outbound_htlcs` Backport of 9186900a5b7f2387a4cbb35f49c07cfff0a963fc Resolved trivial conflicts in: * lightning/src/chain/channelmonitor.rs due to splicing's introduction of the `funding` field in monitors. --- lightning/src/chain/channelmonitor.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 6106f46fc13..9c8a9663a15 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2553,28 +2553,29 @@ impl ChannelMonitor { /// This is similar to [`Self::get_pending_or_resolved_outbound_htlcs`] except it includes /// HTLCs which were resolved on-chain (i.e. where the final HTLC resolution was done by an /// event from this `ChannelMonitor`). 
- pub(crate) fn get_all_current_outbound_htlcs(&self) -> HashMap)> { + pub(crate) fn get_all_current_outbound_htlcs( + &self, + ) -> HashMap)> { let mut res = new_hash_map(); // Just examine the available counterparty commitment transactions. See docs on // `fail_unbroadcast_htlcs`, below, for justification. let us = self.inner.lock().unwrap(); - macro_rules! walk_counterparty_commitment { - ($txid: expr) => { - if let Some(ref latest_outpoints) = us.counterparty_claimable_outpoints.get($txid) { - for &(ref htlc, ref source_option) in latest_outpoints.iter() { - if let &Some(ref source) = source_option { - res.insert((**source).clone(), (htlc.clone(), - us.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).cloned())); - } + let mut walk_counterparty_commitment = |txid| { + if let Some(latest_outpoints) = us.counterparty_claimable_outpoints.get(txid) { + for &(ref htlc, ref source_option) in latest_outpoints.iter() { + if let &Some(ref source) = source_option { + let htlc_id = SentHTLCId::from_source(source); + let preimage_opt = us.counterparty_fulfilled_htlcs.get(&htlc_id).cloned(); + res.insert((**source).clone(), (htlc.clone(), preimage_opt)); } } } - } + }; if let Some(ref txid) = us.current_counterparty_commitment_txid { - walk_counterparty_commitment!(txid); + walk_counterparty_commitment(txid); } if let Some(ref txid) = us.prev_counterparty_commitment_txid { - walk_counterparty_commitment!(txid); + walk_counterparty_commitment(txid); } res } From da85861e4ad96c47e1ec5c38ca3b319b4d53347b Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 23:16:30 +0000 Subject: [PATCH 18/23] Re-fail perm-failed HTLCs on startup in case of `MonitorEvent` loss `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In a previous commit we handled the case of claimed HTLCs by replaying payment preimages on startup to avoid `MonitorEvent` loss causing us to miss an HTLC claim. Here we handle the HTLC-failed case similarly. Unlike with HTLC claims via preimage, we don't already have replay logic in `ChannelManager` startup, but its easy enough to add one. Luckily, we already track when an HTLC reaches permanently-failed state in `ChannelMonitor` (i.e. it has `ANTI_REORG_DELAY` confirmations on-chain on the failing transaction), so all we need to do is add the ability to query for that and fail them on `ChannelManager` startup. 
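As a minimal standalone sketch of that startup pass (placeholder types and names, not the actual LDK signatures): each closed channel's monitor is queried for outbound HTLCs that were irrevocably failed on chain, and each one is failed backwards, which is a no-op if the corresponding `MonitorEvent` already made it through before the crash.

use std::collections::HashMap;

type PaymentHash = [u8; 32];

/// Stand-in for a ChannelMonitor of a closed channel.
struct ClosedMonitor {
    /// Outbound HTLCs whose failing resolution has ANTI_REORG_DELAY
    /// confirmations on chain, keyed by an HTLC identifier.
    onchain_failed_htlcs: HashMap<u64, PaymentHash>,
}

/// Walk every closed-channel monitor on startup and fail the HTLCs back.
/// Failing is idempotent, so HTLCs whose MonitorEvent was processed before
/// the crash simply result in no further action here.
fn refail_onchain_failed_htlcs(
    monitors: &[ClosedMonitor],
    mut fail_htlc_backwards: impl FnMut(PaymentHash),
) {
    for monitor in monitors {
        for payment_hash in monitor.onchain_failed_htlcs.values() {
            fail_htlc_backwards(*payment_hash);
        }
    }
}
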
Backport of f809e6c88f4900bf392901832ed9db07557d6088 Resolved conflicts in: * lightning/src/chain/channelmonitor.rs due to splicing-related changes in the upstream branch, * lightning/src/ln/channelmanager.rs due to lack of the `LocalHTLCFailureReason` type in this branch, and * lightning/src/ln/monitor_tests.rs due to changes to upstream bump events and commitment announcement logic. --- lightning/src/chain/channelmonitor.rs | 116 ++++++++++++ lightning/src/ln/channelmanager.rs | 26 ++- lightning/src/ln/monitor_tests.rs | 245 ++++++++++++++++++++++++++ 3 files changed, 385 insertions(+), 2 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 9c8a9663a15..e1a30024a6c 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2580,6 +2580,122 @@ impl ChannelMonitor { res } + /// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via + /// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine + /// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted. + pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap { + let mut res = new_hash_map(); + let us = self.inner.lock().unwrap(); + + // We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment + // transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions + // to have been confirmed. + let confirmed_txid = us.funding_spend_confirmed.or_else(|| { + us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| { + if let OnchainEvent::FundingSpendConfirmation { .. } = event.event { + if event.height + ANTI_REORG_DELAY - 1 <= us.best_block.height { + Some(event.txid) + } else { + None + } + } else { + None + } + }) + }); + + let confirmed_txid = if let Some(txid) = confirmed_txid { + txid + } else { + return res; + }; + + macro_rules! walk_htlcs { + ($htlc_iter: expr) => { + let mut walk_candidate_htlcs = |htlcs| { + for &(ref candidate_htlc, ref candidate_source) in htlcs { + let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc; + let candidate_source: &Option> = &candidate_source; + + let source: &HTLCSource = if let Some(source) = candidate_source { + source + } else { + continue; + }; + let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src); + if let Some((confirmed_htlc, _)) = confirmed { + let filter = |v: &&IrrevocablyResolvedHTLC| { + v.commitment_tx_output_idx + == confirmed_htlc.transaction_output_index + }; + + // The HTLC was included in the confirmed commitment transaction, so we + // need to see if it has been irrevocably failed yet. + if confirmed_htlc.transaction_output_index.is_none() { + // Dust HTLCs are always implicitly failed once the commitment + // transaction reaches ANTI_REORG_DELAY confirmations. + res.insert(source.clone(), confirmed_htlc.payment_hash); + } else if let Some(state) = + us.htlcs_resolved_on_chain.iter().filter(filter).next() + { + if state.payment_preimage.is_none() { + res.insert(source.clone(), confirmed_htlc.payment_hash); + } + } + } else { + // The HTLC was not included in the confirmed commitment transaction, + // which has now reached ANTI_REORG_DELAY confirmations and thus the + // HTLC has been failed. 
+ res.insert(source.clone(), candidate_htlc.payment_hash); + } + } + }; + + // We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see + // `fail_unbroadcast_htlcs` for a description of why). + if let Some(ref txid) = us.current_counterparty_commitment_txid { + let htlcs = us.counterparty_claimable_outpoints.get(txid); + walk_candidate_htlcs(htlcs.expect("Missing tx info for latest tx")); + } + if let Some(ref txid) = us.prev_counterparty_commitment_txid { + let htlcs = us.counterparty_claimable_outpoints.get(txid); + walk_candidate_htlcs(htlcs.expect("Missing tx info for previous tx")); + } + }; + } + + if Some(confirmed_txid) == us.current_counterparty_commitment_txid + || Some(confirmed_txid) == us.prev_counterparty_commitment_txid + { + let htlcs = us.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap(); + walk_htlcs!(htlcs.iter().filter_map(|(a, b)| { + if let &Some(ref source) = b { + Some((a, Some(&**source))) + } else { + None + } + })); + } else if confirmed_txid == us.current_holder_commitment_tx.txid { + let mut htlcs = + us.current_holder_commitment_tx.htlc_outputs.iter().map(|(a, _, c)| (a, c.as_ref())); + walk_htlcs!(htlcs); + } else if let Some(prev_commitment_tx) = &us.prev_holder_signed_commitment_tx { + if confirmed_txid == prev_commitment_tx.txid { + let mut htlcs = + prev_commitment_tx.htlc_outputs.iter().map(|(a, _, c)| (a, c.as_ref())); + walk_htlcs!(htlcs); + } else { + let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[]; + walk_htlcs!(htlcs_confirmed.iter()); + } + } else { + let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[]; + walk_htlcs!(htlcs_confirmed.iter()); + } + + res + } + /// Gets the set of outbound HTLCs which are pending resolution in this channel or which were /// resolved with a preimage from our counterparty. /// diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 7b6563404d6..de22f030c70 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13968,6 +13968,27 @@ where }, } } + for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() { + if let Some(node_id) = monitor.get_counterparty_node_id() { + log_info!( + args.logger, + "Failing HTLC with payment hash {} as it was resolved on-chain.", + payment_hash + ); + failed_htlcs.push(( + htlc_source, + payment_hash, + node_id, + monitor.channel_id(), + )); + } else { + log_warn!( + args.logger, + "Unable to fail HTLC with payment hash {} after being resolved on-chain due to incredibly old monitor.", + payment_hash + ); + } + } } // Whether the downstream channel was closed or not, try to re-apply any payment @@ -14554,8 +14575,9 @@ where } for htlc_source in failed_htlcs.drain(..) 
{ - let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source; - let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id), channel_id }; + let (source, payment_hash, counterparty_id, channel_id) = htlc_source; + let receiver = + HTLCDestination::NextHopChannel { node_id: Some(counterparty_id), channel_id }; let reason = HTLCFailReason::from_failure_code(0x4000 | 8); channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); } diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 7bc54726791..24ea285391b 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3423,3 +3423,248 @@ fn test_lost_preimage_monitor_events() { do_test_lost_preimage_monitor_events(true); do_test_lost_preimage_monitor_events(false); } + +#[derive(PartialEq)] +enum CommitmentType { + RevokedCounterparty, + LatestCounterparty, + PreviousCounterparty, + LocalWithoutLastHTLC, + LocalWithLastHTLC, +} + +fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: bool) { + // `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the + // `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets + // persisted (i.e. due to a block update) then the node crashes, prior to persisting the + // `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be + // lost. This isn't likely in a sync persist environment, but in an async one this could be an + // issue. + // + // Note that this is only an issue for closed channels - `MonitorEvent`s only inform the + // `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup + // or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes + // completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved + // on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. + // + // Here we test that losing `MonitorEvent`s that contain HTLC resolution via timeouts does not + // cause us to lose a `PaymentFailed` event. 
+ let mut cfg = test_default_channel_config(); + cfg.manually_accept_inbound_channels = true; + cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true; + let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())]; + + let chanmon_cfgs = create_chanmon_cfgs(3); + let node_cfgs = create_node_cfgs(3, &chanmon_cfgs); + let persister; + let new_chain_mon; + let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs); + let node_b_reload; + let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs); + + provide_anchor_reserves(&nodes); + + let node_a_id = nodes[0].node.get_our_node_id(); + let node_b_id = nodes[1].node.get_our_node_id(); + let node_c_id = nodes[2].node.get_our_node_id(); + + let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2; + let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2; + + // Ensure all nodes are at the same height + let node_max_height = + nodes.iter().map(|node| node.blocks.lock().unwrap().len()).max().unwrap() as u32; + connect_blocks(&nodes[0], node_max_height - nodes[0].best_block_info().1); + connect_blocks(&nodes[1], node_max_height - nodes[1].best_block_info().1); + connect_blocks(&nodes[2], node_max_height - nodes[2].best_block_info().1); + + send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 25_000_000); + + let cs_revoked_commit = get_local_commitment_txn!(nodes[2], chan_b); + assert_eq!(cs_revoked_commit.len(), 1); + + let amt = if dust_htlcs { 1_000 } else { 10_000_000 }; + let (_, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], amt); + + let cs_previous_commit = get_local_commitment_txn!(nodes[2], chan_b); + assert_eq!(cs_previous_commit.len(), 1); + + let (route, hash_b, _, payment_secret_b) = + get_route_and_payment_hash!(nodes[1], nodes[2], amt); + let onion = RecipientOnionFields::secret_only(payment_secret_b); + nodes[1].node.send_payment_with_route(route, hash_b, onion, PaymentId(hash_b.0)).unwrap(); + check_added_monitors(&nodes[1], 1); + + let updates = get_htlc_update_msgs(&nodes[1], &node_c_id); + nodes[2].node.handle_update_add_htlc(node_b_id, &updates.update_add_htlcs[0]); + nodes[2].node.handle_commitment_signed(node_b_id, &updates.commitment_signed); + check_added_monitors(&nodes[2], 1); + + let (cs_raa, cs_cs) = get_revoke_commit_msgs!(nodes[2], node_b_id); + if confirm_tx == CommitmentType::LocalWithLastHTLC { + // Only deliver the last RAA + CS if we need to update the local commitment with the third + // HTLC. + nodes[1].node.handle_revoke_and_ack(node_c_id, &cs_raa); + check_added_monitors(&nodes[1], 1); + nodes[1].node.handle_commitment_signed(node_c_id, &cs_cs); + check_added_monitors(&nodes[1], 1); + + let _bs_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, node_c_id); + } + + nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id()); + nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + // Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs. 
+ let message = "Closed".to_owned(); + nodes[2] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message) + .unwrap(); + check_added_monitors(&nodes[2], 1); + let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }; + check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000); + check_closed_broadcast!(nodes[2], true); + + let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(cs_commit_tx.len(), 1); + + let message = "Closed".to_owned(); + nodes[1] + .node + .force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message) + .unwrap(); + check_added_monitors(&nodes[1], 1); + let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }; + check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000); + check_closed_broadcast!(nodes[1], true); + + let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(bs_commit_tx.len(), 1); + + let selected_commit_tx = match confirm_tx { + CommitmentType::RevokedCounterparty => &cs_revoked_commit[0], + CommitmentType::PreviousCounterparty => &cs_previous_commit[0], + CommitmentType::LatestCounterparty => &cs_commit_tx[0], + CommitmentType::LocalWithoutLastHTLC|CommitmentType::LocalWithLastHTLC => &bs_commit_tx[0], + }; + + mine_transaction(&nodes[1], selected_commit_tx); + // If the block gets connected first we may re-broadcast B's commitment transaction before + // seeing the C's confirm. In any case, if we confirmed the revoked counterparty commitment + // transaction, we want to go ahead and confirm the spend of it. + let bs_transactions = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + if confirm_tx == CommitmentType::RevokedCounterparty { + assert!(bs_transactions.len() == 1 || bs_transactions.len() == 2); + mine_transaction(&nodes[1], bs_transactions.last().unwrap()); + } else { + assert!(bs_transactions.len() == 1 || bs_transactions.len() == 0); + } + + connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + let mut events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events(); + match confirm_tx { + CommitmentType::LocalWithoutLastHTLC|CommitmentType::LocalWithLastHTLC => { + assert_eq!(events.len(), 0, "{events:?}"); + }, + CommitmentType::PreviousCounterparty|CommitmentType::LatestCounterparty => { + assert_eq!(events.len(), 1, "{events:?}"); + match events[0] { + Event::SpendableOutputs { .. } => {}, + _ => panic!("Unexpected event {events:?}"), + } + }, + CommitmentType::RevokedCounterparty => { + assert_eq!(events.len(), 2, "{events:?}"); + for event in events { + match event { + Event::SpendableOutputs { .. 
} => {}, + _ => panic!("Unexpected event {event:?}"), + } + } + }, + } + + if confirm_tx != CommitmentType::RevokedCounterparty { + connect_blocks(&nodes[1], TEST_FINAL_CLTV - ANTI_REORG_DELAY + 1); + if confirm_tx == CommitmentType::LocalWithoutLastHTLC || confirm_tx == CommitmentType::LocalWithLastHTLC { + if !dust_htlcs { + handle_bump_htlc_event(&nodes[1], 1); + } + } + } + + let bs_htlc_timeouts = + nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + if dust_htlcs || confirm_tx == CommitmentType::RevokedCounterparty { + assert_eq!(bs_htlc_timeouts.len(), 0); + } else { + assert_eq!(bs_htlc_timeouts.len(), 1); + + // Now replay the timeouts on node B, which after 6 confirmations should fail the HTLCs via + // `MonitorUpdate`s + mine_transaction(&nodes[1], &bs_htlc_timeouts[0]); + connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + } + + // Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we + // just processed a new block) but the ChannelManager was not. This should be exceedingly rare + // given we have to be connecting a block at the right moment and not manage to get a + // ChannelManager persisted after it does a thing that should immediately precede persistence, + // but with async persist it is more common. + // + // We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the + // latest state. + let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events(); + assert_eq!(mon_events.len(), 1); + assert_eq!(mon_events[0].2.len(), 2); + + let node_ser = nodes[1].node.encode(); + let mon_a_ser = get_monitor!(nodes[1], chan_a).encode(); + let mon_b_ser = get_monitor!(nodes[1], chan_b).encode(); + let mons = &[&mon_a_ser[..], &mon_b_ser[..]]; + reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload); + + let timeout_events = nodes[1].node.get_and_clear_pending_events(); + assert_eq!(timeout_events.len(), 4, "{timeout_events:?}"); + for ev in timeout_events { + match ev { + Event::PendingHTLCsForwardable { .. } => {}, + Event::PaymentPathFailed { payment_hash, .. } => { + assert_eq!(payment_hash, hash_b); + }, + Event::PaymentFailed { payment_hash, .. } => { + assert_eq!(payment_hash, Some(hash_b)); + }, + Event::HTLCHandlingFailed { prev_channel_id, .. 
} => { + assert_eq!(prev_channel_id, chan_a); + }, + _ => panic!("Wrong event {ev:?}"), + } + } + + nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id()); + + reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1])); + + nodes[1].node.process_pending_htlc_forwards(); + check_added_monitors(&nodes[1], 1); + let bs_fail = get_htlc_update_msgs(&nodes[1], &node_a_id); + nodes[0].node.handle_update_fail_htlc(node_b_id, &bs_fail.update_fail_htlcs[0]); + commitment_signed_dance!(nodes[0], nodes[1], bs_fail.commitment_signed, true, true); + expect_payment_failed!(nodes[0], hash_a, false); +} + +#[test] +fn test_lost_timeout_monitor_events() { + do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, false); + do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, true); + do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, false); + do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, true); + do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, false); + do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, true); + do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, false); + do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, true); + do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, false); + do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, true); +} From 071cb047646dd35f083dc9c3ab5a583837a07c7e Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 30 Jul 2025 17:36:42 +0000 Subject: [PATCH 19/23] Add a new `ChannelMoniorUpdateStep::ReleasePaymentComplete` `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In previous commits we ensured that HTLC resolutions which came to `ChannelManager` via a `MonitorEvent` were replayed on startup if the `MonitorEvent` was lost. However, in cases where the `ChannelManager` was so stale that it didn't have the payment state for an HTLC at all, we only re-add it in cases where `ChannelMonitor::get_pending_or_resolved_outbound_htlcs` includes it. Because constantly re-adding a payment state and then failing it would generate lots of noise for users on startup (not to mention risk of confusing stale payment events for the latest state of a payment when the `PaymentId` has been reused to retry a payment). Thus, `get_pending_or_resolved_outbound_htlcs` does not include state for HTLCs which were resolved on chain with a preimage or HTLCs which were resolved on chain with a timeout after `ANTI_REORG_DELAY` confirmations. 
This critera matches the critera for generating a `MonitorEvent`, and works great under the assumption that `MonitorEvent`s are reliably delivered. However, if they are not, and our `ChannelManager` is lost or substantially old (or, in a future where we do not persist `ChannelManager` at all), we will not end up seeing payment resolution events for an HTLC. Instead, we really want to tell our `ChannelMonitor`s when the resolution of an HTLC is complete. Note that we don't particularly care about non-payment HTLCs, as there is no re-hydration of state to do there - `ChannelManager` load ignores forwarded HTLCs coming back from `get_pending_or_resolved_outbound_htlcs` as there's nothing to do - we always attempt to replay the success/failure and figure out if it mattered based on whether there was still an HTLC to claim/fail. Here we take the first step towards that notification, adding a new `ChannelMonitorUpdateStep` for the completion notification, and tracking HTLCs which make it to the `ChannelMonitor` in such updates in a new map. Backport of c49ce57b1d400d925c39a171bf9c07f93ed34a28 Trivial conflicts resolved in: * lightning/src/chain/channelmonitor.rs --- lightning/src/chain/channelmonitor.rs | 43 +++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index e1a30024a6c..d764f47d36f 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -567,6 +567,20 @@ pub(crate) enum ChannelMonitorUpdateStep { ShutdownScript { scriptpubkey: ScriptBuf, }, + /// When a payment is finally resolved by the user handling an [`Event::PaymentSent`] or + /// [`Event::PaymentFailed`] event, the `ChannelManager` no longer needs to hear about it on + /// startup (which would cause it to re-hydrate the payment information even though the user + /// already learned about the payment's result). + /// + /// This will remove the HTLC from [`ChannelMonitor::get_all_current_outbound_htlcs`] and + /// [`ChannelMonitor::get_onchain_failed_outbound_htlcs`]. + /// + /// Note that this is only generated for closed channels as this is implicit in the + /// [`Self::CommitmentSecret`] update which clears the payment information from all un-revoked + /// counterparty commitment transactions. + ReleasePaymentComplete { + htlc: SentHTLCId, + }, } impl ChannelMonitorUpdateStep { @@ -578,6 +592,7 @@ impl ChannelMonitorUpdateStep { ChannelMonitorUpdateStep::CommitmentSecret { .. } => "CommitmentSecret", ChannelMonitorUpdateStep::ChannelForceClosed { .. } => "ChannelForceClosed", ChannelMonitorUpdateStep::ShutdownScript { .. } => "ShutdownScript", + ChannelMonitorUpdateStep::ReleasePaymentComplete { .. } => "ReleasePaymentComplete", } } } @@ -612,6 +627,9 @@ impl_writeable_tlv_based_enum_upgradable!(ChannelMonitorUpdateStep, (5, ShutdownScript) => { (0, scriptpubkey, required), }, + (7, ReleasePaymentComplete) => { + (1, htlc, required), + }, ); /// Indicates whether the balance is derived from a cooperative close, a force-close @@ -1000,6 +1018,14 @@ pub(crate) struct ChannelMonitorImpl { /// spending CSV for revocable outputs). htlcs_resolved_on_chain: Vec, + /// When a payment is resolved through an on-chain transaction, we tell the `ChannelManager` + /// about this via [`ChannelMonitor::get_onchain_failed_outbound_htlcs`] and + /// [`ChannelMonitor::get_all_current_outbound_htlcs`] at startup. 
We'll keep repeating the + /// same payments until they're eventually fully resolved by the user processing a + /// `PaymentSent` or `PaymentFailed` event, at which point the `ChannelManager` will inform of + /// this and we'll store the set of fully resolved payments here. + htlcs_resolved_to_user: HashSet, + /// The set of `SpendableOutput` events which we have already passed upstream to be claimed. /// These are tracked explicitly to ensure that we don't generate the same events redundantly /// if users duplicatively confirm old transactions. Specifically for transactions claiming a @@ -1255,6 +1281,7 @@ impl Writeable for ChannelMonitorImpl { (21, self.balances_empty_height, option), (23, self.holder_pays_commitment_tx_fee, option), (25, self.payment_preimages, required), + (33, self.htlcs_resolved_to_user, required), }); Ok(()) @@ -1458,6 +1485,7 @@ impl ChannelMonitor { funding_spend_confirmed: None, confirmed_commitment_tx_counterparty_output: None, htlcs_resolved_on_chain: Vec::new(), + htlcs_resolved_to_user: new_hash_set(), spendable_txids_confirmed: Vec::new(), best_block, @@ -3402,6 +3430,10 @@ impl ChannelMonitorImpl { panic!("Attempted to replace shutdown script {} with {}", shutdown_script, scriptpubkey); } }, + ChannelMonitorUpdateStep::ReleasePaymentComplete { htlc } => { + log_trace!(logger, "HTLC {htlc:?} permanently and fully resolved"); + self.htlcs_resolved_to_user.insert(*htlc); + }, } } @@ -3423,11 +3455,13 @@ impl ChannelMonitorImpl { |ChannelMonitorUpdateStep::CommitmentSecret { .. } => is_pre_close_update = true, // After a channel is closed, we don't communicate with our peer about it, so the - // only things we will update is getting a new preimage (from a different channel) - // or being told that the channel is closed. All other updates are generated while - // talking to our peer. + // only things we will update is getting a new preimage (from a different channel), + // being told that the channel is closed, or being told a payment which was + // resolved on-chain has had its resolution communicated to the user. All other + // updates are generated while talking to our peer. ChannelMonitorUpdateStep::PaymentPreimage { .. } => {}, ChannelMonitorUpdateStep::ChannelForceClosed { .. } => {}, + ChannelMonitorUpdateStep::ReleasePaymentComplete { .. 
} => {}, } } @@ -5188,6 +5222,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP let mut funding_spend_confirmed = None; let mut htlcs_resolved_on_chain = Some(Vec::new()); + let mut htlcs_resolved_to_user = Some(new_hash_set()); let mut funding_spend_seen = Some(false); let mut counterparty_node_id = None; let mut confirmed_commitment_tx_counterparty_output = None; @@ -5212,6 +5247,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP (21, balances_empty_height, option), (23, holder_pays_commitment_tx_fee, option), (25, payment_preimages_with_info, option), + (33, htlcs_resolved_to_user, option), }); if let Some(payment_preimages_with_info) = payment_preimages_with_info { if payment_preimages_with_info.len() != payment_preimages.len() { @@ -5302,6 +5338,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP funding_spend_confirmed, confirmed_commitment_tx_counterparty_output, htlcs_resolved_on_chain: htlcs_resolved_on_chain.unwrap(), + htlcs_resolved_to_user: htlcs_resolved_to_user.unwrap(), spendable_txids_confirmed: spendable_txids_confirmed.unwrap(), best_block, From 6d6ed299fa6fdd0ba9e71ebdd99616392424b053 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 2 Aug 2025 02:03:23 +0000 Subject: [PATCH 20/23] Prepare to provide new `ReleasePaymentComplete` monitor updates `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In previous commits we ensured that HTLC resolutions which came to `ChannelManager` via a `MonitorEvent` were replayed on startup if the `MonitorEvent` was lost. However, in cases where the `ChannelManager` was so stale that it didn't have the payment state for an HTLC at all, we only re-add it in cases where `ChannelMonitor::get_pending_or_resolved_outbound_htlcs` includes it. Because constantly re-adding a payment state and then failing it would generate lots of noise for users on startup (not to mention risk of confusing stale payment events for the latest state of a payment when the `PaymentId` has been reused to retry a payment). Thus, `get_pending_or_resolved_outbound_htlcs` does not include state for HTLCs which were resolved on chain with a preimage or HTLCs which were resolved on chain with a timeout after `ANTI_REORG_DELAY` confirmations. This critera matches the critera for generating a `MonitorEvent`, and works great under the assumption that `MonitorEvent`s are reliably delivered. However, if they are not, and our `ChannelManager` is lost or substantially old (or, in a future where we do not persist `ChannelManager` at all), we will not end up seeing payment resolution events for an HTLC. 
Instead, we really want to tell our `ChannelMonitor`s when the resolution of an HTLC is complete. Note that we don't particularly care about non-payment HTLCs, as there is no re-hydration of state to do there - `ChannelManager` load ignores forwarded HTLCs coming back from `get_pending_or_resolved_outbound_htlcs` as there's nothing to do - we always attempt to replay the success/failure and figure out if it mattered based on whether there was still an HTLC to claim/fail. Here we prepare to generate the new `ChannelMonitorUpdateStep::ReleasePaymentComplete` updates, adding a new `PaymentCompleteUpdate` struct to track the new update before we generate the `ChannelMonitorUpdate` and passing through to the right places in `ChannelManager`. The only cases where we want to generate the new update is after a `PaymentSent` or `PaymentFailed` event when the event was the result of a `MonitorEvent` or the equivalent read during startup. Backport of 8b637cc3c36089b6215c7fbc2f77fcacdacfb929 Conflicts resolved in: * lightning/src/ln/channelmanager.rs * lightning/src/ln/outbound_payment.rs --- lightning/src/ln/channelmanager.rs | 183 +++++++++++++++++++++------ lightning/src/ln/outbound_payment.rs | 14 +- 2 files changed, 153 insertions(+), 44 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index de22f030c70..bd2ebb1dc28 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1183,6 +1183,21 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction, }, ); +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct PaymentCompleteUpdate { + counterparty_node_id: PublicKey, + channel_funding_outpoint: OutPoint, + channel_id: ChannelId, + htlc_id: SentHTLCId, +} + +impl_writeable_tlv_based!(PaymentCompleteUpdate, { + (1, channel_funding_outpoint, required), + (3, counterparty_node_id, required), + (5, channel_id, required), + (7, htlc_id, required), +}); + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum EventCompletionAction { ReleaseRAAChannelMonitorUpdate { @@ -3298,7 +3313,7 @@ macro_rules! handle_monitor_update_completion { $self.finalize_claims(updates.finalized_claimed_htlcs); for failure in updates.failed_htlcs.drain(..) { let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id), channel_id }; - $self.fail_htlc_backwards_internal(&failure.0, &failure.1, &failure.2, receiver); + $self.fail_htlc_backwards_internal(&failure.0, &failure.1, &failure.2, receiver, None); } } } } @@ -3923,7 +3938,7 @@ where for htlc_source in failed_htlcs.drain(..) 
{ let reason = HTLCFailReason::from_failure_code(0x4000 | 8); let receiver = HTLCDestination::NextHopChannel { node_id: Some(*counterparty_node_id), channel_id: *channel_id }; - self.fail_htlc_backwards_internal(&htlc_source.0, &htlc_source.1, &reason, receiver); + self.fail_htlc_backwards_internal(&htlc_source.0, &htlc_source.1, &reason, receiver, None); } if let Some(shutdown_result) = shutdown_result { @@ -4046,7 +4061,7 @@ where let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source; let reason = HTLCFailReason::from_failure_code(0x4000 | 8); let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id), channel_id }; - self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); + self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver, None); } if let Some((_, funding_txo, _channel_id, monitor_update)) = shutdown_res.monitor_update { debug_assert!(false, "This should have been handled in `locked_close_channel`"); @@ -5642,7 +5657,7 @@ where let failure_reason = HTLCFailReason::from_failure_code(0x4000 | 10); let destination = HTLCDestination::UnknownNextHop { requested_forward_scid: short_channel_id }; - self.fail_htlc_backwards_internal(&htlc_source, &payment.forward_info.payment_hash, &failure_reason, destination); + self.fail_htlc_backwards_internal(&htlc_source, &payment.forward_info.payment_hash, &failure_reason, destination, None); } else { unreachable!() } // Only `PendingHTLCRouting::Forward`s are intercepted Ok(()) @@ -6324,7 +6339,13 @@ where &self.pending_events, &self.logger, |args| self.send_payment_along_path(args)); for (htlc_source, payment_hash, failure_reason, destination) in failed_forwards.drain(..) { - self.fail_htlc_backwards_internal(&htlc_source, &payment_hash, &failure_reason, destination); + self.fail_htlc_backwards_internal( + &htlc_source, + &payment_hash, + &failure_reason, + destination, + None, + ); } self.forward_htlcs(&mut phantom_receives); @@ -6686,7 +6707,7 @@ where let source = HTLCSource::PreviousHopData(htlc_source.0.clone()); let reason = HTLCFailReason::from_failure_code(23); let receiver = HTLCDestination::FailedPayment { payment_hash: htlc_source.1 }; - self.fail_htlc_backwards_internal(&source, &htlc_source.1, &reason, receiver); + self.fail_htlc_backwards_internal(&source, &htlc_source.1, &reason, receiver, None); } for (err, counterparty_node_id) in handle_errors.drain(..) { @@ -6751,7 +6772,7 @@ where let reason = self.get_htlc_fail_reason_from_failure_code(failure_code, &htlc); let source = HTLCSource::PreviousHopData(htlc.prev_hop); let receiver = HTLCDestination::FailedPayment { payment_hash: *payment_hash }; - self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); + self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver, None); } } } @@ -6830,18 +6851,26 @@ where for (htlc_src, payment_hash) in htlcs_to_fail.drain(..) 
{ let reason = HTLCFailReason::reason(failure_code, onion_failure_data.clone()); let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id.clone()), channel_id }; - self.fail_htlc_backwards_internal(&htlc_src, &payment_hash, &reason, receiver); + self.fail_htlc_backwards_internal(&htlc_src, &payment_hash, &reason, receiver, None); } } - fn fail_htlc_backwards_internal(&self, source: &HTLCSource, payment_hash: &PaymentHash, onion_error: &HTLCFailReason, destination: HTLCDestination) { - let push_forward_event = self.fail_htlc_backwards_internal_without_forward_event(source, payment_hash, onion_error, destination); + fn fail_htlc_backwards_internal( + &self, source: &HTLCSource, payment_hash: &PaymentHash, onion_error: &HTLCFailReason, + destination: HTLCDestination, + from_monitor_update_completion: Option, + ) { + let push_forward_event = self.fail_htlc_backwards_internal_without_forward_event(source, payment_hash, onion_error, destination, from_monitor_update_completion); if push_forward_event { self.push_pending_forwards_ev(); } } /// Fails an HTLC backwards to the sender of it to us. /// Note that we do not assume that channels corresponding to failed HTLCs are still available. - fn fail_htlc_backwards_internal_without_forward_event(&self, source: &HTLCSource, payment_hash: &PaymentHash, onion_error: &HTLCFailReason, destination: HTLCDestination) -> bool { + fn fail_htlc_backwards_internal_without_forward_event( + &self, source: &HTLCSource, payment_hash: &PaymentHash, onion_error: &HTLCFailReason, + destination: HTLCDestination, + mut from_monitor_update_completion: Option, + ) -> bool { // Ensure that no peer state channel storage lock is held when calling this function. // This ensures that future code doesn't introduce a lock-order requirement for // `forward_htlcs` to be locked after the `per_peer_state` peer locks, which calling @@ -6862,9 +6891,28 @@ where let mut push_forward_event; match source { HTLCSource::OutboundRoute { ref path, ref session_priv, ref payment_id, .. } => { - push_forward_event = self.pending_outbound_payments.fail_htlc(source, payment_hash, onion_error, path, - session_priv, payment_id, self.probing_cookie_secret, &self.secp_ctx, - &self.pending_events, &self.logger); + push_forward_event = self.pending_outbound_payments.fail_htlc( + source, + payment_hash, + onion_error, + path, + session_priv, + payment_id, + self.probing_cookie_secret, + &self.secp_ctx, + &self.pending_events, + &self.logger, + &mut from_monitor_update_completion, + ); + if let Some(update) = from_monitor_update_completion { + // If `fail_htlc` didn't `take` the post-event action, we should go ahead and + // complete it here as the failure was duplicative - we've already handled it. + // This should mostly only happen on startup, but it is possible to hit it in + // rare cases where a MonitorUpdate is replayed after restart because a + // ChannelMonitor wasn't persisted after it was applied (even though the + // ChannelManager was). 
+ // TODO + } }, HTLCSource::PreviousHopData(HTLCPreviousHopData { ref short_channel_id, ref htlc_id, ref incoming_packet_shared_secret, @@ -6979,7 +7027,7 @@ where let reason = self.get_htlc_fail_reason_from_failure_code(FailureCode::InvalidOnionPayload(None), &htlc); let source = HTLCSource::PreviousHopData(htlc.prev_hop); let receiver = HTLCDestination::FailedPayment { payment_hash }; - self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); + self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver, None); } return; } @@ -7085,7 +7133,7 @@ where let source = HTLCSource::PreviousHopData(htlc.prev_hop); let reason = HTLCFailReason::reason(0x4000 | 15, htlc_msat_height_data); let receiver = HTLCDestination::FailedPayment { payment_hash }; - self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); + self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver, None); } self.claimable_payments.lock().unwrap().pending_claiming_payments.remove(&payment_hash); } @@ -7356,14 +7404,34 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ if let Some(pubkey) = next_channel_counterparty_node_id { debug_assert_eq!(pubkey, path.hops[0].pubkey); } - let ev_completion_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate { - channel_funding_outpoint: Some(next_channel_outpoint), - channel_id: next_channel_id, - counterparty_node_id: path.hops[0].pubkey, + let mut ev_completion_action = + Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate { + channel_funding_outpoint: Some(next_channel_outpoint), + channel_id: next_channel_id, + counterparty_node_id: path.hops[0].pubkey, + }); + self.pending_outbound_payments.claim_htlc( + payment_id, + payment_preimage, + session_priv, + path, + from_onchain, + &mut ev_completion_action, + &self.pending_events, + &self.logger, + ); + // If an event was generated, `claim_htlc` set `ev_completion_action` to None, if + // not, we should go ahead and run it now (as the claim was duplicative), at least + // if a PaymentClaimed event with the same action isn't already pending. + let have_action = if ev_completion_action.is_some() { + let pending_events = self.pending_events.lock().unwrap(); + pending_events.iter().any(|(_, act)| *act == ev_completion_action) + } else { + false }; - self.pending_outbound_payments.claim_htlc(payment_id, payment_preimage, - session_priv, path, from_onchain, ev_completion_action, &self.pending_events, - &self.logger); + if !have_action { + self.handle_post_event_actions(ev_completion_action); + } }, HTLCSource::PreviousHopData(hop_data) => { let prev_channel_id = hop_data.channel_id; @@ -8709,7 +8777,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ for htlc_source in dropped_htlcs.drain(..) { let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id.clone()), channel_id: msg.channel_id }; let reason = HTLCFailReason::from_failure_code(0x4000 | 8); - self.fail_htlc_backwards_internal(&htlc_source.0, &htlc_source.1, &reason, receiver); + self.fail_htlc_backwards_internal(&htlc_source.0, &htlc_source.1, &reason, receiver, None); } if let Some(shutdown_res) = finish_shutdown { self.finish_close_channel(shutdown_res); @@ -9120,7 +9188,13 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ } for (htlc_source, payment_hash, failure_reason, destination) in failed_intercept_forwards.drain(..) 
{ - push_forward_event |= self.fail_htlc_backwards_internal_without_forward_event(&htlc_source, &payment_hash, &failure_reason, destination); + push_forward_event |= self.fail_htlc_backwards_internal_without_forward_event( + &htlc_source, + &payment_hash, + &failure_reason, + destination, + None, + ); } if !new_intercept_events.is_empty() { @@ -9461,7 +9535,13 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ log_trace!(logger, "Failing HTLC with hash {} from our monitor", &htlc_update.payment_hash); let receiver = HTLCDestination::NextHopChannel { node_id: counterparty_node_id, channel_id }; let reason = HTLCFailReason::from_failure_code(0x4000 | 8); - self.fail_htlc_backwards_internal(&htlc_update.source, &htlc_update.payment_hash, &reason, receiver); + self.fail_htlc_backwards_internal( + &htlc_update.source, + &htlc_update.payment_hash, + &reason, + receiver, + None, + ); } }, MonitorEvent::HolderForceClosed(_) | MonitorEvent::HolderForceClosedWithInfo { .. } => { @@ -10806,8 +10886,8 @@ where } } - fn handle_post_event_actions(&self, actions: Vec) { - for action in actions { + fn handle_post_event_actions>(&self, actions: I) { + for action in actions.into_iter() { match action { EventCompletionAction::ReleaseRAAChannelMonitorUpdate { channel_funding_outpoint: _, @@ -11313,7 +11393,7 @@ where } for (source, payment_hash, reason, destination) in timed_out_htlcs.drain(..) { - self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, destination); + self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, destination, None); } } @@ -13378,7 +13458,7 @@ where log_error!(logger, " The ChannelMonitor for channel {} is at counterparty commitment transaction number {} but the ChannelManager is at counterparty commitment transaction number {}.", &channel.context.channel_id(), monitor.get_cur_counterparty_commitment_number(), channel.get_cur_counterparty_commitment_transaction_number()); } - let mut shutdown_result = channel.context.force_shutdown(true, ClosureReason::OutdatedChannelManager); + let shutdown_result = channel.context.force_shutdown(true, ClosureReason::OutdatedChannelManager); if shutdown_result.unbroadcasted_batch_funding_txid.is_some() { return Err(DecodeError::InvalidValue); } @@ -13399,7 +13479,9 @@ where counterparty_node_id, funding_txo, channel_id, update }); } - failed_htlcs.append(&mut shutdown_result.dropped_outbound_htlcs); + for (source, hash, cp_id, chan_id) in shutdown_result.dropped_outbound_htlcs { + failed_htlcs.push((source, hash, cp_id, chan_id, None)); + } channel_closures.push_back((events::Event::ChannelClosed { channel_id: channel.context.channel_id(), user_channel_id: channel.context.get_user_id(), @@ -13426,7 +13508,13 @@ where log_info!(logger, "Failing HTLC with hash {} as it is missing in the ChannelMonitor for channel {} but was present in the (stale) ChannelManager", &channel.context.channel_id(), &payment_hash); - failed_htlcs.push((channel_htlc_source.clone(), *payment_hash, channel.context.get_counterparty_node_id(), channel.context.channel_id())); + failed_htlcs.push(( + channel_htlc_source.clone(), + *payment_hash, + channel.context.get_counterparty_node_id(), + channel.context.channel_id(), + None, + )); } } } else { @@ -13955,14 +14043,23 @@ where // generating a `PaymentPathSuccessful` event but regenerating // it and the `PaymentSent` on every restart until the // `ChannelMonitor` is removed. 
- let compl_action = + let mut compl_action = Some( EventCompletionAction::ReleaseRAAChannelMonitorUpdate { channel_funding_outpoint: Some(monitor.get_funding_txo().0), channel_id: monitor.channel_id(), counterparty_node_id: path.hops[0].pubkey, - }; - pending_outbounds.claim_htlc(payment_id, preimage, session_priv, - path, false, compl_action, &pending_events, &&logger); + }, + ); + pending_outbounds.claim_htlc( + payment_id, + preimage, + session_priv, + path, + false, + &mut compl_action, + &pending_events, + &&logger, + ); pending_events_read = pending_events.into_inner().unwrap(); } }, @@ -13975,11 +14072,18 @@ where "Failing HTLC with payment hash {} as it was resolved on-chain.", payment_hash ); + let completion_action = Some(PaymentCompleteUpdate { + counterparty_node_id: node_id, + channel_funding_outpoint: monitor.get_funding_txo().0, + channel_id: monitor.channel_id(), + htlc_id: SentHTLCId::from_source(&htlc_source), + }); failed_htlcs.push(( htlc_source, payment_hash, node_id, monitor.channel_id(), + completion_action, )); } else { log_warn!( @@ -14574,12 +14678,13 @@ where } } - for htlc_source in failed_htlcs.drain(..) { - let (source, payment_hash, counterparty_id, channel_id) = htlc_source; + for htlc_source in failed_htlcs { + let (source, hash, counterparty_id, channel_id, ev_action) = htlc_source; let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_id), channel_id }; let reason = HTLCFailReason::from_failure_code(0x4000 | 8); - channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); + channel_manager + .fail_htlc_backwards_internal(&source, &hash, &reason, receiver, ev_action); } for (source, preimage, downstream_value, downstream_closed, downstream_node_id, downstream_funding, downstream_channel_id) in pending_claims_to_replay { diff --git a/lightning/src/ln/outbound_payment.rs b/lightning/src/ln/outbound_payment.rs index 5e12cc86a39..5b11fd5d3a2 100644 --- a/lightning/src/ln/outbound_payment.rs +++ b/lightning/src/ln/outbound_payment.rs @@ -17,7 +17,9 @@ use crate::blinded_path::{IntroductionNode, NodeIdLookUp}; use crate::events::{self, PaymentFailureReason}; use crate::types::payment::{PaymentHash, PaymentPreimage, PaymentSecret}; use crate::ln::channel_state::ChannelDetails; -use crate::ln::channelmanager::{EventCompletionAction, HTLCSource, PaymentId}; +use crate::ln::channelmanager::{ + EventCompletionAction, HTLCSource, PaymentCompleteUpdate, PaymentId, +}; use crate::types::features::Bolt12InvoiceFeatures; use crate::ln::onion_utils; use crate::ln::onion_utils::{DecodedOnionFailure, HTLCFailReason}; @@ -1931,7 +1933,7 @@ impl OutboundPayments { pub(super) fn claim_htlc( &self, payment_id: PaymentId, payment_preimage: PaymentPreimage, session_priv: SecretKey, - path: Path, from_onchain: bool, ev_completion_action: EventCompletionAction, + path: Path, from_onchain: bool, ev_completion_action: &mut Option, pending_events: &Mutex)>>, logger: &L, ) where L::Target: Logger { @@ -1949,7 +1951,7 @@ impl OutboundPayments { payment_preimage, payment_hash, fee_paid_msat, - }, Some(ev_completion_action.clone()))); + }, ev_completion_action.take())); payment.get_mut().mark_fulfilled(); } @@ -1966,7 +1968,7 @@ impl OutboundPayments { payment_id, payment_hash, path, - }, Some(ev_completion_action))); + }, ev_completion_action.take())); } } } else { @@ -2091,7 +2093,8 @@ impl OutboundPayments { &self, source: &HTLCSource, payment_hash: &PaymentHash, onion_error: &HTLCFailReason, path: &Path, session_priv: &SecretKey, 
payment_id: &PaymentId, probing_cookie_secret: [u8; 32], secp_ctx: &Secp256k1, - pending_events: &Mutex)>>, logger: &L, + pending_events: &Mutex)>>, + logger: &L, completion_action: &mut Option, ) -> bool where L::Target: Logger { #[cfg(test)] let DecodedOnionFailure { @@ -2214,6 +2217,7 @@ impl OutboundPayments { } }; let mut pending_events = pending_events.lock().unwrap(); + // TODO: Handle completion_action pending_events.push_back((path_failure, None)); if let Some(ev) = full_failure_ev { pending_events.push_back((ev, None)); } pending_retry_ev From e64ed33c05960e3574466c9f79c0a9548f5d9795 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 2 Aug 2025 02:19:49 +0000 Subject: [PATCH 21/23] Generate new `ReleasePaymentComplete` monitor updates `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fasion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManger` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In previous commits we ensured that HTLC resolutions which came to `ChannelManager` via a `MonitorEvent` were replayed on startup if the `MonitorEvent` was lost. However, in cases where the `ChannelManager` was so stale that it didn't have the payment state for an HTLC at all, we only re-add it in cases where `ChannelMonitor::get_pending_or_resolved_outbound_htlcs` includes it. Because constantly re-adding a payment state and then failing it would generate lots of noise for users on startup (not to mention risk of confusing stale payment events for the latest state of a payment when the `PaymentId` has been reused to retry a payment). Thus, `get_pending_or_resolved_outbound_htlcs` does not include state for HTLCs which were resolved on chain with a preimage or HTLCs which were resolved on chain with a timeout after `ANTI_REORG_DELAY` confirmations. This critera matches the critera for generating a `MonitorEvent`, and works great under the assumption that `MonitorEvent`s are reliably delivered. However, if they are not, and our `ChannelManager` is lost or substantially old (or, in a future where we do not persist `ChannelManager` at all), we will not end up seeing payment resolution events for an HTLC. Instead, we really want to tell our `ChannelMonitor`s when the resolution of an HTLC is complete. Note that we don't particularly care about non-payment HTLCs, as there is no re-hydration of state to do there - `ChannelManager` load ignores forwarded HTLCs coming back from `get_pending_or_resolved_outbound_htlcs` as there's nothing to do - we always attempt to replay the success/failure and figure out if it mattered based on whether there was still an HTLC to claim/fail. Here we begin generating the new `ChannelMonitorUpdateStep::ReleasePaymentComplete` updates, updating functional tests for the new `ChannelMonitorUpdate`s where required. 
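For illustration only (stand-in types, not the real LDK structs; the actual construction lives in the `handle_post_event_actions` changes in the diff below), the update generated once the user has processed the corresponding `PaymentSent`/`PaymentFailed` event is roughly:

    // Simplified sketch of the post-event monitor update: once the payment's
    // resolution has been handed to the user, tell the closed channel's
    // `ChannelMonitor` it no longer needs to resurface this HTLC on startup.
    struct SentHtlcId(pub u64);         // stands in for `SentHTLCId`
    struct ChannelId(pub [u8; 32]);     // stands in for `ChannelId`

    enum UpdateStep {
        ReleasePaymentComplete { htlc: SentHtlcId },
    }

    struct MonitorUpdate {
        update_id: u64,
        channel_id: ChannelId,
        updates: Vec<UpdateStep>,
    }

    fn release_payment_complete(
        next_update_id: u64, channel_id: ChannelId, htlc: SentHtlcId,
    ) -> MonitorUpdate {
        MonitorUpdate {
            // Per-channel id bumped from `closed_channel_monitor_update_ids`,
            // since the channel is already closed when this update fires.
            update_id: next_update_id,
            channel_id,
            updates: vec![UpdateStep::ReleasePaymentComplete { htlc }],
        }
    }
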
Backport of 71a364c13345396fd75b22877e03b6a7b1d2bca1 Conflicts resolved in: * lightning/src/ln/chanmon_update_fail_tests.rs * lightning/src/ln/channelmanager.rs * lightning/src/ln/functional_test_utils.rs * lightning/src/ln/functional_tests.rs * lightning/src/ln/monitor_tests.rs * lightning/src/ln/outbound_payment.rs * lightning/src/ln/payment_tests.rs Note that unlike the original commit, on this branch we do not fail to deserialize a `ChannelMonitor` if the `counterparty_node_id` is `None` (implying it has not seen a `ChannelMonitorUpdate` since LDK 0.0.118). Thus, we skip the new logic in some cases, generating a warning log instead. As we assumed that it is now reasonable to require `counterparty_node_id`s in LDK 0.2, it seems reasonable to skip the new logic (potentially generating some additional spurious payment events on restart) now here as well. --- lightning/src/chain/channelmonitor.rs | 1 + lightning/src/ln/chanmon_update_fail_tests.rs | 1 + lightning/src/ln/channelmanager.rs | 181 ++++++++++++++++-- lightning/src/ln/functional_test_utils.rs | 29 ++- lightning/src/ln/functional_tests.rs | 39 ++-- lightning/src/ln/monitor_tests.rs | 48 +++-- lightning/src/ln/outbound_payment.rs | 14 +- lightning/src/ln/payment_tests.rs | 50 ++++- lightning/src/ln/reorg_tests.rs | 9 +- 9 files changed, 315 insertions(+), 57 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index d764f47d36f..4e0de092d4d 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -3356,6 +3356,7 @@ impl ChannelMonitorImpl { if updates.update_id == LEGACY_CLOSED_CHANNEL_UPDATE_ID || self.lockdown_from_offchain { assert_eq!(updates.updates.len(), 1); match updates.updates[0] { + ChannelMonitorUpdateStep::ReleasePaymentComplete { .. } => {}, ChannelMonitorUpdateStep::ChannelForceClosed { .. } => {}, // We should have already seen a `ChannelForceClosed` update if we're trying to // provide a preimage at this point. diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 2e06876bbad..6b2808eceda 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -3314,6 +3314,7 @@ fn do_test_durable_preimages_on_closed_channel(close_chans_before_reload: bool, nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[1].node.get_our_node_id(), error_message.to_string()).unwrap(); check_closed_event(&nodes[0], 1, ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }, false, &[nodes[1].node.get_our_node_id()], 100000); + check_added_monitors(&nodes[0], 1); let as_closing_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); assert_eq!(as_closing_tx.len(), 1); diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index bd2ebb1dc28..e114b23b1e7 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1206,6 +1206,12 @@ pub(crate) enum EventCompletionAction { channel_funding_outpoint: Option, channel_id: ChannelId, }, + + /// When a payment's resolution is communicated to the downstream logic via + /// [`Event::PaymentSent`] or [`Event::PaymentFailed`] we may want to mark the payment as + /// fully-resolved in the [`ChannelMonitor`], which we do via this action. + /// Note that this action will be dropped on downgrade to LDK prior to 0.2! 
+ ReleasePaymentCompleteChannelMonitorUpdate(PaymentCompleteUpdate), } impl_writeable_tlv_based_enum!(EventCompletionAction, (0, ReleaseRAAChannelMonitorUpdate) => { @@ -1218,6 +1224,7 @@ impl_writeable_tlv_based_enum!(EventCompletionAction, ChannelId::v1_from_funding_outpoint(channel_funding_outpoint.unwrap()) })), } + {1, ReleasePaymentCompleteChannelMonitorUpdate} => (), ); /// The source argument which is passed to [`ChannelManager::claim_mpp_part`]. @@ -6907,11 +6914,20 @@ where if let Some(update) = from_monitor_update_completion { // If `fail_htlc` didn't `take` the post-event action, we should go ahead and // complete it here as the failure was duplicative - we've already handled it. - // This should mostly only happen on startup, but it is possible to hit it in - // rare cases where a MonitorUpdate is replayed after restart because a - // ChannelMonitor wasn't persisted after it was applied (even though the - // ChannelManager was). - // TODO + // This can happen in rare cases where a MonitorUpdate is replayed after + // restart because a ChannelMonitor wasn't persisted after it was applied (even + // though the ChannelManager was). + // For such cases, we also check that there's no existing pending event to + // complete this action already, which we let finish instead. + let action = + EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(update); + let have_action = { + let pending_events = self.pending_events.lock().unwrap(); + pending_events.iter().any(|(_, act)| act.as_ref() == Some(&action)) + }; + if !have_action { + self.handle_post_event_actions([action]); + } } }, HTLCSource::PreviousHopData(HTLCPreviousHopData { @@ -7397,19 +7413,38 @@ This indicates a bug inside LDK. Please report this error at https://github.com/ startup_replay: bool, next_channel_counterparty_node_id: Option, next_channel_outpoint: OutPoint, next_channel_id: ChannelId, next_user_channel_id: Option, ) { + debug_assert_eq!( + startup_replay, + !self.background_events_processed_since_startup.load(Ordering::Acquire) + ); + let htlc_id = SentHTLCId::from_source(&source); match source { HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } => { - debug_assert!(self.background_events_processed_since_startup.load(Ordering::Acquire), + debug_assert!(!startup_replay, "We don't support claim_htlc claims during startup - monitors may not be available yet"); if let Some(pubkey) = next_channel_counterparty_node_id { debug_assert_eq!(pubkey, path.hops[0].pubkey); } - let mut ev_completion_action = + let mut ev_completion_action = if from_onchain { + if let Some(counterparty_node_id) = next_channel_counterparty_node_id { + let release = PaymentCompleteUpdate { + counterparty_node_id, + channel_funding_outpoint: next_channel_outpoint, + channel_id: next_channel_id, + htlc_id, + }; + Some(EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(release)) + } else { + log_warn!(self.logger, "Missing counterparty node id in monitor when trying to re-claim a payment resolved on chain. This may lead to redundant PaymentSent events on restart"); + None + } + } else { Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate { channel_funding_outpoint: Some(next_channel_outpoint), channel_id: next_channel_id, counterparty_node_id: path.hops[0].pubkey, - }); + }) + }; self.pending_outbound_payments.claim_htlc( payment_id, payment_preimage, @@ -9535,12 +9570,24 @@ This indicates a bug inside LDK. 
Please report this error at https://github.com/ log_trace!(logger, "Failing HTLC with hash {} from our monitor", &htlc_update.payment_hash); let receiver = HTLCDestination::NextHopChannel { node_id: counterparty_node_id, channel_id }; let reason = HTLCFailReason::from_failure_code(0x4000 | 8); + let completion_update = + if let Some(counterparty_node_id) = counterparty_node_id { + Some(PaymentCompleteUpdate { + counterparty_node_id, + channel_funding_outpoint: funding_outpoint, + channel_id, + htlc_id: SentHTLCId::from_source(&htlc_update.source), + }) + } else { + log_warn!(self.logger, "Missing counterparty node id in monitor when trying to re-claim a payment resolved on chain. This may lead to redundant PaymentSent events on restart"); + None + }; self.fail_htlc_backwards_internal( &htlc_update.source, &htlc_update.payment_hash, &reason, receiver, - None, + completion_update, ); } }, @@ -10894,8 +10941,63 @@ where channel_id, counterparty_node_id, } => { + let startup_complete = + self.background_events_processed_since_startup.load(Ordering::Acquire); + debug_assert!(startup_complete); self.handle_monitor_update_release(counterparty_node_id, channel_id, None); }, + EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate( + PaymentCompleteUpdate { + counterparty_node_id, + channel_funding_outpoint, + channel_id, + htlc_id, + }, + ) => { + let per_peer_state = self.per_peer_state.read().unwrap(); + let mut peer_state = per_peer_state + .get(&counterparty_node_id) + .map(|state| state.lock().unwrap()) + .expect("Channels originating a payment resolution must have peer state"); + let update_id = peer_state + .closed_channel_monitor_update_ids + .get_mut(&channel_id) + .expect("Channels originating a payment resolution must have a monitor"); + *update_id += 1; + + let update = ChannelMonitorUpdate { + update_id: *update_id, + channel_id: Some(channel_id), + counterparty_node_id: Some(counterparty_node_id), + updates: vec![ChannelMonitorUpdateStep::ReleasePaymentComplete { + htlc: htlc_id, + }], + }; + + let during_startup = + !self.background_events_processed_since_startup.load(Ordering::Acquire); + if during_startup { + let event = BackgroundEvent::MonitorUpdateRegeneratedOnStartup { + counterparty_node_id, + funding_txo: channel_funding_outpoint, + channel_id, + update, + }; + self.pending_background_events.lock().unwrap().push(event); + } else { + handle_new_monitor_update!( + self, + channel_funding_outpoint, + update, + peer_state, + peer_state, + per_peer_state, + counterparty_node_id, + channel_id, + POST_CHANNEL_CLOSE + ); + } + }, } } } @@ -13984,6 +14086,7 @@ where if counterparty_opt.is_none() { for (htlc_source, (htlc, preimage_opt)) in monitor.get_all_current_outbound_htlcs() { let logger = WithChannelMonitor::from(&args.logger, monitor, Some(htlc.payment_hash)); + let htlc_id = SentHTLCId::from_source(&htlc_source); match htlc_source { HTLCSource::PreviousHopData(prev_hop_data) => { let pending_forward_matches_htlc = |info: &PendingAddHTLCInfo| { @@ -14035,6 +14138,21 @@ where HTLCSource::OutboundRoute { payment_id, session_priv, path, .. 
} => { if let Some(preimage) = preimage_opt { let pending_events = Mutex::new(pending_events_read); + let mut compl_action = + if let Some(counterparty_node_id) = monitor.get_counterparty_node_id() { + let update = PaymentCompleteUpdate { + counterparty_node_id, + channel_funding_outpoint: monitor.get_funding_txo().0, + channel_id: monitor.channel_id(), + htlc_id, + }; + Some( + EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(update) + ) + } else { + log_warn!(logger, "Missing counterparty node id in monitor when trying to re-claim a payment resolved on chain. This may lead to redundant PaymentSent events on restart"); + None + }; // Note that we set `from_onchain` to "false" here, // deliberately keeping the pending payment around forever. // Given it should only occur when we have a channel we're @@ -14043,13 +14161,6 @@ where // generating a `PaymentPathSuccessful` event but regenerating // it and the `PaymentSent` on every restart until the // `ChannelMonitor` is removed. - let mut compl_action = Some( - EventCompletionAction::ReleaseRAAChannelMonitorUpdate { - channel_funding_outpoint: Some(monitor.get_funding_txo().0), - channel_id: monitor.channel_id(), - counterparty_node_id: path.hops[0].pubkey, - }, - ); pending_outbounds.claim_htlc( payment_id, preimage, @@ -14060,6 +14171,44 @@ where &pending_events, &&logger, ); + // If the completion action was not consumed, then there was no + // payment to claim, and we need to tell the `ChannelMonitor` + // we don't need to hear about the HTLC again, at least as long + // as the PaymentSent event isn't still sitting around in our + // event queue. + let have_action = if compl_action.is_some() { + let pending_events = pending_events.lock().unwrap(); + pending_events.iter().any(|(_, act)| *act == compl_action) + } else { + false + }; + if !have_action && compl_action.is_some() { + if let Some(counterparty_node_id) = monitor.get_counterparty_node_id() { + let mut peer_state = per_peer_state + .get(&counterparty_node_id) + .map(|state| state.lock().unwrap()) + .expect("Channels originating a preimage must have peer state"); + let update_id = peer_state + .closed_channel_monitor_update_ids + .get_mut(&monitor.channel_id()) + .expect("Channels originating a preimage must have a monitor"); + *update_id += 1; + + pending_background_events.push(BackgroundEvent::MonitorUpdateRegeneratedOnStartup { + counterparty_node_id: counterparty_node_id, + funding_txo: monitor.get_funding_txo().0, + channel_id: monitor.channel_id(), + update: ChannelMonitorUpdate { + update_id: *update_id, + channel_id: Some(monitor.channel_id()), + updates: vec![ChannelMonitorUpdateStep::ReleasePaymentComplete { + htlc: htlc_id, + }], + counterparty_node_id: Some(counterparty_node_id), + }, + }); + } + } pending_events_read = pending_events.into_inner().unwrap(); } }, diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index c4cb7bcebb0..273dd12e4b2 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -2332,10 +2332,14 @@ macro_rules! 
expect_payment_claimed { } } -pub fn expect_payment_sent>(node: &H, - expected_payment_preimage: PaymentPreimage, expected_fee_msat_opt: Option>, - expect_per_path_claims: bool, expect_post_ev_mon_update: bool, +pub fn expect_payment_sent>( + node: &H, expected_payment_preimage: PaymentPreimage, + expected_fee_msat_opt: Option>, expect_per_path_claims: bool, + expect_post_ev_mon_update: bool, ) { + if expect_post_ev_mon_update { + check_added_monitors(node, 0); + } let events = node.node().get_and_clear_pending_events(); let expected_payment_hash = PaymentHash( bitcoin::hashes::sha256::Hash::hash(&expected_payment_preimage.0).to_byte_array()); @@ -2535,6 +2539,7 @@ pub struct PaymentFailedConditions<'a> { pub(crate) expected_blamed_scid: Option, pub(crate) expected_blamed_chan_closed: Option, pub(crate) expected_mpp_parts_remain: bool, + pub(crate) from_mon_update: bool, } impl<'a> PaymentFailedConditions<'a> { @@ -2544,6 +2549,7 @@ impl<'a> PaymentFailedConditions<'a> { expected_blamed_scid: None, expected_blamed_chan_closed: None, expected_mpp_parts_remain: false, + from_mon_update: false, } } pub fn mpp_parts_remain(mut self) -> Self { @@ -2562,6 +2568,10 @@ impl<'a> PaymentFailedConditions<'a> { self.expected_htlc_error_data = Some((code, data)); self } + pub fn from_mon_update(mut self) -> Self { + self.from_mon_update = true; + self + } } #[cfg(test)] @@ -2647,8 +2657,19 @@ pub fn expect_payment_failed_conditions<'a, 'b, 'c, 'd, 'e>( node: &'a Node<'b, 'c, 'd>, expected_payment_hash: PaymentHash, expected_payment_failed_permanently: bool, conditions: PaymentFailedConditions<'e> ) { + if conditions.from_mon_update { + check_added_monitors(node, 0); + } let events = node.node.get_and_clear_pending_events(); - expect_payment_failed_conditions_event(events, expected_payment_hash, expected_payment_failed_permanently, conditions); + if conditions.from_mon_update { + check_added_monitors(node, 1); + } + expect_payment_failed_conditions_event( + events, + expected_payment_hash, + expected_payment_failed_permanently, + conditions, + ); } pub fn send_along_route_with_secret<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, route: Route, expected_paths: &[&[&Node<'a, 'b, 'c>]], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: PaymentSecret) -> PaymentId { diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 74c2c15337a..07a6c4b8412 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -2920,7 +2920,8 @@ fn claim_htlc_outputs() { // ANTI_REORG_DELAY confirmations. 
mine_transaction(&nodes[1], accepted_claim); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[1], payment_hash_2, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[1], payment_hash_2, false, conditions); } get_announce_close_broadcast_events(&nodes, 0, 1); assert_eq!(nodes[0].node.list_channels().len(), 0); @@ -3374,6 +3375,7 @@ fn test_htlc_on_chain_success() { check_closed_broadcast!(nodes[0], true); check_added_monitors!(nodes[0], 1); let events = nodes[0].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[0], 2); assert_eq!(events.len(), 5); let mut first_claimed = false; for event in events { @@ -3705,7 +3707,11 @@ fn do_test_commitment_revoked_fail_backward_exhaustive(deliver_bs_raa: bool, use check_added_monitors!(nodes[1], 1); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); + check_added_monitors(&nodes[1], 0); let events = nodes[1].node.get_and_clear_pending_events(); + if deliver_bs_raa { + check_added_monitors(&nodes[1], 1); + } assert_eq!(events.len(), if deliver_bs_raa { 3 + nodes.len() - 1 } else { 4 + nodes.len() }); assert!(events.iter().any(|ev| matches!( ev, @@ -5058,7 +5064,8 @@ fn test_static_spendable_outputs_timeout_tx() { mine_transaction(&nodes[1], &node_txn[0]); check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 100000); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[1], our_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[1], our_payment_hash, false, conditions); let spend_txn = check_spendable_outputs!(nodes[1], node_cfgs[1].keys_manager); assert_eq!(spend_txn.len(), 3); // SpendableOutput: remote_commitment_tx.to_remote, timeout_tx.output @@ -5917,7 +5924,8 @@ fn test_dynamic_spendable_outputs_local_htlc_timeout_tx() { mine_transaction(&nodes[0], &htlc_timeout); connect_blocks(&nodes[0], BREAKDOWN_TIMEOUT as u32 - 1); - expect_payment_failed!(nodes[0], our_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], our_payment_hash, false, conditions); // Verify that A is able to spend its own HTLC-Timeout tx thanks to spendable output event given back by its ChannelMonitor let spend_txn = check_spendable_outputs!(nodes[0], node_cfgs[0].keys_manager); @@ -6004,7 +6012,8 @@ fn test_key_derivation_params() { mine_transaction(&nodes[0], &htlc_timeout); connect_blocks(&nodes[0], BREAKDOWN_TIMEOUT as u32 - 1); - expect_payment_failed!(nodes[0], our_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], our_payment_hash, false, conditions); // Verify that A is able to spend its own HTLC-Timeout tx thanks to spendable output event given back by its ChannelMonitor let new_keys_manager = test_utils::TestKeysInterface::new(&seed, Network::Testnet); @@ -7479,7 +7488,9 @@ fn do_test_failure_delay_dust_htlc_local_commitment(announce_latest: bool) { assert_eq!(nodes[0].node.get_and_clear_pending_events().len(), 0); connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); + check_added_monitors(&nodes[0], 0); let events = nodes[0].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[0], 2); // Only 2 PaymentPathFailed events should show up, over-dust HTLC has to be failed by timeout tx assert_eq!(events.len(), 4); let mut 
first_failed = false; @@ -7539,12 +7550,14 @@ fn do_test_sweep_outbound_htlc_failure_update(revoked: bool, local: bool) { // We fail dust-HTLC 1 by broadcast of local commitment tx mine_transaction(&nodes[0], &as_commitment_tx[0]); check_closed_event!(nodes[0], 1, ClosureReason::CommitmentTxConfirmed, [nodes[1].node.get_our_node_id()], 100000); + check_closed_broadcast!(nodes[0], true); + check_added_monitors(&nodes[0], 1); connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[0], dust_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], dust_hash, false, conditions); connect_blocks(&nodes[0], TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS - ANTI_REORG_DELAY); - check_closed_broadcast!(nodes[0], true); - check_added_monitors!(nodes[0], 1); + check_added_monitors!(nodes[0], 0); assert_eq!(nodes[0].node.get_and_clear_pending_events().len(), 0); timeout_tx.push(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap()[0].clone()); assert_eq!(timeout_tx[0].input[0].witness.last().unwrap().len(), OFFERED_HTLC_SCRIPT_WEIGHT); @@ -7552,7 +7565,8 @@ fn do_test_sweep_outbound_htlc_failure_update(revoked: bool, local: bool) { assert_eq!(nodes[0].node.get_and_clear_pending_events().len(), 0); mine_transaction(&nodes[0], &timeout_tx[0]); connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[0], non_dust_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], non_dust_hash, false, conditions); } else { // We fail dust-HTLC 1 by broadcast of remote commitment tx. If revoked, fail also non-dust HTLC mine_transaction(&nodes[0], &bs_commitment_tx[0]); @@ -7567,7 +7581,8 @@ fn do_test_sweep_outbound_htlc_failure_update(revoked: bool, local: bool) { check_spends!(timeout_tx[0], bs_commitment_tx[0]); // For both a revoked or non-revoked commitment transaction, after ANTI_REORG_DELAY the // dust HTLC should have been failed. 
- expect_payment_failed!(nodes[0], dust_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], dust_hash, false, conditions); if !revoked { assert_eq!(timeout_tx[0].input[0].witness.last().unwrap().len(), ACCEPTED_HTLC_SCRIPT_WEIGHT); @@ -7578,7 +7593,8 @@ fn do_test_sweep_outbound_htlc_failure_update(revoked: bool, local: bool) { mine_transaction(&nodes[0], &timeout_tx[0]); assert_eq!(nodes[0].node.get_and_clear_pending_events().len(), 0); connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[0], non_dust_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], non_dust_hash, false, conditions); } } @@ -9198,7 +9214,8 @@ fn test_htlc_no_detection() { connect_block(&nodes[0], &create_dummy_block(nodes[0].best_block_hash(), 42, vec![htlc_timeout.clone()])); connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[0], our_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], our_payment_hash, false, conditions); } fn do_test_onchain_htlc_settlement_after_close(broadcast_alice: bool, go_onchain_before_fulfill: bool) { diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 24ea285391b..6ade9f231d6 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -163,7 +163,8 @@ fn revoked_output_htlc_resolution_timing() { assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[1], payment_hash_1, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[1], payment_hash_1, false, conditions); } #[test] @@ -268,7 +269,7 @@ fn archive_fully_resolved_monitors() { // Finally, we process the pending `MonitorEvent` from nodes[0], allowing the `ChannelMonitor` // to be archived `ARCHIVAL_DELAY_BLOCKS` blocks later. 
- expect_payment_sent(&nodes[0], payment_preimage, None, true, false); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); nodes[0].chain_monitor.chain_monitor.archive_fully_resolved_channel_monitors(); assert_eq!(nodes[0].chain_monitor.chain_monitor.list_monitors().len(), 1); connect_blocks(&nodes[0], ARCHIVAL_DELAY_BLOCKS - 1); @@ -695,7 +696,8 @@ fn do_test_claim_value_force_close(anchors: bool, prev_commitment_tx: bool) { sorted_vec(nodes[1].chain_monitor.chain_monitor.get_monitor(funding_outpoint).unwrap().get_claimable_balances())); connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[0], dust_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], dust_payment_hash, false, conditions); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); // After ANTI_REORG_DELAY, A will consider its balance fully spendable and generate a @@ -718,8 +720,9 @@ fn do_test_claim_value_force_close(anchors: bool, prev_commitment_tx: bool) { mine_transaction(&nodes[0], &b_broadcast_txn[0]); if prev_commitment_tx { expect_payment_path_successful!(nodes[0]); + check_added_monitors(&nodes[0], 1); } else { - expect_payment_sent(&nodes[0], payment_preimage, None, true, false); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); } assert_eq!(sorted_vec(vec![sent_htlc_balance.clone(), sent_htlc_timeout_balance.clone()]), sorted_vec(nodes[0].chain_monitor.chain_monitor.get_monitor(funding_outpoint).unwrap().get_claimable_balances())); @@ -751,7 +754,8 @@ fn do_test_claim_value_force_close(anchors: bool, prev_commitment_tx: bool) { connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); assert_eq!(Vec::::new(), nodes[0].chain_monitor.chain_monitor.get_monitor(funding_outpoint).unwrap().get_claimable_balances()); - expect_payment_failed!(nodes[0], timeout_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], timeout_payment_hash, false, conditions); test_spendable_output(&nodes[0], &a_htlc_timeout_tx, false); @@ -966,7 +970,7 @@ fn do_test_balances_on_local_commitment_htlcs(anchors: bool) { // Now confirm nodes[1]'s HTLC claim, giving nodes[0] the preimage. Note that the "maybe // claimable" balance remains until we see ANTI_REORG_DELAY blocks. mine_transaction(&nodes[0], &bs_htlc_claim_txn[0]); - expect_payment_sent(&nodes[0], payment_preimage_2, None, true, false); + expect_payment_sent(&nodes[0], payment_preimage_2, None, true, true); assert_eq!(sorted_vec(vec![Balance::ClaimableAwaitingConfirmations { amount_satoshis: 1_000_000 - 10_000 - 20_000 - commitment_tx_fee - anchor_outputs_value, confirmation_height: node_a_commitment_claimable, @@ -1008,7 +1012,8 @@ fn do_test_balances_on_local_commitment_htlcs(anchors: bool) { // panicked as described in the test introduction. This will remove the "maybe claimable" // spendable output as nodes[1] has fully claimed the second HTLC. 
connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); - expect_payment_failed!(nodes[0], payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); assert_eq!(sorted_vec(vec![Balance::ClaimableAwaitingConfirmations { amount_satoshis: 1_000_000 - 10_000 - 20_000 - commitment_tx_fee - anchor_outputs_value, @@ -1239,7 +1244,8 @@ fn test_no_preimage_inbound_htlc_balances() { // Once as_htlc_timeout_claim[0] reaches ANTI_REORG_DELAY confirmations, we should get a // payment failure event. connect_blocks(&nodes[0], ANTI_REORG_DELAY - 2); - expect_payment_failed!(nodes[0], to_b_failed_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], to_b_failed_payment_hash, false, conditions); connect_blocks(&nodes[0], 1); assert_eq!(sorted_vec(vec![Balance::ClaimableAwaitingConfirmations { @@ -1287,7 +1293,8 @@ fn test_no_preimage_inbound_htlc_balances() { sorted_vec(nodes[1].chain_monitor.chain_monitor.get_monitor(funding_outpoint).unwrap().get_claimable_balances())); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 2); - expect_payment_failed!(nodes[1], to_a_failed_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[1], to_a_failed_payment_hash, false, conditions); assert_eq!(vec![b_received_htlc_balance.clone()], nodes[1].chain_monitor.chain_monitor.get_monitor(funding_outpoint).unwrap().get_claimable_balances()); @@ -1548,7 +1555,9 @@ fn do_test_revoked_counterparty_commitment_balances(anchors: bool, confirm_htlc_ connect_blocks(&nodes[1], 3); test_spendable_output(&nodes[1], &as_revoked_txn[0], false); + check_added_monitors(&nodes[1], 0); let mut payment_failed_events = nodes[1].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[1], 2); expect_payment_failed_conditions_event(payment_failed_events[..2].to_vec(), missing_htlc_payment_hash, false, PaymentFailedConditions::new()); expect_payment_failed_conditions_event(payment_failed_events[2..].to_vec(), @@ -1557,7 +1566,9 @@ fn do_test_revoked_counterparty_commitment_balances(anchors: bool, confirm_htlc_ connect_blocks(&nodes[1], 1); if confirm_htlc_spend_first { test_spendable_output(&nodes[1], &claim_txn[0], false); + check_added_monitors(&nodes[1], 0); let mut payment_failed_events = nodes[1].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[1], 2); expect_payment_failed_conditions_event(payment_failed_events[..2].to_vec(), live_payment_hash, false, PaymentFailedConditions::new()); expect_payment_failed_conditions_event(payment_failed_events[2..].to_vec(), @@ -1570,7 +1581,9 @@ fn do_test_revoked_counterparty_commitment_balances(anchors: bool, confirm_htlc_ test_spendable_output(&nodes[1], &claim_txn[1], false); } else { test_spendable_output(&nodes[1], &claim_txn[0], false); + check_added_monitors(&nodes[1], 0); let mut payment_failed_events = nodes[1].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[1], 2); expect_payment_failed_conditions_event(payment_failed_events[..2].to_vec(), live_payment_hash, false, PaymentFailedConditions::new()); expect_payment_failed_conditions_event(payment_failed_events[2..].to_vec(), @@ -2018,7 +2031,7 @@ fn do_test_revoked_counterparty_aggregated_claims(anchors: bool) { as_revoked_txn[1].clone() }; mine_transaction(&nodes[1], &htlc_success_claim); - expect_payment_sent(&nodes[1], 
claimed_payment_preimage, None, true, false); + expect_payment_sent(&nodes[1], claimed_payment_preimage, None, true, true); let mut claim_txn_2 = nodes[1].tx_broadcaster.txn_broadcast(); // Once B sees the HTLC-Success transaction it splits its claim transaction into two, though in @@ -2119,7 +2132,8 @@ fn do_test_revoked_counterparty_aggregated_claims(anchors: bool) { assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); // We shouldn't fail the payment until we spend the output connect_blocks(&nodes[1], 5); - expect_payment_failed!(nodes[1], revoked_payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[1], revoked_payment_hash, false, conditions); let spendable_output_events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events(); assert_eq!(spendable_output_events.len(), 2); for event in spendable_output_events { @@ -2592,7 +2606,8 @@ fn do_test_yield_anchors_events(have_htlcs: bool) { connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); assert!(nodes[0].chain_monitor.chain_monitor.get_and_clear_pending_events().is_empty()); - expect_payment_failed!(nodes[0], payment_hash_1.unwrap(), false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], payment_hash_1.unwrap(), false, conditions); connect_blocks(&nodes[0], BREAKDOWN_TIMEOUT as u32); @@ -3391,6 +3406,7 @@ fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) { let mons = &[&mon_a_ser[..], &mon_b_ser[..]]; reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload); + check_added_monitors(&nodes[1], 0); let preimage_events = nodes[1].node.get_and_clear_pending_events(); assert_eq!(preimage_events.len(), 2, "{preimage_events:?}"); for ev in preimage_events { @@ -3408,7 +3424,9 @@ fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) { // After the background events are processed in `get_and_clear_pending_events`, above, node B // will create the requisite `ChannelMontiorUpdate` for claiming the forwarded payment back. // The HTLC, however, is added to the holding cell for replay after the peer connects, below. - check_added_monitors(&nodes[1], 1); + // It will also apply a `ChannelMonitorUpdate` to let the `ChannelMonitor` know that the + // payment can now be forgotten as the `PaymentSent` event was handled. + check_added_monitors(&nodes[1], 2); nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id()); @@ -3625,8 +3643,12 @@ fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: b let mons = &[&mon_a_ser[..], &mon_b_ser[..]]; reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload); + // After reload, once we process the `PaymentFailed` event, the sent HTLC will be marked + // handled so that we won't ever see the event again. + check_added_monitors(&nodes[1], 0); let timeout_events = nodes[1].node.get_and_clear_pending_events(); assert_eq!(timeout_events.len(), 4, "{timeout_events:?}"); + check_added_monitors(&nodes[1], 1); for ev in timeout_events { match ev { Event::PendingHTLCsForwardable { .. 
} => {}, diff --git a/lightning/src/ln/outbound_payment.rs b/lightning/src/ln/outbound_payment.rs index 5b11fd5d3a2..b3a8bd9ffee 100644 --- a/lightning/src/ln/outbound_payment.rs +++ b/lightning/src/ln/outbound_payment.rs @@ -1960,8 +1960,6 @@ impl OutboundPayments { // This could potentially lead to removing a pending payment too early, // with a reorg of one block causing us to re-add the fulfilled payment on // restart. - // TODO: We should have a second monitor event that informs us of payments - // irrevocably fulfilled. if payment.get_mut().remove(&session_priv_bytes, Some(&path)) { let payment_hash = Some(PaymentHash(Sha256::hash(&payment_preimage.0).to_byte_array())); pending_events.push_back((events::Event::PaymentPathSuccessful { @@ -2217,9 +2215,15 @@ impl OutboundPayments { } }; let mut pending_events = pending_events.lock().unwrap(); - // TODO: Handle completion_action - pending_events.push_back((path_failure, None)); - if let Some(ev) = full_failure_ev { pending_events.push_back((ev, None)); } + let completion_action = completion_action + .take() + .map(|act| EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(act)); + if let Some(ev) = full_failure_ev { + pending_events.push_back((path_failure, None)); + pending_events.push_back((ev, completion_action)); + } else { + pending_events.push_back((path_failure, completion_action)); + } pending_retry_ev } diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index 12f7a639908..52fda5a6329 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -754,7 +754,7 @@ fn do_retry_with_no_persist(confirm_before_reload: bool) { assert_eq!(txn[0].compute_txid(), as_commitment_tx.compute_txid()); } mine_transaction(&nodes[0], &bs_htlc_claim_txn); - expect_payment_sent(&nodes[0], payment_preimage_1, None, true, false); + expect_payment_sent(&nodes[0], payment_preimage_1, None, true, true); connect_blocks(&nodes[0], TEST_FINAL_CLTV*4 + 20); let (first_htlc_timeout_tx, second_htlc_timeout_tx) = { let mut txn = nodes[0].tx_broadcaster.unique_txn_broadcast(); @@ -769,7 +769,8 @@ fn do_retry_with_no_persist(confirm_before_reload: bool) { confirm_transaction(&nodes[0], &first_htlc_timeout_tx); } nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().clear(); - expect_payment_failed_conditions(&nodes[0], payment_hash, false, PaymentFailedConditions::new()); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); // Finally, retry the payment (which was reloaded from the ChannelMonitor when nodes[0] was // reloaded) via a route over the new channel, which work without issue and eventually be @@ -956,7 +957,8 @@ fn do_test_completed_payment_not_retryable_on_reload(use_dust: bool) { // (which should also still work). 
connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1); connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1); - expect_payment_failed_conditions(&nodes[0], payment_hash, false, PaymentFailedConditions::new()); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); let chan_1_monitor_serialized = get_monitor!(nodes[0], chan_id_3).encode(); @@ -971,6 +973,9 @@ fn do_test_completed_payment_not_retryable_on_reload(use_dust: bool) { nodes[1].node.peer_disconnected(nodes[0].node.get_our_node_id()); nodes[0].node.test_process_background_events(); + check_added_monitors(&nodes[0], 1); // TODO: Removed in the next commit as this only required + // when we are still seeing all payments, even resolved + // ones. let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); reconnect_args.send_channel_ready = (true, true); @@ -1000,6 +1005,9 @@ fn do_test_completed_payment_not_retryable_on_reload(use_dust: bool) { nodes[1].node.peer_disconnected(nodes[0].node.get_our_node_id()); nodes[0].node.test_process_background_events(); + check_added_monitors(&nodes[0], 1); // TODO: Removed in the next commit as this only required + // when we are still seeing all payments, even resolved + // ones. reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1])); @@ -1109,9 +1117,10 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bo let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); if payment_timeout { - expect_payment_failed!(nodes[0], payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); } else { - expect_payment_sent(&nodes[0], payment_preimage, None, true, false); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true); } // If we persist the ChannelManager after we get the PaymentSent event, we shouldn't get it @@ -1123,12 +1132,20 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bo // Now reload nodes[0]... reload_node!(nodes[0], &chan_manager_serialized, &[&chan_0_monitor_serialized], persister, new_chain_monitor, nodes_0_deserialized); + check_added_monitors(&nodes[0], 0); if persist_manager_post_event { assert!(nodes[0].node.get_and_clear_pending_events().is_empty()); + check_added_monitors(&nodes[0], 2); } else if payment_timeout { - expect_payment_failed!(nodes[0], payment_hash, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); } else { + // After reload, the ChannelManager identified the failed payment and queued up the + // PaymentSent and corresponding ChannelMonitorUpdate to mark the payment handled, but + // while processing the pending `MonitorEvent`s (which were not processed before the + // monitor was persisted) we will end up with a duplicate ChannelMonitorUpdate. 
expect_payment_sent(&nodes[0], payment_preimage, None, true, false); + check_added_monitors(&nodes[0], 2); } // Note that if we re-connect the block which exposed nodes[0] to the payment preimage (but @@ -1373,7 +1390,9 @@ fn onchain_failed_probe_yields_event() { check_closed_broadcast!(&nodes[0], true); check_added_monitors!(nodes[0], 1); + check_added_monitors(&nodes[0], 0); let mut events = nodes[0].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[0], 1); assert_eq!(events.len(), 2); let mut found_probe_failed = false; for event in events.drain(..) { @@ -3467,7 +3486,14 @@ fn do_no_missing_sent_on_reload(persist_manager_with_payment: bool, at_midpoint: let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); reload_node!(nodes[0], test_default_channel_config(), &nodes_0_serialized, &[&chan_0_monitor_serialized], persister_a, chain_monitor_a, nodes_0_deserialized); + // When we first process background events, we'll apply a channel-closed monitor update... + check_added_monitors(&nodes[0], 0); + nodes[0].node.test_process_background_events(); + check_added_monitors(&nodes[0], 1); + // Then once we process the PaymentSent event we'll apply a monitor update to remove the + // pending payment from being re-hydrated on the next startup. let events = nodes[0].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[0], 1); assert_eq!(events.len(), 2); if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[0] {} else { panic!(); } if let Event::PaymentSent { payment_preimage, .. } = events[1] { assert_eq!(payment_preimage, our_payment_preimage); } else { panic!(); } @@ -3486,8 +3512,16 @@ fn do_no_missing_sent_on_reload(persist_manager_with_payment: bool, at_midpoint: let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); reload_node!(nodes[0], test_default_channel_config(), &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister_b, chain_monitor_b, nodes_0_deserialized_b); + + // Because the pending payment will currently stick around forever, we'll apply a + // ChannelMonitorUpdate on each startup to attempt to remove it. + // TODO: This will be dropped in the next commit after we actually remove the payment! + check_added_monitors(&nodes[0], 0); + nodes[0].node.test_process_background_events(); + check_added_monitors(&nodes[0], 1); let events = nodes[0].node.get_and_clear_pending_events(); assert!(events.is_empty()); + check_added_monitors(&nodes[0], 0); // Ensure that we don't generate any further events even after the channel-closing commitment // transaction is confirmed on-chain. @@ -3496,11 +3530,15 @@ fn do_no_missing_sent_on_reload(persist_manager_with_payment: bool, at_midpoint: let events = nodes[0].node.get_and_clear_pending_events(); assert!(events.is_empty()); + check_added_monitors(&nodes[0], 0); let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); reload_node!(nodes[0], test_default_channel_config(), &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister_c, chain_monitor_c, nodes_0_deserialized_c); let events = nodes[0].node.get_and_clear_pending_events(); assert!(events.is_empty()); + + // TODO: This will be dropped in the next commit after we actually remove the payment! 
+ check_added_monitors(&nodes[0], 2); } #[test] diff --git a/lightning/src/ln/reorg_tests.rs b/lightning/src/ln/reorg_tests.rs index 56760c510a3..03e621d3e37 100644 --- a/lightning/src/ln/reorg_tests.rs +++ b/lightning/src/ln/reorg_tests.rs @@ -232,12 +232,13 @@ fn test_counterparty_revoked_reorg() { // Connect the HTLC claim transaction for HTLC 3 mine_transaction(&nodes[1], &unrevoked_local_txn[2]); - expect_payment_sent(&nodes[1], payment_preimage_3, None, true, false); + expect_payment_sent(&nodes[1], payment_preimage_3, None, true, true); assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); // Connect blocks to confirm the unrevoked commitment transaction connect_blocks(&nodes[1], ANTI_REORG_DELAY - 2); - expect_payment_failed!(nodes[1], payment_hash_4, false); + let conditions = PaymentFailedConditions::new().from_mon_update(); + expect_payment_failed_conditions(&nodes[1], payment_hash_4, false, conditions) } fn do_test_unconf_chan(reload_node: bool, reorg_after_reload: bool, use_funding_unconfirmed: bool, connect_style: ConnectStyle) { @@ -1001,7 +1002,9 @@ fn do_test_split_htlc_expiry_tracking(use_third_htlc: bool, reorg_out: bool) { let mut txn = nodes[0].tx_broadcaster.txn_broadcast(); assert_eq!(txn.len(), 0); + check_added_monitors(&nodes[0], 0); let sent_events = nodes[0].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[0], 2); assert_eq!(sent_events.len(), 4, "{sent_events:?}"); let mut found_expected_events = [false, false, false, false]; for event in sent_events { @@ -1090,7 +1093,9 @@ fn do_test_split_htlc_expiry_tracking(use_third_htlc: bool, reorg_out: bool) { // Connect two more blocks to get `as_third_htlc_spend_tx` to `ANTI_REORG_DELAY` confs. connect_blocks(&nodes[0], 2); if use_third_htlc { + check_added_monitors(&nodes[0], 0); let failed_events = nodes[0].node.get_and_clear_pending_events(); + check_added_monitors(&nodes[0], 1); assert_eq!(failed_events.len(), 2); let mut found_expected_events = [false, false]; for event in failed_events { From 77430bfba6e3f4e3a11df72b4afc2d9b71975ac6 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 2 Aug 2025 02:39:55 +0000 Subject: [PATCH 22/23] Stop re-hydrating pending payments once they are fully resolved `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (e.g. due to a block update), and then the node crashes prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In previous commits we ensured that HTLC resolutions which came to `ChannelManager` via a `MonitorEvent` were replayed on startup if the `MonitorEvent` was lost. However, in cases where the `ChannelManager` was so stale that it didn't have the payment state for an HTLC at all, we only re-add it in cases where `ChannelMonitor::get_pending_or_resolved_outbound_htlcs` includes it. 
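As a rough sketch of the idea (using toy stand-ins rather than the actual LDK types - `ToyHtlcId`, `ToyMonitor`, `pending_outbound_htlcs` and `release_payment_complete` are all hypothetical), the monitor-side filtering could look like this:

    use std::collections::{HashMap, HashSet};

    // Hypothetical stand-in for `SentHTLCId`.
    type ToyHtlcId = u64;

    // Hypothetical stand-in for the monitor state that matters here.
    struct ToyMonitor {
        // All outbound HTLCs the monitor knows about, with an optional preimage.
        outbound_htlcs: HashMap<ToyHtlcId, Option<[u8; 32]>>,
        // HTLCs whose resolution has already been surfaced to the user as an event.
        htlcs_resolved_to_user: HashSet<ToyHtlcId>,
    }

    impl ToyMonitor {
        // On startup, only HTLCs which haven't been resolved through to a user
        // event are handed back for payment re-hydration.
        fn pending_outbound_htlcs(&self) -> HashMap<ToyHtlcId, Option<[u8; 32]>> {
            let mut res = HashMap::new();
            for (id, preimage) in self.outbound_htlcs.iter() {
                if !self.htlcs_resolved_to_user.contains(id) {
                    res.insert(*id, *preimage);
                }
            }
            res
        }

        // Once the user has handled the payment event, mark the HTLC so it is
        // never re-hydrated again.
        fn release_payment_complete(&mut self, htlc: ToyHtlcId) {
            self.htlcs_resolved_to_user.insert(htlc);
        }
    }

    fn main() {
        let mut monitor = ToyMonitor {
            outbound_htlcs: HashMap::from([(1, Some([0u8; 32])), (2, None)]),
            htlcs_resolved_to_user: HashSet::new(),
        };
        assert_eq!(monitor.pending_outbound_htlcs().len(), 2);
        monitor.release_payment_complete(1);
        assert_eq!(monitor.pending_outbound_htlcs().len(), 1);
    }

The real change wires the equivalent of `release_payment_complete` through `ChannelMonitorUpdateStep::ReleasePaymentComplete` and the filtering into `get_all_current_outbound_htlcs`, as the diff below shows.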
Constantly re-adding a payment state and then failing it would generate lots of noise for users on startup (not to mention the risk of confusing stale payment events for the latest state of a payment when the `PaymentId` has been reused to retry a payment). Thus, `get_pending_or_resolved_outbound_htlcs` does not include state for HTLCs which were resolved on chain with a preimage or HTLCs which were resolved on chain with a timeout after `ANTI_REORG_DELAY` confirmations. These criteria match the criteria for generating a `MonitorEvent`, and work great under the assumption that `MonitorEvent`s are reliably delivered. However, if they are not, and our `ChannelManager` is lost or substantially old (or, in a future where we do not persist `ChannelManager` at all), we will not end up seeing payment resolution events for an HTLC. Instead, we really want to tell our `ChannelMonitor`s when the resolution of an HTLC is complete. Note that we don't particularly care about non-payment HTLCs, as there is no re-hydration of state to do there - `ChannelManager` load ignores forwarded HTLCs coming back from `get_pending_or_resolved_outbound_htlcs` as there's nothing to do - we always attempt to replay the success/failure and figure out if it mattered based on whether there was still an HTLC to claim/fail. Here we, finally, begin actually using the new `ChannelMonitorUpdateStep::ReleasePaymentComplete` updates, skipping re-hydration of pending payments once they have been fully resolved through to a user `Event`. Backport of 226520b28b8e538988c208186c3e403356168074 Fixed conflicts in: * lightning/src/chain/channelmonitor.rs * lightning/src/ln/channelmanager.rs * lightning/src/ln/functional_tests.rs to address differences causing upstream to (somewhat spuriously) generate `ChannelMonitorUpdate`s for channels force-closed by commitment transaction confirmation whereas this branch does not, * lightning/src/ln/payment_tests.rs --- lightning/src/chain/channelmonitor.rs | 103 +++----------------------- lightning/src/ln/channelmanager.rs | 2 +- lightning/src/ln/functional_tests.rs | 4 +- lightning/src/ln/payment_tests.rs | 75 +++++++++++-------- 4 files changed, 58 insertions(+), 126 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 4e0de092d4d..9dcc18ec52e 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -2577,10 +2577,6 @@ impl ChannelMonitor { /// Gets the set of outbound HTLCs which can be (or have been) resolved by this /// `ChannelMonitor`. This is used to determine if an HTLC was removed from the channel prior /// to the `ChannelManager` having been persisted. - /// - /// This is similar to [`Self::get_pending_or_resolved_outbound_htlcs`] except it includes - /// HTLCs which were resolved on-chain (i.e. where the final HTLC resolution was done by an - /// event from this `ChannelMonitor`). 
pub(crate) fn get_all_current_outbound_htlcs( &self, ) -> HashMap)> { @@ -2593,8 +2589,11 @@ impl ChannelMonitor { for &(ref htlc, ref source_option) in latest_outpoints.iter() { if let &Some(ref source) = source_option { let htlc_id = SentHTLCId::from_source(source); - let preimage_opt = us.counterparty_fulfilled_htlcs.get(&htlc_id).cloned(); - res.insert((**source).clone(), (htlc.clone(), preimage_opt)); + if !us.htlcs_resolved_to_user.contains(&htlc_id) { + let preimage_opt = + us.counterparty_fulfilled_htlcs.get(&htlc_id).cloned(); + res.insert((**source).clone(), (htlc.clone(), preimage_opt)); + } } } } @@ -2650,6 +2649,11 @@ impl ChannelMonitor { } else { continue; }; + let htlc_id = SentHTLCId::from_source(source); + if us.htlcs_resolved_to_user.contains(&htlc_id) { + continue; + } + let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src); if let Some((confirmed_htlc, _)) = confirmed { let filter = |v: &&IrrevocablyResolvedHTLC| { @@ -2724,93 +2728,6 @@ impl ChannelMonitor { res } - /// Gets the set of outbound HTLCs which are pending resolution in this channel or which were - /// resolved with a preimage from our counterparty. - /// - /// This is used to reconstruct pending outbound payments on restart in the ChannelManager. - /// - /// Currently, the preimage is unused, however if it is present in the relevant internal state - /// an HTLC is always included even if it has been resolved. - pub(crate) fn get_pending_or_resolved_outbound_htlcs(&self) -> HashMap)> { - let us = self.inner.lock().unwrap(); - // We're only concerned with the confirmation count of HTLC transactions, and don't - // actually care how many confirmations a commitment transaction may or may not have. Thus, - // we look for either a FundingSpendConfirmation event or a funding_spend_confirmed. - let confirmed_txid = us.funding_spend_confirmed.or_else(|| { - us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| { - if let OnchainEvent::FundingSpendConfirmation { .. } = event.event { - Some(event.txid) - } else { None } - }) - }); - - if confirmed_txid.is_none() { - // If we have not seen a commitment transaction on-chain (ie the channel is not yet - // closed), just get the full set. - mem::drop(us); - return self.get_all_current_outbound_htlcs(); - } - - let mut res = new_hash_map(); - macro_rules! walk_htlcs { - ($holder_commitment: expr, $htlc_iter: expr) => { - for (htlc, source) in $htlc_iter { - if us.htlcs_resolved_on_chain.iter().any(|v| v.commitment_tx_output_idx == htlc.transaction_output_index) { - // We should assert that funding_spend_confirmed is_some() here, but we - // have some unit tests which violate HTLC transaction CSVs entirely and - // would fail. - // TODO: Once tests all connect transactions at consensus-valid times, we - // should assert here like we do in `get_claimable_balances`. - } else if htlc.offered == $holder_commitment { - // If the payment was outbound, check if there's an HTLCUpdate - // indicating we have spent this HTLC with a timeout, claiming it back - // and awaiting confirmations on it. - let htlc_update_confd = us.onchain_events_awaiting_threshold_conf.iter().any(|event| { - if let OnchainEvent::HTLCUpdate { commitment_tx_output_idx: Some(commitment_tx_output_idx), .. } = event.event { - // If the HTLC was timed out, we wait for ANTI_REORG_DELAY blocks - // before considering it "no longer pending" - this matches when we - // provide the ChannelManager an HTLC failure event. 
- Some(commitment_tx_output_idx) == htlc.transaction_output_index && - us.best_block.height >= event.height + ANTI_REORG_DELAY - 1 - } else if let OnchainEvent::HTLCSpendConfirmation { commitment_tx_output_idx, .. } = event.event { - // If the HTLC was fulfilled with a preimage, we consider the HTLC - // immediately non-pending, matching when we provide ChannelManager - // the preimage. - Some(commitment_tx_output_idx) == htlc.transaction_output_index - } else { false } - }); - let counterparty_resolved_preimage_opt = - us.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).cloned(); - if !htlc_update_confd || counterparty_resolved_preimage_opt.is_some() { - res.insert(source.clone(), (htlc.clone(), counterparty_resolved_preimage_opt)); - } - } - } - } - } - - let txid = confirmed_txid.unwrap(); - if Some(txid) == us.current_counterparty_commitment_txid || Some(txid) == us.prev_counterparty_commitment_txid { - walk_htlcs!(false, us.counterparty_claimable_outpoints.get(&txid).unwrap().iter().filter_map(|(a, b)| { - if let &Some(ref source) = b { - Some((a, &**source)) - } else { None } - })); - } else if txid == us.current_holder_commitment_tx.txid { - walk_htlcs!(true, us.current_holder_commitment_tx.htlc_outputs.iter().filter_map(|(a, _, c)| { - if let Some(source) = c { Some((a, source)) } else { None } - })); - } else if let Some(prev_commitment) = &us.prev_holder_signed_commitment_tx { - if txid == prev_commitment.txid { - walk_htlcs!(true, prev_commitment.htlc_outputs.iter().filter_map(|(a, _, c)| { - if let Some(source) = c { Some((a, source)) } else { None } - })); - } - } - - res - } - pub(crate) fn get_stored_preimages(&self) -> HashMap)> { self.inner.lock().unwrap().payment_preimages.clone() } diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index e114b23b1e7..ea895b423fd 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -14064,7 +14064,7 @@ where let counterparty_opt = outpoint_to_peer.get(&monitor.get_funding_txo().0); // outpoint_to_peer missing the funding outpoint implies the channel is closed if counterparty_opt.is_none() { - for (htlc_source, (htlc, _)) in monitor.get_pending_or_resolved_outbound_htlcs() { + for (htlc_source, (htlc, _)) in monitor.get_all_current_outbound_htlcs() { let logger = WithChannelMonitor::from(&args.logger, monitor, Some(htlc.payment_hash)); if let HTLCSource::OutboundRoute { payment_id, session_priv, path, .. 
} = htlc_source { if path.hops.is_empty() { diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 07a6c4b8412..41a5fb7cd4d 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -3711,6 +3711,8 @@ fn do_test_commitment_revoked_fail_backward_exhaustive(deliver_bs_raa: bool, use let events = nodes[1].node.get_and_clear_pending_events(); if deliver_bs_raa { check_added_monitors(&nodes[1], 1); + } else { + check_added_monitors(&nodes[1], 0); } assert_eq!(events.len(), if deliver_bs_raa { 3 + nodes.len() - 1 } else { 4 + nodes.len() }); assert!(events.iter().any(|ev| matches!( @@ -3727,7 +3729,7 @@ fn do_test_commitment_revoked_fail_backward_exhaustive(deliver_bs_raa: bool, use ))); nodes[1].node.process_pending_htlc_forwards(); - check_added_monitors!(nodes[1], 1); + check_added_monitors(&nodes[1], 1); let mut events = nodes[1].node.get_and_clear_pending_msg_events(); assert_eq!(events.len(), if deliver_bs_raa { 4 } else { 3 }); diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index 52fda5a6329..c29d115b337 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -973,9 +973,6 @@ fn do_test_completed_payment_not_retryable_on_reload(use_dust: bool) { nodes[1].node.peer_disconnected(nodes[0].node.get_our_node_id()); nodes[0].node.test_process_background_events(); - check_added_monitors(&nodes[0], 1); // TODO: Removed in the next commit as this only required - // when we are still seeing all payments, even resolved - // ones. let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); reconnect_args.send_channel_ready = (true, true); @@ -1005,9 +1002,6 @@ fn do_test_completed_payment_not_retryable_on_reload(use_dust: bool) { nodes[1].node.peer_disconnected(nodes[0].node.get_our_node_id()); nodes[0].node.test_process_background_events(); - check_added_monitors(&nodes[0], 1); // TODO: Removed in the next commit as this only required - // when we are still seeing all payments, even resolved - // ones. reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1])); @@ -1024,7 +1018,10 @@ fn test_completed_payment_not_retryable_on_reload() { do_test_completed_payment_not_retryable_on_reload(false); } -fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bool, confirm_commitment_tx: bool, payment_timeout: bool) { +fn do_test_dup_htlc_onchain_doesnt_fail_on_reload( + persist_manager_post_event: bool, persist_monitor_after_events: bool, + confirm_commitment_tx: bool, payment_timeout: bool, +) { // When a Channel is closed, any outbound HTLCs which were relayed through it are simply // dropped. 
From there, the ChannelManager relies on the ChannelMonitor having a copy of the // relevant fail-/claim-back data and processes the HTLC fail/claim when the ChannelMonitor tells @@ -1115,36 +1112,58 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bo chan_manager_serialized = nodes[0].node.encode(); } - let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); + let mut mon_ser = Vec::new(); + if !persist_monitor_after_events { + mon_ser = get_monitor!(nodes[0], chan_id).encode(); + } if payment_timeout { let conditions = PaymentFailedConditions::new().from_mon_update(); expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); } else { expect_payment_sent(&nodes[0], payment_preimage, None, true, true); } + // Note that if we persist the monitor before processing the events, above, we'll always get + // them replayed on restart no matter what + if persist_monitor_after_events { + mon_ser = get_monitor!(nodes[0], chan_id).encode(); + } // If we persist the ChannelManager after we get the PaymentSent event, we shouldn't get it // twice. if persist_manager_post_event { chan_manager_serialized = nodes[0].node.encode(); + } else if persist_monitor_after_events { + // Persisting the monitor after the events (resulting in a new monitor being persisted) but + // didn't persist the manager will result in an FC, which we don't test here. + panic!(); } // Now reload nodes[0]... - reload_node!(nodes[0], &chan_manager_serialized, &[&chan_0_monitor_serialized], persister, new_chain_monitor, nodes_0_deserialized); + reload_node!(nodes[0], &chan_manager_serialized, &[&mon_ser], persister, new_chain_monitor, nodes_0_deserialized); check_added_monitors(&nodes[0], 0); - if persist_manager_post_event { + if persist_manager_post_event && persist_monitor_after_events { assert!(nodes[0].node.get_and_clear_pending_events().is_empty()); - check_added_monitors(&nodes[0], 2); + check_added_monitors(&nodes[0], 0); } else if payment_timeout { - let conditions = PaymentFailedConditions::new().from_mon_update(); + let mut conditions = PaymentFailedConditions::new(); + if !persist_monitor_after_events { + conditions = conditions.from_mon_update(); + } expect_payment_failed_conditions(&nodes[0], payment_hash, false, conditions); + check_added_monitors(&nodes[0], 0); } else { + if persist_manager_post_event { + assert!(nodes[0].node.get_and_clear_pending_events().is_empty()); + } else { + expect_payment_sent(&nodes[0], payment_preimage, None, true, false); + } // After reload, the ChannelManager identified the failed payment and queued up the - // PaymentSent and corresponding ChannelMonitorUpdate to mark the payment handled, but - // while processing the pending `MonitorEvent`s (which were not processed before the - // monitor was persisted) we will end up with a duplicate ChannelMonitorUpdate. - expect_payment_sent(&nodes[0], payment_preimage, None, true, false); + // PaymentSent (or not, if `persist_manager_post_event` resulted in us detecting we + // already did that) and corresponding ChannelMonitorUpdate to mark the payment + // handled, but while processing the pending `MonitorEvent`s (which were not processed + // before the monitor was persisted) we will end up with a duplicate + // ChannelMonitorUpdate. 
check_added_monitors(&nodes[0], 2); } @@ -1158,12 +1177,15 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(persist_manager_post_event: bo #[test] fn test_dup_htlc_onchain_doesnt_fail_on_reload() { - do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true); - do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, false); - do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, false); - do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, true, true); - do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, true, false); - do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, false); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true, true); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true, false); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, false, false); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, true, true); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, true, false); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, false, false); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, true, true); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, true, false); + do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, false, false); } #[test] @@ -3513,15 +3535,9 @@ fn do_no_missing_sent_on_reload(persist_manager_with_payment: bool, at_midpoint: let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode(); reload_node!(nodes[0], test_default_channel_config(), &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister_b, chain_monitor_b, nodes_0_deserialized_b); - // Because the pending payment will currently stick around forever, we'll apply a - // ChannelMonitorUpdate on each startup to attempt to remove it. - // TODO: This will be dropped in the next commit after we actually remove the payment! - check_added_monitors(&nodes[0], 0); nodes[0].node.test_process_background_events(); - check_added_monitors(&nodes[0], 1); let events = nodes[0].node.get_and_clear_pending_events(); assert!(events.is_empty()); - check_added_monitors(&nodes[0], 0); // Ensure that we don't generate any further events even after the channel-closing commitment // transaction is confirmed on-chain. @@ -3536,9 +3552,6 @@ fn do_no_missing_sent_on_reload(persist_manager_with_payment: bool, at_midpoint: reload_node!(nodes[0], test_default_channel_config(), &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister_c, chain_monitor_c, nodes_0_deserialized_c); let events = nodes[0].node.get_and_clear_pending_events(); assert!(events.is_empty()); - - // TODO: This will be dropped in the next commit after we actually remove the payment! - check_added_monitors(&nodes[0], 2); } #[test] From 5b6b4cea228c6eda359633a7f23fdc33067976f6 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 1 Aug 2025 22:37:39 +0000 Subject: [PATCH 23/23] Properly provide `PaymentPathSuccessful` event for replay claims When a payment was sent and ultimately completed through an on-chain HTLC claim which we discover during startup, we deliberately break the payment tracking logic to keep it around forever, declining to send a `PaymentPathSuccessful` event but ensuring that we don't constantly replay the claim on every startup. However, now that we now have logic to complete a claim by marking it as completed in a `ChannelMonitor` and not replaying information about the claim on every startup. 
Thus, we no longer need to take the conservative stance and can correctly replay claims now, generating `PaymentPathSuccessful` events and allowing the state to be removed. Backport of ba6528f0ae1d8a236008f51bbf62e120499ecd30 Fixed conflicts in: * lightning/src/ln/channelmanager.rs * lightning/src/ln/payment_tests.rs --- lightning/src/ln/channelmanager.rs | 10 +------- lightning/src/ln/monitor_tests.rs | 5 +++- lightning/src/ln/payment_tests.rs | 37 ++++++++++++++++++++++-------- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index ea895b423fd..85bb9aeda93 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -14153,20 +14153,12 @@ where log_warn!(logger, "Missing counterparty node id in monitor when trying to re-claim a payment resolved on chain. This may lead to redundant PaymentSent events on restart"); None }; - // Note that we set `from_onchain` to "false" here, - // deliberately keeping the pending payment around forever. - // Given it should only occur when we have a channel we're - // force-closing for being stale that's okay. - // The alternative would be to wipe the state when claiming, - // generating a `PaymentPathSuccessful` event but regenerating - // it and the `PaymentSent` on every restart until the - // `ChannelMonitor` is removed. pending_outbounds.claim_htlc( payment_id, preimage, session_priv, path, - false, + true, &mut compl_action, &pending_events, &&logger, diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 6ade9f231d6..63673bbd894 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -3408,12 +3408,15 @@ fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) { check_added_monitors(&nodes[1], 0); let preimage_events = nodes[1].node.get_and_clear_pending_events(); - assert_eq!(preimage_events.len(), 2, "{preimage_events:?}"); + assert_eq!(preimage_events.len(), 3, "{preimage_events:?}"); for ev in preimage_events { match ev { Event::PaymentSent { payment_hash, .. } => { assert_eq!(payment_hash, hash_b); }, + Event::PaymentPathSuccessful { payment_hash, .. } => { + assert_eq!(payment_hash, Some(hash_b)); + }, Event::PaymentForwarded { claim_from_onchain_tx, .. } => { assert!(claim_from_onchain_tx); }, diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs index c29d115b337..47c12c358eb 100644 --- a/lightning/src/ln/payment_tests.rs +++ b/lightning/src/ln/payment_tests.rs @@ -1158,13 +1158,19 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload( } else { expect_payment_sent(&nodes[0], payment_preimage, None, true, false); } - // After reload, the ChannelManager identified the failed payment and queued up the - // PaymentSent (or not, if `persist_manager_post_event` resulted in us detecting we - // already did that) and corresponding ChannelMonitorUpdate to mark the payment - // handled, but while processing the pending `MonitorEvent`s (which were not processed - // before the monitor was persisted) we will end up with a duplicate - // ChannelMonitorUpdate. 
- check_added_monitors(&nodes[0], 2); + if persist_manager_post_event { + // After reload, the ChannelManager identified the failed payment and queued up the + // PaymentSent (or not, if `persist_manager_post_event` resulted in us detecting we + // already did that) and corresponding ChannelMonitorUpdate to mark the payment + // handled, but while processing the pending `MonitorEvent`s (which were not processed + // before the monitor was persisted) we will end up with a duplicate + // ChannelMonitorUpdate. + check_added_monitors(&nodes[0], 2); + } else { + // ...unless we got the PaymentSent event, in which case we have de-duplication logic + // preventing a redundant monitor event. + check_added_monitors(&nodes[0], 1); + } } // Note that if we re-connect the block which exposed nodes[0] to the payment preimage (but @@ -3516,9 +3522,20 @@ fn do_no_missing_sent_on_reload(persist_manager_with_payment: bool, at_midpoint: // pending payment from being re-hydrated on the next startup. let events = nodes[0].node.get_and_clear_pending_events(); check_added_monitors(&nodes[0], 1); - assert_eq!(events.len(), 2); - if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[0] {} else { panic!(); } - if let Event::PaymentSent { payment_preimage, .. } = events[1] { assert_eq!(payment_preimage, our_payment_preimage); } else { panic!(); } + assert_eq!(events.len(), 3, "{events:?}"); + if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[0] { + } else { + panic!(); + } + if let Event::PaymentSent { payment_preimage, .. } = events[1] { + assert_eq!(payment_preimage, our_payment_preimage); + } else { + panic!(); + } + if let Event::PaymentPathSuccessful { .. } = events[2] { + } else { + panic!(); + } // Note that we don't get a PaymentPathSuccessful here as we leave the HTLC pending to avoid // the double-claim that would otherwise appear at the end of this test. nodes[0].node.timer_tick_occurred();