diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs
index f72eea850d5..c29fcbe88b4 100644
--- a/lightning/src/chain/channelmonitor.rs
+++ b/lightning/src/chain/channelmonitor.rs
@@ -77,7 +77,7 @@ use crate::util::ser::{
 use crate::prelude::*;
 use crate::io::{self, Error};
-use crate::sync::{LockTestExt, Mutex};
+use crate::sync::Mutex;
 
 use core::ops::Deref;
 use core::{cmp, mem};
@@ -1371,18 +1371,30 @@ macro_rules! holder_commitment_htlcs {
 /// Transaction outputs to watch for on-chain spends.
 pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>);
 
+// Because we have weird workarounds for `ChannelMonitor` equality checks in `OnchainTxHandler` and
+// `PackageTemplate`, the equality implementation isn't really fit for public consumption. Instead,
+// we only expose it during tests.
+#[cfg(any(feature = "_test_utils", test))]
 impl<Signer: EcdsaChannelSigner> PartialEq for ChannelMonitor<Signer>
 where
 	Signer: PartialEq,
 {
-	#[rustfmt::skip]
 	fn eq(&self, other: &Self) -> bool {
+		use crate::sync::LockTestExt;
 		// We need some kind of total lockorder. Absent a better idea, we sort by position in
 		// memory and take locks in that order (assuming that we can't move within memory while a
 		// lock is held).
 		let ord = ((self as *const _) as usize) < ((other as *const _) as usize);
-		let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() };
-		let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() };
+		let a = if ord {
+			self.inner.unsafe_well_ordered_double_lock_self()
+		} else {
+			other.inner.unsafe_well_ordered_double_lock_self()
+		};
+		let b = if ord {
+			other.inner.unsafe_well_ordered_double_lock_self()
+		} else {
+			self.inner.unsafe_well_ordered_double_lock_self()
+		};
 		a.eq(&b)
 	}
 }
@@ -2944,33 +2956,152 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
 	/// This is similar to [`Self::get_pending_or_resolved_outbound_htlcs`] except it includes
 	/// HTLCs which were resolved on-chain (i.e. where the final HTLC resolution was done by an
 	/// event from this `ChannelMonitor`).
-	#[rustfmt::skip]
-	pub(crate) fn get_all_current_outbound_htlcs(&self) -> HashMap<HTLCSource, (HTLCOutputInCommitment, Option<PaymentPreimage>)> {
+	pub(crate) fn get_all_current_outbound_htlcs(
+		&self,
+	) -> HashMap<HTLCSource, (HTLCOutputInCommitment, Option<PaymentPreimage>)> {
 		let mut res = new_hash_map();
 		// Just examine the available counterparty commitment transactions. See docs on
 		// `fail_unbroadcast_htlcs`, below, for justification.
 		let us = self.inner.lock().unwrap();
-		macro_rules! walk_counterparty_commitment {
-			($txid: expr) => {
-				if let Some(ref latest_outpoints) = us.funding.counterparty_claimable_outpoints.get($txid) {
-					for &(ref htlc, ref source_option) in latest_outpoints.iter() {
-						if let &Some(ref source) = source_option {
-							res.insert((**source).clone(), (htlc.clone(),
-								us.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).cloned()));
-						}
+		let mut walk_counterparty_commitment = |txid| {
+			if let Some(latest_outpoints) = us.funding.counterparty_claimable_outpoints.get(txid) {
+				for &(ref htlc, ref source_option) in latest_outpoints.iter() {
+					if let &Some(ref source) = source_option {
+						let htlc_id = SentHTLCId::from_source(source);
+						let preimage_opt = us.counterparty_fulfilled_htlcs.get(&htlc_id).cloned();
+						res.insert((**source).clone(), (htlc.clone(), preimage_opt));
 					}
 				}
 			}
-		}
+		};
 		if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
-			walk_counterparty_commitment!(txid);
+			walk_counterparty_commitment(txid);
 		}
 		if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
-			walk_counterparty_commitment!(txid);
+			walk_counterparty_commitment(txid);
 		}
 		res
 	}
 
+	/// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via
+	/// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine
+	/// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted.
+	pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap<HTLCSource, PaymentHash> {
+		let mut res = new_hash_map();
+		let us = self.inner.lock().unwrap();
+
+		// We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment
+		// transaction has at least ANTI_REORG_DELAY confirmations for any dependent HTLC
+		// transactions to have been confirmed.
+		let confirmed_txid = us.funding_spend_confirmed.or_else(|| {
+			us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| {
+				if let OnchainEvent::FundingSpendConfirmation { .. } = event.event {
+					if event.height <= us.best_block.height - ANTI_REORG_DELAY + 1 {
+						Some(event.txid)
+					} else {
+						None
+					}
+				} else {
+					None
+				}
+			})
+		});
+
+		let confirmed_txid = if let Some(txid) = confirmed_txid {
+			txid
+		} else {
+			return res;
+		};
+
+		macro_rules! walk_htlcs {
+			($holder_commitment: expr, $htlc_iter: expr) => {
+				let mut walk_candidate_htlcs = |htlcs| {
+					for &(ref candidate_htlc, ref candidate_source) in htlcs {
+						let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc;
+						let candidate_source: &Option<Box<HTLCSource>> = &candidate_source;
+
+						let source: &HTLCSource = if let Some(source) = candidate_source {
+							source
+						} else {
+							continue;
+						};
+						let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src);
+						if let Some((confirmed_htlc, _)) = confirmed {
+							let filter = |v: &&IrrevocablyResolvedHTLC| {
+								v.commitment_tx_output_idx
+									== confirmed_htlc.transaction_output_index
+							};
+
+							// The HTLC was included in the confirmed commitment transaction, so we
+							// need to see if it has been irrevocably failed yet.
+							if confirmed_htlc.transaction_output_index.is_none() {
+								// Dust HTLCs are always implicitly failed once the commitment
+								// transaction reaches ANTI_REORG_DELAY confirmations.
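+								// (Dust HTLCs have no output in the commitment transaction, so
+								// there is no separate HTLC claim or timeout to wait for.)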
+								res.insert(source.clone(), confirmed_htlc.payment_hash);
+							} else if let Some(state) =
+								us.htlcs_resolved_on_chain.iter().filter(filter).next()
+							{
+								if state.payment_preimage.is_none() {
+									res.insert(source.clone(), confirmed_htlc.payment_hash);
+								}
+							}
+						} else {
+							// The HTLC was not included in the confirmed commitment transaction,
+							// which has now reached ANTI_REORG_DELAY confirmations and thus the
+							// HTLC has been failed.
+							res.insert(source.clone(), candidate_htlc.payment_hash);
+						}
+					}
+				};
+
+				// We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see
+				// `fail_unbroadcast_htlcs` for a description of why).
+				if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
+					if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) {
+						walk_candidate_htlcs(htlcs);
+					}
+				}
+				if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
+					if let Some(htlcs) = us.funding.counterparty_claimable_outpoints.get(txid) {
+						walk_candidate_htlcs(htlcs);
+					}
+				}
+			};
+		}
+
+		let funding = get_confirmed_funding_scope!(us);
+
+		if Some(confirmed_txid) == funding.current_counterparty_commitment_txid
+			|| Some(confirmed_txid) == funding.prev_counterparty_commitment_txid
+		{
+			let htlcs = funding.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap();
+			walk_htlcs!(
+				false,
+				htlcs.iter().filter_map(|(a, b)| {
+					if let &Some(ref source) = b {
+						Some((a, Some(&**source)))
+					} else {
+						None
+					}
+				})
+			);
+		} else if confirmed_txid == funding.current_holder_commitment_tx.trust().txid() {
+			walk_htlcs!(true, holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES));
+		} else if let Some(prev_commitment_tx) = &funding.prev_holder_commitment_tx {
+			if confirmed_txid == prev_commitment_tx.trust().txid() {
+				walk_htlcs!(true, holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap());
+			} else {
+				let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
+				walk_htlcs!(false, htlcs_confirmed.iter());
+			}
+		} else {
+			let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
+			walk_htlcs!(false, htlcs_confirmed.iter());
+		}
+
+		res
+	}
+
 	/// Gets the set of outbound HTLCs which are pending resolution in this channel or which were
 	/// resolved with a preimage from our counterparty.
 	///
@@ -5775,6 +5906,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 				on_to_local_output_csv: None,
 			},
 		});
+		self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 		self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 			source,
 			payment_preimage: Some(payment_preimage),
@@ -5798,6 +5930,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 				on_to_local_output_csv: None,
 			},
 		});
+		self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 		self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 			source,
 			payment_preimage: Some(payment_preimage),
diff --git a/lightning/src/chain/package.rs b/lightning/src/chain/package.rs
index 3de690e492f..bdc5774e598 100644
--- a/lightning/src/chain/package.rs
+++ b/lightning/src/chain/package.rs
@@ -1093,7 +1093,7 @@ enum PackageMalleability {
 ///
 /// As packages are time-sensitive, we fee-bump and rebroadcast them at scheduled intervals.
 /// Failing to confirm a package translate as a loss of funds for the user.
-#[derive(Clone, Debug, PartialEq, Eq)]
+#[derive(Clone, Debug, Eq)]
 pub struct PackageTemplate {
 	// List of onchain outputs and solving data to generate satisfying witnesses.
 	inputs: Vec<(BitcoinOutPoint, PackageSolvingData)>,
@@ -1122,6 +1122,50 @@ pub struct PackageTemplate {
 	height_timer: u32,
 }
 
+impl PartialEq for PackageTemplate {
+	fn eq(&self, o: &Self) -> bool {
+		if self.inputs != o.inputs
+			|| self.malleability != o.malleability
+			|| self.feerate_previous != o.feerate_previous
+			|| self.height_timer != o.height_timer
+		{
+			return false;
+		}
+		#[cfg(test)]
+		{
+			// In some cases we may reset `counterparty_spendable_height` to zero on reload, which
+			// can cause our test assertions that ChannelMonitors round-trip exactly to trip. Here
+			// we allow exactly the same case as we tweak in the `PackageTemplate` `Readable`
+			// implementation.
+			if self.counterparty_spendable_height == 0 {
+				for (_, input) in self.inputs.iter() {
+					if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput {
+						htlc, ..
+					}) = input
+					{
+						if !htlc.offered && htlc.cltv_expiry != 0 {
+							return true;
+						}
+					}
+				}
+			}
+			if o.counterparty_spendable_height == 0 {
+				for (_, input) in o.inputs.iter() {
+					if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput {
+						htlc, ..
+					}) = input
+					{
+						if !htlc.offered && htlc.cltv_expiry != 0 {
+							return true;
+						}
+					}
+				}
+			}
+		}
+		self.counterparty_spendable_height == o.counterparty_spendable_height
+	}
+}
+
 impl PackageTemplate {
 	#[rustfmt::skip]
 	pub(crate) fn can_merge_with(&self, other: &PackageTemplate, cur_height: u32) -> bool {
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index e4614351ee9..0a4d8b1cc78 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -15739,7 +15739,7 @@ where
 				log_error!(logger, " The ChannelMonitor for channel {} is at counterparty commitment transaction number {} but the ChannelManager is at counterparty commitment transaction number {}.",
 					&channel.context.channel_id(), monitor.get_cur_counterparty_commitment_number(), channel.get_cur_counterparty_commitment_transaction_number());
 			}
-			let mut shutdown_result =
+			let shutdown_result =
 				channel.force_shutdown(ClosureReason::OutdatedChannelManager);
 			if shutdown_result.unbroadcasted_batch_funding_txid.is_some() {
 				return Err(DecodeError::InvalidValue);
@@ -15771,7 +15771,10 @@ where
 					},
 				);
 			}
-			failed_htlcs.append(&mut shutdown_result.dropped_outbound_htlcs);
+			for (source, hash, cp_id, chan_id) in shutdown_result.dropped_outbound_htlcs {
+				let reason = LocalHTLCFailureReason::ChannelClosed;
+				failed_htlcs.push((source, hash, cp_id, chan_id, reason));
+			}
 			channel_closures.push_back((
 				events::Event::ChannelClosed {
 					channel_id: channel.context.channel_id(),
@@ -15813,6 +15816,7 @@ where
 						*payment_hash,
 						channel.context.get_counterparty_node_id(),
 						channel.context.channel_id(),
+						LocalHTLCFailureReason::ChannelClosed,
 					));
 				}
 			}
@@ -16386,6 +16390,10 @@ where
 		// payments which are still in-flight via their on-chain state.
 		// We only rebuild the pending payments map if we were most recently serialized by
 		// 0.0.102+
+		//
+		// First we rebuild the pending payments, and only once we do so do we go through and
+		// re-claim and re-fail pending payments. This avoids edge-cases around MPP payments
+		// resulting in redundant actions.
 		for (channel_id, monitor) in args.channel_monitors.iter() {
 			let mut is_channel_closed = false;
 			let counterparty_node_id = monitor.get_counterparty_node_id();
 			if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
 				let mut peer_state_lock = peer_state_mtx.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				is_channel_closed = !peer_state.channel_by_id.contains_key(channel_id);
 			}
@@ -16424,6 +16432,18 @@ where
 					);
 				}
 			}
+			}
+		}
+		for (channel_id, monitor) in args.channel_monitors.iter() {
+			let mut is_channel_closed = false;
+			let counterparty_node_id = monitor.get_counterparty_node_id();
+			if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
+				let mut peer_state_lock = peer_state_mtx.lock().unwrap();
+				let peer_state = &mut *peer_state_lock;
+				is_channel_closed = !peer_state.channel_by_id.contains_key(channel_id);
+			}
+
+			if is_channel_closed {
 				for (htlc_source, (htlc, preimage_opt)) in
 					monitor.get_all_current_outbound_htlcs()
 				{
@@ -16521,6 +16541,20 @@ where
 						},
 					}
 				}
+				for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() {
+					log_info!(
+						args.logger,
+						"Failing HTLC with payment hash {} as it was resolved on-chain.",
+						payment_hash
+					);
+					failed_htlcs.push((
+						htlc_source,
+						payment_hash,
+						monitor.get_counterparty_node_id(),
+						monitor.channel_id(),
+						LocalHTLCFailureReason::OnChainTimeout,
+					));
+				}
 			}
 
 			// Whether the downstream channel was closed or not, try to re-apply any payment
@@ -17201,13 +17235,10 @@ where
 		}
 	}
 
-	for htlc_source in failed_htlcs.drain(..) {
-		let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source;
-		let failure_reason = LocalHTLCFailureReason::ChannelClosed;
-		let receiver = HTLCHandlingFailureType::Forward {
-			node_id: Some(counterparty_node_id),
-			channel_id,
-		};
+	for htlc_source in failed_htlcs {
+		let (source, payment_hash, counterparty_id, channel_id, failure_reason) = htlc_source;
+		let receiver =
+			HTLCHandlingFailureType::Forward { node_id: Some(counterparty_id), channel_id };
 		let reason = HTLCFailReason::from_failure_code(failure_reason);
 		channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
 	}
diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs
index 11dd13317e0..2c5d0d9e061 100644
--- a/lightning/src/ln/monitor_tests.rs
+++ b/lightning/src/ln/monitor_tests.rs
@@ -12,6 +12,7 @@
 //! Further functional tests which test blockchain reorganizations.
 
 use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SignerProvider, SpendableOutputDescriptor};
+use crate::chain::Watch;
 use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS,LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep};
 use crate::chain::transaction::OutPoint;
 use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight};
@@ -3298,3 +3299,413 @@ fn test_claim_event_never_handled() {
 	// `ChannelMonitorUpdate`.
 	check_added_monitors(&nodes[1], 2);
 }
+
+fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` then gets
+	// persisted (i.e. due to a block update), and the node crashes prior to persisting the
+	// `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be
+	// lost. This isn't likely in a sync persist environment, but in an async one this could be an
+	// issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not
+	// cause us to lose funds or miss a `PaymentSent` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let coinbase_tx = provide_anchor_reserves(&nodes);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+	let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	nodes[2].node.claim_funds(preimage_a);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_a, 1_000_000);
+	nodes[2].node.claim_funds(preimage_b);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_b, 1_000_000);
+
+	// Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	handle_bump_events(&nodes[2], true, 0);
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	handle_bump_events(&nodes[1], true, 0);
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = if on_counterparty_tx {
+		&cs_commit_tx[0]
+	} else {
+		&bs_commit_tx[0]
+	};
+
+	mine_transaction(&nodes[2], selected_commit_tx);
+	let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first();
+	let cs_htlc_claims = if on_counterparty_tx {
+		assert_eq!(cs_transactions.len(), 0);
+
+		handle_bump_events(&nodes[2], c_replays_commitment, 1);
+		let mut cs_transactions =
+			nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		if c_replays_commitment {
+			assert_eq!(cs_transactions.len(), 3);
+			check_spends!(cs_transactions[1], cs_transactions[0], coinbase_tx);
+		} else {
+			assert_eq!(cs_transactions.len(), 1);
+		}
+		vec![cs_transactions.pop().unwrap()]
+	} else {
+		assert_eq!(cs_transactions.len(), 1);
+		cs_transactions
+	};
+
+	assert_eq!(cs_htlc_claims.len(), 1);
+	check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx);
+
+	// Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s
+	mine_transaction(&nodes[1], selected_commit_tx);
+	mine_transaction(&nodes[1], &cs_htlc_claims[0]);
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
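+	// Releasing the pending `MonitorEvent`s below clears them out of the `ChainMonitor` without
+	// them ever being handed to the `ChannelManager`, simulating the lost-event case.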
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 2);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let preimage_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(preimage_events.len(), 2, "{preimage_events:?}");
+	for ev in preimage_events {
+		match ev {
+			Event::PaymentSent { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentForwarded { claim_from_onchain_tx, .. } => {
+				assert!(claim_from_onchain_tx);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	// After the background events are processed in `get_and_clear_pending_events`, above, node B
+	// will create the requisite `ChannelMonitorUpdate` for claiming the forwarded payment back.
+	// The HTLC, however, is added to the holding cell for replay after the peer connects, below.
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
+	reconnect_args.pending_cell_htlc_claims = (1, 0);
+	reconnect_nodes(reconnect_args);
+	expect_payment_sent(&nodes[0], preimage_a, None, true, true);
+}
+
+#[test]
+fn test_lost_preimage_monitor_events() {
+	do_test_lost_preimage_monitor_events(true);
+	do_test_lost_preimage_monitor_events(false);
+}
+
+#[derive(PartialEq)]
+enum CommitmentType {
+	RevokedCounterparty,
+	LatestCounterparty,
+	PreviousCounterparty,
+	LocalWithoutLastHTLC,
+	LocalWithLastHTLC,
+}
+
+fn do_test_lost_timeout_monitor_events(confirm_tx: CommitmentType, dust_htlcs: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` then gets
+	// persisted (i.e. due to a block update), and the node crashes prior to persisting the
+	// `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be
+	// lost. This isn't likely in a sync persist environment, but in an async one this could be an
+	// issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution via timeouts does not
+	// cause us to lose a `PaymentFailed` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	provide_anchor_reserves(&nodes);
+
+	let node_a_id = nodes[0].node.get_our_node_id();
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	// Ensure all nodes are at the same height
+	let node_max_height =
+		nodes.iter().map(|node| node.blocks.lock().unwrap().len()).max().unwrap() as u32;
+	connect_blocks(&nodes[0], node_max_height - nodes[0].best_block_info().1);
+	connect_blocks(&nodes[1], node_max_height - nodes[1].best_block_info().1);
+	connect_blocks(&nodes[2], node_max_height - nodes[2].best_block_info().1);
+
+	send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 25_000_000);
+
+	let cs_revoked_commit = get_local_commitment_txn!(nodes[2], chan_b);
+	assert_eq!(cs_revoked_commit.len(), 1);
+
+	let amt = if dust_htlcs { 1_000 } else { 10_000_000 };
+	let (_, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], amt);
+
+	let cs_previous_commit = get_local_commitment_txn!(nodes[2], chan_b);
+	assert_eq!(cs_previous_commit.len(), 1);
+
+	let (route, hash_b, _, payment_secret_b) =
+		get_route_and_payment_hash!(nodes[1], nodes[2], amt);
+	let onion = RecipientOnionFields::secret_only(payment_secret_b);
+	nodes[1].node.send_payment_with_route(route, hash_b, onion, PaymentId(hash_b.0)).unwrap();
+	check_added_monitors(&nodes[1], 1);
+
+	let updates = get_htlc_update_msgs(&nodes[1], &node_c_id);
+	nodes[2].node.handle_update_add_htlc(node_b_id, &updates.update_add_htlcs[0]);
+	nodes[2].node.handle_commitment_signed_batch_test(node_b_id, &updates.commitment_signed);
+	check_added_monitors(&nodes[2], 1);
+
+	let (cs_raa, cs_cs) = get_revoke_commit_msgs!(nodes[2], node_b_id);
+	if confirm_tx == CommitmentType::LocalWithLastHTLC {
+		// Only deliver the last RAA + CS if we need to update the local commitment with the third
+		// HTLC.
+		nodes[1].node.handle_revoke_and_ack(node_c_id, &cs_raa);
+		check_added_monitors(&nodes[1], 1);
+		nodes[1].node.handle_commitment_signed_batch_test(node_c_id, &cs_cs);
+		check_added_monitors(&nodes[1], 1);
+
+		let _bs_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, node_c_id);
+	}
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	// Force-close the channel and confirm a commitment transaction, then let the HTLCs time out.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	handle_bump_events(&nodes[2], true, 0);
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	handle_bump_events(&nodes[1], true, 0);
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = match confirm_tx {
+		CommitmentType::RevokedCounterparty => &cs_revoked_commit[0],
+		CommitmentType::PreviousCounterparty => &cs_previous_commit[0],
+		CommitmentType::LatestCounterparty => &cs_commit_tx[0],
+		CommitmentType::LocalWithoutLastHTLC|CommitmentType::LocalWithLastHTLC => &bs_commit_tx[0],
+	};
+
+	mine_transaction(&nodes[1], selected_commit_tx);
+	// If the block gets connected first we may re-broadcast B's commitment transaction before
+	// seeing C's confirm. In any case, if we confirmed the revoked counterparty commitment
+	// transaction, we want to go ahead and confirm the spend of it.
+	let bs_transactions = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	if confirm_tx == CommitmentType::RevokedCounterparty {
+		assert!(bs_transactions.len() == 1 || bs_transactions.len() == 2);
+		mine_transaction(&nodes[1], bs_transactions.last().unwrap());
+	} else {
+		assert!(bs_transactions.len() == 1 || bs_transactions.len() == 0);
+	}
+
+	connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1);
+	let mut events = nodes[1].chain_monitor.chain_monitor.get_and_clear_pending_events();
+	match confirm_tx {
+		CommitmentType::LocalWithoutLastHTLC|CommitmentType::LocalWithLastHTLC => {
+			assert_eq!(events.len(), 0, "{events:?}");
+		},
+		CommitmentType::PreviousCounterparty|CommitmentType::LatestCounterparty => {
+			assert_eq!(events.len(), 1, "{events:?}");
+			match events[0] {
+				Event::SpendableOutputs { .. } => {},
+				_ => panic!("Unexpected event {events:?}"),
+			}
+		},
+		CommitmentType::RevokedCounterparty => {
+			assert_eq!(events.len(), 2, "{events:?}");
+			for event in events {
+				match event {
+					Event::SpendableOutputs { .. } => {},
+					_ => panic!("Unexpected event {event:?}"),
+				}
+			}
+		},
+	}
+
+	if confirm_tx != CommitmentType::RevokedCounterparty {
+		connect_blocks(&nodes[1], TEST_FINAL_CLTV - ANTI_REORG_DELAY + 1);
+		if confirm_tx == CommitmentType::LocalWithoutLastHTLC || confirm_tx == CommitmentType::LocalWithLastHTLC {
+			if !dust_htlcs {
+				handle_bump_events(&nodes[1], false, 1);
+			}
+		}
+	}
+
+	let bs_htlc_timeouts =
+		nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	if dust_htlcs || confirm_tx == CommitmentType::RevokedCounterparty {
+		assert_eq!(bs_htlc_timeouts.len(), 0);
+	} else {
+		assert_eq!(bs_htlc_timeouts.len(), 1);
+
+		// Now replay the timeouts on node B, which after 6 confirmations should fail the HTLCs via
+		// `MonitorUpdate`s
+		mine_transaction(&nodes[1], &bs_htlc_timeouts[0]);
+		connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1);
+	}
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 2);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let timeout_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(timeout_events.len(), 3, "{timeout_events:?}");
+	for ev in timeout_events {
+		match ev {
+			Event::PaymentPathFailed { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentFailed { payment_hash, .. } => {
+				assert_eq!(payment_hash, Some(hash_b));
+			},
+			Event::HTLCHandlingFailed { prev_channel_id, .. } => {
+				assert_eq!(prev_channel_id, chan_a);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1]));
+
+	nodes[1].node.process_pending_htlc_forwards();
+	check_added_monitors(&nodes[1], 1);
+	let bs_fail = get_htlc_update_msgs(&nodes[1], &node_a_id);
+	nodes[0].node.handle_update_fail_htlc(node_b_id, &bs_fail.update_fail_htlcs[0]);
+	commitment_signed_dance!(nodes[0], nodes[1], bs_fail.commitment_signed, true, true);
+	expect_payment_failed!(nodes[0], hash_a, false);
+}
+
+#[test]
+fn test_lost_timeout_monitor_events() {
+	do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, false);
+	do_test_lost_timeout_monitor_events(CommitmentType::RevokedCounterparty, true);
+	do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, false);
+	do_test_lost_timeout_monitor_events(CommitmentType::PreviousCounterparty, true);
+	do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, false);
+	do_test_lost_timeout_monitor_events(CommitmentType::LatestCounterparty, true);
+	do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, false);
+	do_test_lost_timeout_monitor_events(CommitmentType::LocalWithoutLastHTLC, true);
+	do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, false);
+	do_test_lost_timeout_monitor_events(CommitmentType::LocalWithLastHTLC, true);
+}
diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs
index 4c63bfa4f8e..b421a4b5433 100644
--- a/lightning/src/ln/payment_tests.rs
+++ b/lightning/src/ln/payment_tests.rs
@@ -14,7 +14,7 @@
 use crate::chain::channelmonitor::{
 	ANTI_REORG_DELAY, HTLC_FAIL_BACK_BUFFER, LATENCY_GRACE_PERIOD_BLOCKS,
 };
-use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen};
+use crate::chain::{Confirm, Listen};
 use crate::events::{
 	ClosureReason, Event, HTLCHandlingFailureType, PathFailure, PaymentFailureReason,
 	PaymentPurpose,
@@ -1310,16 +1310,15 @@ fn do_test_dup_htlc_onchain_doesnt_fail_on_reload(
 		connect_blocks(&nodes[0], ANTI_REORG_DELAY - 2);
 	}
 
-	// Now connect the HTLC claim transaction with the ChainMonitor-generated ChannelMonitor update
-	// returning InProgress. This should cause the claim event to never make its way to the
-	// ChannelManager.
-	chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
-
+	// Now connect the HTLC claim transaction. Note that ChannelMonitors aren't re-persisted on
+	// each block connection (as the block being reconnected on startup should get us the same
+	// result).
 	if payment_timeout {
 		connect_blocks(&nodes[0], 1);
 	} else {
 		connect_block(&nodes[0], &claim_block);
 	}
+	check_added_monitors(&nodes[0], 0);
 
 	// Note that we skip persisting ChannelMonitors. We should still be generating the payment sent
 	// event without ChannelMonitor persistence. If we reset to a previous state on reload, the block