Commit 60dce06

Store preimages we learned on chain in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (e.g. due to a block update), and then the node crashes prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue.

Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.

When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain. In cases where a `MonitorEvent` is lost, this can lead to a lost preimage.

Here we fix it by simply tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation.

Backport of 543cc85

Resolved conflicts in
 * lightning/src/ln/monitor_tests.rs due to trivial changes upstream as well as changes to upstream bump events and commitment announcement logic.
1 parent 52035cb commit 60dce06
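At its core the change is "write the preimage into a map that is serialized with the `ChannelMonitor`, not only into the transient event queue". Below is a minimal sketch of that idea under simplified assumptions - `MonitorState`, `saw_onchain_claim`, and `replay_preimages` are hypothetical names for illustration, not LDK's real API; only `counterparty_fulfilled_htlcs`, `SentHTLCId`, and `PaymentPreimage` correspond to identifiers that appear in the diff below.

use std::collections::HashMap;

// Hypothetical stand-ins for LDK-internal types (illustration only).
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct SentHTLCId(u64);
#[derive(Clone, Copy, Debug)]
struct PaymentPreimage([u8; 32]);

struct MonitorState {
	// Serialized as part of the `ChannelMonitor`. Before this commit only
	// preimages the `ChannelManager` handed to the monitor were recorded
	// here; now preimages extracted from on-chain claims land here too.
	counterparty_fulfilled_htlcs: HashMap<SentHTLCId, PaymentPreimage>,
}

impl MonitorState {
	// Called when an HTLC claim is seen on chain: record the preimage
	// durably in addition to queueing the (lossy) `MonitorEvent`.
	fn saw_onchain_claim(&mut self, htlc: SentHTLCId, preimage: PaymentPreimage) {
		self.counterparty_fulfilled_htlcs.insert(htlc, preimage);
		// ...then push a `MonitorEvent::HTLCEvent`, exactly as before...
	}

	// On reload the manager re-applies every preimage the monitor holds, so
	// an event dropped before a crash is reconstructed from the map above.
	fn replay_preimages(&self, mut apply: impl FnMut(SentHTLCId, PaymentPreimage)) {
		for (htlc, preimage) in &self.counterparty_fulfilled_htlcs {
			apply(*htlc, *preimage);
		}
	}
}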

File tree

2 files changed: +163 −0 lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 2 additions & 0 deletions
@@ -4695,6 +4695,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
@@ -4718,6 +4719,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
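Both hunks make the same one-line change: each of the two on-chain HTLC-resolution paths in `ChannelMonitorImpl` that already queued a `MonitorEvent::HTLCEvent` now also records the preimage in `counterparty_fulfilled_htlcs`, the map the `ChannelManager` scans on reload. The preimage thus survives a crash even if the queued event does not; the test below exercises both paths via its `on_counterparty_tx` parameter.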

lightning/src/ln/monitor_tests.rs

Lines changed: 161 additions & 0 deletions
@@ -10,6 +10,7 @@
 //! Further functional tests which test blockchain reorganizations.
 
 use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SpendableOutputDescriptor};
+use crate::chain::Watch;
 use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS, LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep};
 use crate::chain::transaction::OutPoint;
 use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight};
@@ -3262,3 +3263,163 @@ fn test_claim_event_never_handled() {
 	// `ChannelMonitorUpdate`.
 	check_added_monitors(&nodes[1], 2);
 }
+
+fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets
+	// persisted (e.g. due to a block update), and then the node crashes prior to persisting the
+	// `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be
+	// lost. This isn't likely in a sync persist environment, but in an async one this could be an
+	// issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not
+	// cause us to lose funds or miss a `PaymentSent` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let coinbase_tx = provide_anchor_reserves(&nodes);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+	let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	nodes[2].node.claim_funds(preimage_a);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_a, 1_000_000);
+	nodes[2].node.claim_funds(preimage_b);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_b, 1_000_000);
+
+	// Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message)
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message)
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = if on_counterparty_tx {
+		&cs_commit_tx[0]
+	} else {
+		&bs_commit_tx[0]
+	};
+
+	mine_transaction(&nodes[2], selected_commit_tx);
+	let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first();
+	let cs_htlc_claims = if on_counterparty_tx {
+		handle_bump_htlc_event(&nodes[2], 1);
+		let mut cs_transactions =
+			nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		assert_eq!(cs_transactions.len(), 1);
+		vec![cs_transactions.pop().unwrap()]
+	} else {
+		if c_replays_commitment {
+			assert_eq!(cs_transactions.len(), 2, "{cs_transactions:?}");
+			vec![cs_transactions.pop().unwrap()]
+		} else {
+			assert_eq!(cs_transactions.len(), 1, "{cs_transactions:?}");
+			cs_transactions
+		}
+	};
+
+	assert_eq!(cs_htlc_claims.len(), 1);
+	check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx);
+
+	// Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s.
+	mine_transaction(&nodes[1], selected_commit_tx);
+	mine_transaction(&nodes[1], &cs_htlc_claims[0]);
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (e.g. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 2);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let preimage_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(preimage_events.len(), 2, "{preimage_events:?}");
+	for ev in preimage_events {
+		match ev {
+			Event::PaymentSent { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentForwarded { claim_from_onchain_tx, .. } => {
+				assert!(claim_from_onchain_tx);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	// After the background events are processed in `get_and_clear_pending_events`, above, node B
+	// will create the requisite `ChannelMonitorUpdate` for claiming the forwarded payment back.
+	// The HTLC, however, is added to the holding cell for replay after the peer connects, below.
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
+	reconnect_args.pending_cell_htlc_claims = (1, 0);
+	reconnect_nodes(reconnect_args);
+	expect_payment_sent(&nodes[0], preimage_a, None, true, true);
+}
+
+#[test]
+fn test_lost_preimage_monitor_events() {
+	do_test_lost_preimage_monitor_events(true);
+	do_test_lost_preimage_monitor_events(false);
+}
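As the final `#[test]` wrapper shows, the scenario runs twice: once claiming HTLCs against the counterparty's commitment transaction and once against the holder's. With the tree checked out, something like `cargo test -p lightning test_lost_preimage_monitor_events` should run just this test.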
