Commit b24f8b5

Store preimages we learned on chain in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` then gets persisted (e.g. due to a block update), and the node crashes before the `ChannelManager` is persisted again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue.

Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.

When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain. In cases where a `MonitorEvent` is lost, this can lead to a lost preimage. Here we fix it by simply tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation.
1 parent f00a9b0 commit b24f8b5
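
For illustration only, a minimal, self-contained Rust sketch of the idea behind the fix. The names (`MonitorState`, `SentHtlcId`, `note_onchain_fulfill`, `replay_preimages_on_reload`) are hypothetical and are not rust-lightning's real API; the sketch only shows why recording on-chain-learned preimages in the same persisted map used for off-chain fulfills makes a lost `MonitorEvent` harmless.

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct SentHtlcId(u64);

type PaymentPreimage = [u8; 32];

// The slice of monitor state that survives a crash because it is persisted together
// with the rest of the (hypothetical) monitor.
#[derive(Default)]
struct MonitorState {
	// Preimages for HTLCs the counterparty fulfilled, whether learned off-chain or
	// (after this commit) extracted from an on-chain claim.
	counterparty_fulfilled_htlcs: HashMap<SentHtlcId, PaymentPreimage>,
}

impl MonitorState {
	// Off-chain fulfill: already tracked before this commit.
	fn note_offchain_fulfill(&mut self, id: SentHtlcId, preimage: PaymentPreimage) {
		self.counterparty_fulfilled_htlcs.insert(id, preimage);
	}

	// On-chain fulfill: the fix is to record it in the same persisted map, so it
	// survives even if the corresponding `MonitorEvent` is lost before a reload.
	fn note_onchain_fulfill(&mut self, id: SentHtlcId, preimage: PaymentPreimage) {
		self.counterparty_fulfilled_htlcs.insert(id, preimage);
	}
}

// On reload, the manager rescans a closed channel's monitor and replays any preimages
// it finds against pending or forwarded payments.
fn replay_preimages_on_reload(monitor: &MonitorState) -> Vec<(SentHtlcId, PaymentPreimage)> {
	monitor.counterparty_fulfilled_htlcs.iter().map(|(id, p)| (*id, *p)).collect()
}

fn main() {
	let mut mon = MonitorState::default();
	mon.note_offchain_fulfill(SentHtlcId(1), [0x11; 32]);
	// Preimage learned from a counterparty's on-chain HTLC claim; previously this
	// only produced a (losable) MonitorEvent.
	mon.note_onchain_fulfill(SentHtlcId(2), [0x22; 32]);
	// After a crash and reload, both preimages are still recoverable from the monitor.
	assert_eq!(replay_preimages_on_reload(&mon).len(), 2);
}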

File tree: 2 files changed, +165 −0 lines

lightning/src/chain/channelmonitor.rs

Lines changed: 2 additions & 0 deletions
@@ -5502,6 +5502,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 						on_to_local_output_csv: None,
 					},
 				});
+				self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 				self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 					source,
 					payment_preimage: Some(payment_preimage),
@@ -5525,6 +5526,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 						on_to_local_output_csv: None,
 					},
 				});
+				self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 				self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 					source,
 					payment_preimage: Some(payment_preimage),

lightning/src/ln/monitor_tests.rs

Lines changed: 163 additions & 0 deletions
@@ -12,6 +12,7 @@
 //! Further functional tests which test blockchain reorganizations.
 
 use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SignerProvider, SpendableOutputDescriptor};
+use crate::chain::Watch;
 use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS,LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep};
 use crate::chain::transaction::OutPoint;
 use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight};
@@ -3211,3 +3212,165 @@ fn test_update_replay_panics() {
 	monitor.update_monitor(&updates[2], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap();
 	monitor.update_monitor(&updates[3], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap();
 }
+
+fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` then gets
+	// persisted (e.g. due to a block update), and the node crashes before the `ChannelManager`
+	// is persisted again, the `MonitorEvent` and its effects on the `ChannelManager` will be
+	// lost. This isn't likely in a sync persist environment, but in an async one this could be an
+	// issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not
+	// cause us to lose funds or miss a `PaymentSent` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let coinbase_tx = provide_anchor_reserves(&nodes);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+	let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	nodes[2].node.claim_funds(preimage_a);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_a, 1_000_000);
+	nodes[2].node.claim_funds(preimage_b);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_b, 1_000_000);
+
+	// Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = if on_counterparty_tx {
+		&cs_commit_tx[0]
+	} else {
+		&bs_commit_tx[0]
+	};
+
+	mine_transaction(&nodes[2], selected_commit_tx);
+	let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first();
+	let cs_htlc_claims = if on_counterparty_tx {
+		assert_eq!(cs_transactions.len(), if c_replays_commitment { 1 } else { 0 });
+
+		let mut events = nodes[2].chain_monitor.chain_monitor.get_and_clear_pending_events();
+		assert_eq!(events.len(), 1);
+		match events.pop().unwrap() {
+			Event::BumpTransaction(bump_event) => {
+				nodes[2].bump_tx_handler.handle_event(&bump_event);
+			},
+			_ => panic!("Unexpected event"),
+		}
+
+		nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0)
+	} else {
+		assert_eq!(cs_transactions.len(), if c_replays_commitment { 2 } else { 1 });
+		vec![cs_transactions.pop().unwrap()]
+	};
+
+	assert_eq!(cs_htlc_claims.len(), 1);
+	check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx);
+
+	// Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s
+	mine_transaction(&nodes[1], selected_commit_tx);
+	mine_transaction(&nodes[1], &cs_htlc_claims[0]);
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 2);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let preimage_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(preimage_events.len(), 2, "{preimage_events:?}");
+	for ev in preimage_events {
+		match ev {
+			Event::PaymentSent { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentForwarded { claim_from_onchain_tx, .. } => {
+				assert!(claim_from_onchain_tx);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	// After the background events are processed in `get_and_clear_pending_events`, above, node B
+	// will create the requisite `ChannelMonitorUpdate` for claiming the forwarded payment back.
+	// The HTLC, however, is added to the holding cell for replay after the peer connects, below.
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
+	reconnect_args.pending_cell_htlc_claims = (1, 0);
+	reconnect_nodes(reconnect_args);
+	expect_payment_sent(&nodes[0], preimage_a, None, true, true);
+}
+
+#[test]
+fn test_lost_preimage_monitor_events() {
+	do_test_lost_preimage_monitor_events(true);
+	do_test_lost_preimage_monitor_events(false);
+}
