
Commit 17e143f

Store preimages we learned on chain in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` is then persisted (e.g. due to a block update), and the node crashes before the `ChannelManager` is persisted again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one it could be an issue.

Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post-closure. Of the three, only the last is problematic to lose prior to a reload.

When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain. In cases where a `MonitorEvent` is lost, this can lead to a lost preimage.

Here we fix that by tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation.
1 parent 22b0f37 commit 17e143f
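The recovery path this enables can be pictured with a small, self-contained sketch. This is not LDK's actual API - the types and functions below are stand-ins invented purely for illustration - but it shows the idea: record every preimage the monitor learns, on-chain or off-chain, in a map that is persisted with the monitor, so a reload can replay it even if the queued event never reached the manager.

use std::collections::HashMap;

// Hypothetical stand-ins for illustration only; not LDK's real types.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct SentHtlcId(u64);
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct PaymentPreimage([u8; 32]);

// Toy "monitor": the fulfilled-HTLC map is persisted with the monitor, while
// the pending event queue may be lost if the node crashes before the manager
// is persisted again.
#[derive(Default)]
struct ToyMonitor {
	counterparty_fulfilled_htlcs: HashMap<SentHtlcId, PaymentPreimage>,
	pending_monitor_events: Vec<(SentHtlcId, PaymentPreimage)>,
}

impl ToyMonitor {
	// Called whenever a preimage is learned, whether from the counterparty
	// off-chain or (after this commit) from an on-chain claim: store it
	// durably *and* queue the event.
	fn saw_preimage(&mut self, id: SentHtlcId, preimage: PaymentPreimage) {
		self.counterparty_fulfilled_htlcs.insert(id, preimage);
		self.pending_monitor_events.push((id, preimage));
	}
}

// On reload, the manager can recover claims by scanning the persisted map,
// independent of whether the matching event was ever delivered.
fn preimages_to_replay(monitor: &ToyMonitor) -> Vec<(SentHtlcId, PaymentPreimage)> {
	monitor.counterparty_fulfilled_htlcs.iter().map(|(id, p)| (*id, *p)).collect()
}

fn main() {
	let mut mon = ToyMonitor::default();
	mon.saw_preimage(SentHtlcId(1), PaymentPreimage([0x11; 32]));
	// Simulate the crash: the event queue is dropped before delivery...
	mon.pending_monitor_events.clear();
	// ...but the preimage is still recoverable from the persisted map.
	assert_eq!(preimages_to_replay(&mon).len(), 1);
}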

File tree

lightning/src/chain/channelmonitor.rs
lightning/src/ln/monitor_tests.rs

2 files changed: +167 -0 lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 2 additions & 0 deletions
@@ -5691,6 +5691,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
@@ -5714,6 +5715,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
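Both hunks sit at the points where the monitor turns an on-chain HTLC claim into an `HTLCEvent` `MonitorEvent` carrying the preimage. By also inserting the preimage into `counterparty_fulfilled_htlcs` - the same map used for preimages learned through normal channel operation - the preimage is persisted as part of the `ChannelMonitor` itself, so the startup scan described in the commit message can recover it even if the queued event is never delivered to the `ChannelManager`.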

lightning/src/ln/monitor_tests.rs

Lines changed: 165 additions & 0 deletions
@@ -12,6 +12,7 @@
 //! Further functional tests which test blockchain reorganizations.
 
 use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SignerProvider, SpendableOutputDescriptor};
+use crate::chain::Watch;
 use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS,LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep};
 use crate::chain::transaction::OutPoint;
 use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight};
@@ -3290,3 +3291,167 @@ fn test_claim_event_never_handled() {
 	// `ChannelMonitorUpdate`.
 	check_added_monitors(&nodes[1], 2);
 }
+
+fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets
+	// persisted (e.g. due to a block update) then the node crashes, prior to persisting the
+	// `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be
+	// lost. This isn't likely in a sync persist environment, but in an async one this could be an
+	// issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not
+	// cause us to lose funds or miss a `PaymentSent` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let coinbase_tx = provide_anchor_reserves(&nodes);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+	let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	nodes[2].node.claim_funds(preimage_a);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_a, 1_000_000);
+	nodes[2].node.claim_funds(preimage_b);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_b, 1_000_000);
+
+	// Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	handle_bump_events(&nodes[2], true, 0);
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	handle_bump_events(&nodes[1], true, 0);
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = if on_counterparty_tx {
+		&cs_commit_tx[0]
+	} else {
+		&bs_commit_tx[0]
+	};
+
+	mine_transaction(&nodes[2], selected_commit_tx);
+	let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first();
+	let cs_htlc_claims = if on_counterparty_tx {
+		assert_eq!(cs_transactions.len(), 0);
+
+		handle_bump_events(&nodes[2], c_replays_commitment, 1);
+		let mut cs_transactions =
+			nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		if c_replays_commitment {
+			assert_eq!(cs_transactions.len(), 3);
+			check_spends!(cs_transactions[1], cs_transactions[0], coinbase_tx);
+		} else {
+			assert_eq!(cs_transactions.len(), 1);
+		}
+		vec![cs_transactions.pop().unwrap()]
+	} else {
+		assert_eq!(cs_transactions.len(), 1);
+		cs_transactions
+	};
+
+	assert_eq!(cs_htlc_claims.len(), 1);
+	check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx);
+
+	// Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s
+	mine_transaction(&nodes[1], selected_commit_tx);
+	mine_transaction(&nodes[1], &cs_htlc_claims[0]);
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 2);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let preimage_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(preimage_events.len(), 2, "{preimage_events:?}");
+	for ev in preimage_events {
+		match ev {
+			Event::PaymentSent { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentForwarded { claim_from_onchain_tx, .. } => {
+				assert!(claim_from_onchain_tx);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	// After the background events are processed in `get_and_clear_pending_events`, above, node B
+	// will create the requisite `ChannelMonitorUpdate` for claiming the forwarded payment back.
+	// The HTLC, however, is added to the holding cell for replay after the peer connects, below.
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
+	reconnect_args.pending_cell_htlc_claims = (1, 0);
+	reconnect_nodes(reconnect_args);
+	expect_payment_sent(&nodes[0], preimage_a, None, true, true);
+}
+
+#[test]
+fn test_lost_preimage_monitor_events() {
+	do_test_lost_preimage_monitor_events(true);
+	do_test_lost_preimage_monitor_events(false);
+}
