
Commit 0a264e8

Store preimages we learned on chain in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` is then persisted (e.g. due to a block update), and the node crashes before the `ChannelManager` is persisted again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue.

Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.

When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain. In cases where a `MonitorEvent` is lost, this can lead to a lost preimage.

Here we fix it by simply tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation.
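To make the idea concrete, below is a minimal, self-contained sketch of the mechanism the commit relies on. Everything except the `counterparty_fulfilled_htlcs` map name is an illustrative stand-in, not LDK's actual types or APIs: the point is only that a preimage learned from an on-chain claim is recorded in durably-persisted monitor state (the same map that already holds preimages learned off-chain), so a reload can recover it even if the corresponding `MonitorEvent` was consumed but never acted on durably.

use std::collections::HashMap;

// Illustrative stand-ins for LDK's `SentHTLCId` and `PaymentPreimage`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct SentHTLCId(u64);
#[derive(Clone, Copy, Debug)]
struct PaymentPreimage([u8; 32]);

// Simplified, hypothetical model of the per-channel state persisted with a
// `ChannelMonitor`.
#[derive(Default)]
struct MonitorState {
    // Preimages the counterparty revealed, keyed by the HTLC they resolve.
    // The fix is to also insert preimages learned from on-chain claims here,
    // so they live in durable monitor state rather than only in a transient
    // `MonitorEvent`.
    counterparty_fulfilled_htlcs: HashMap<SentHTLCId, PaymentPreimage>,
}

impl MonitorState {
    // Called when an on-chain HTLC claim reveals a preimage.
    fn saw_onchain_preimage(&mut self, htlc: SentHTLCId, preimage: PaymentPreimage) {
        // Durable record: survives a crash even if the matching `MonitorEvent`
        // was fetched but the manager state was never re-persisted.
        self.counterparty_fulfilled_htlcs.insert(htlc, preimage);
        // ...a `MonitorEvent` would still be queued here, as before...
    }

    // On reload, the manager can rescan the monitor and replay these preimages
    // against any pending or forwarded payments.
    fn preimages_for_replay(&self) -> impl Iterator<Item = (&SentHTLCId, &PaymentPreimage)> {
        self.counterparty_fulfilled_htlcs.iter()
    }
}

fn main() {
    let mut state = MonitorState::default();
    state.saw_onchain_preimage(SentHTLCId(1), PaymentPreimage([0x11; 32]));
    // After a simulated restart, the preimage is still recoverable from the
    // persisted monitor state.
    for (id, _preimage) in state.preimages_for_replay() {
        println!("replaying preimage for HTLC {:?}", id);
    }
}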
1 parent e3225a9 commit 0a264e8

2 files changed (+167, -0)

lightning/src/chain/channelmonitor.rs

Lines changed: 2 additions & 0 deletions

@@ -5841,6 +5841,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
@@ -5864,6 +5865,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),

lightning/src/ln/monitor_tests.rs

Lines changed: 165 additions & 0 deletions

@@ -12,6 +12,7 @@
 //! Further functional tests which test blockchain reorganizations.
 
 use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SignerProvider, SpendableOutputDescriptor};
+use crate::chain::Watch;
 use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS, LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep};
 use crate::chain::transaction::OutPoint;
 use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight};
@@ -3300,3 +3301,167 @@ fn test_claim_event_never_handled() {
 	// `ChannelMonitorUpdate`.
 	check_added_monitors(&nodes[1], 2);
 }
+
+fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, the `ChannelMonitor` is then
+	// persisted (e.g. due to a block update), and the node crashes before the `ChannelManager`
+	// is persisted again, the `MonitorEvent` and its effects on the `ChannelManager` will be
+	// lost. This isn't likely in a sync persist environment, but in an async one this could be an
+	// issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not
+	// cause us to lose funds or miss a `PaymentSent` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let coinbase_tx = provide_anchor_reserves(&nodes);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+	let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	nodes[2].node.claim_funds(preimage_a);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_a, 1_000_000);
+	nodes[2].node.claim_funds(preimage_b);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_b, 1_000_000);
+
+	// Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	handle_bump_events(&nodes[2], true, 0);
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	handle_bump_events(&nodes[1], true, 0);
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = if on_counterparty_tx {
+		&cs_commit_tx[0]
+	} else {
+		&bs_commit_tx[0]
+	};
+
+	mine_transaction(&nodes[2], selected_commit_tx);
+	let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first();
+	let cs_htlc_claims = if on_counterparty_tx {
+		assert_eq!(cs_transactions.len(), 0);
+
+		handle_bump_events(&nodes[2], c_replays_commitment, 1);
+		let mut cs_transactions =
+			nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		if c_replays_commitment {
+			assert_eq!(cs_transactions.len(), 3);
+			check_spends!(cs_transactions[1], cs_transactions[0], coinbase_tx);
+		} else {
+			assert_eq!(cs_transactions.len(), 1);
+		}
+		vec![cs_transactions.pop().unwrap()]
+	} else {
+		assert_eq!(cs_transactions.len(), 1);
+		cs_transactions
+	};
+
+	assert_eq!(cs_htlc_claims.len(), 1);
+	check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx);
+
+	// Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s
+	mine_transaction(&nodes[1], selected_commit_tx);
+	mine_transaction(&nodes[1], &cs_htlc_claims[0]);
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 3);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let preimage_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(preimage_events.len(), 2, "{preimage_events:?}");
+	for ev in preimage_events {
+		match ev {
+			Event::PaymentSent { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentForwarded { claim_from_onchain_tx, .. } => {
+				assert!(claim_from_onchain_tx);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	// After the background events are processed in `get_and_clear_pending_events`, above, node B
+	// will create the requisite `ChannelMonitorUpdate` for claiming the forwarded payment back.
+	// The HTLC, however, is added to the holding cell for replay after the peer connects, below.
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
+	reconnect_args.pending_cell_htlc_claims = (1, 0);
+	reconnect_nodes(reconnect_args);
+	expect_payment_sent(&nodes[0], preimage_a, None, true, true);
+}
+
+#[test]
+fn test_lost_preimage_monitor_events() {
+	do_test_lost_preimage_monitor_events(true);
+	do_test_lost_preimage_monitor_events(false);
+}
