Commit aba56d6

Store preimages we learned on chain in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (e.g. due to a block update), and then the node crashes before the `ChannelManager` is persisted again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one it could be an issue.

Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.

When we restart and, during `ChannelManager` load, see a `ChannelMonitor` for a closed channel, we scan it for preimages that we passed to it and re-apply those to any pending or forwarded payments. However, we didn't scan it for preimages it learned from transactions on-chain, so in cases where a `MonitorEvent` is lost this could lead to a lost preimage.

Here we fix it by simply tracking preimages we learned on-chain the same way we track preimages picked up during normal channel operation.
1 parent 3508047 commit aba56d6
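
As a rough illustration of the approach - and not LDK's actual types: `MonitorSketch`, `SentHtlcId`, and the method names below are simplified stand-ins - the fix amounts to having the monitor record preimages learned from on-chain claims in the same persisted map it already uses for preimages received off-chain, so that a later reload can recover them even when the corresponding `MonitorEvent` was lost:

// Minimal sketch, not the real ChannelMonitor: both the off-chain and the
// on-chain preimage paths write into one map that is persisted with the
// monitor, so it survives a crash that loses pending MonitorEvents.
use std::collections::HashMap;

// Simplified stand-ins for LDK's SentHTLCId and PaymentPreimage.
type SentHtlcId = u64;
type PaymentPreimage = [u8; 32];

#[derive(Default)]
struct MonitorSketch {
	// Persisted alongside the rest of the monitor state.
	counterparty_fulfilled_htlcs: HashMap<SentHtlcId, PaymentPreimage>,
}

impl MonitorSketch {
	// Off-chain path: preimage learned from an update_fulfill_htlc message.
	fn provide_payment_preimage(&mut self, id: SentHtlcId, preimage: PaymentPreimage) {
		self.counterparty_fulfilled_htlcs.insert(id, preimage);
	}

	// On-chain path: preimage extracted from a confirmed claim transaction.
	// Before this commit the equivalent code only queued a MonitorEvent; now
	// it also records the preimage in the same durable map.
	fn on_chain_htlc_claimed(&mut self, id: SentHtlcId, preimage: PaymentPreimage) {
		self.counterparty_fulfilled_htlcs.insert(id, preimage);
		// ...and still push a MonitorEvent::HTLCEvent for normal delivery.
	}

	// Reload path: everything a manager needs in order to re-apply preimages.
	fn fulfilled_htlcs(&self) -> impl Iterator<Item = (&SentHtlcId, &PaymentPreimage)> {
		self.counterparty_fulfilled_htlcs.iter()
	}
}

fn main() {
	let mut mon = MonitorSketch::default();
	mon.provide_payment_preimage(1, [0x11; 32]);
	mon.on_chain_htlc_claimed(2, [0x22; 32]);
	// After a simulated crash and reload, both preimages remain recoverable.
	assert_eq!(mon.fulfilled_htlcs().count(), 2);
}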


2 files changed: +167, -0 lines changed


lightning/src/chain/channelmonitor.rs

Lines changed: 2 additions & 0 deletions
@@ -5787,6 +5787,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
@@ -5810,6 +5811,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
 					on_to_local_output_csv: None,
 				},
 			});
+			self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
 			self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
 				source,
 				payment_preimage: Some(payment_preimage),
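
The reload side the commit message describes - scanning the `ChannelMonitor` of a closed channel and re-applying any fulfilled-HTLC preimages to still-pending or forwarded payments - can be pictured roughly as below; again the types and the `replay_monitor_preimages` helper are illustrative stand-ins, not LDK's actual `ChannelManager` read path:

// Hedged sketch of the reload-time replay, with simplified stand-in types.
use std::collections::HashMap;

type SentHtlcId = u64;
type PaymentPreimage = [u8; 32];

struct PendingPayments {
	// Outbound/forwarded HTLCs still considered in-flight at reload time,
	// keyed by the HTLC that carries them (description only for the sketch).
	in_flight: HashMap<SentHtlcId, &'static str>,
}

impl PendingPayments {
	fn claim(&mut self, id: SentHtlcId, _preimage: PaymentPreimage) -> Option<&'static str> {
		// Resolving the payment would also queue a PaymentSent or
		// PaymentForwarded event, as the new test asserts.
		self.in_flight.remove(&id)
	}
}

// Re-apply every preimage the monitor recorded, whether it was learned
// off-chain or (after this commit) from an on-chain claim.
fn replay_monitor_preimages(
	fulfilled: &HashMap<SentHtlcId, PaymentPreimage>,
	payments: &mut PendingPayments,
) {
	for (id, preimage) in fulfilled {
		if let Some(desc) = payments.claim(*id, *preimage) {
			println!("recovered preimage for {desc}");
		}
	}
}

fn main() {
	let mut payments =
		PendingPayments { in_flight: HashMap::from([(2, "forwarded HTLC B -> C")]) };
	let fulfilled = HashMap::from([(2, [0x22u8; 32])]);
	replay_monitor_preimages(&fulfilled, &mut payments);
	assert!(payments.in_flight.is_empty());
}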

lightning/src/ln/monitor_tests.rs

Lines changed: 165 additions & 0 deletions
@@ -12,6 +12,7 @@
 //! Further functional tests which test blockchain reorganizations.
 
 use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SignerProvider, SpendableOutputDescriptor};
+use crate::chain::Watch;
 use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ARCHIVAL_DELAY_BLOCKS,LATENCY_GRACE_PERIOD_BLOCKS, COUNTERPARTY_CLAIMABLE_WITHIN_BLOCKS_PINNABLE, Balance, BalanceSource, ChannelMonitorUpdateStep};
 use crate::chain::transaction::OutPoint;
 use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight};
@@ -3298,3 +3299,167 @@ fn test_claim_event_never_handled() {
 	// `ChannelMonitorUpdate`.
 	check_added_monitors(&nodes[1], 2);
 }
+
+fn do_test_lost_preimage_monitor_events(on_counterparty_tx: bool) {
+	// `MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the
+	// `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets
+	// persisted (e.g. due to a block update), and then the node crashes before the
+	// `ChannelManager` is persisted again, the `MonitorEvent` and its effects on the
+	// `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an
+	// async one this could be an issue.
+	//
+	// Note that this is only an issue for closed channels - `MonitorEvent`s only inform the
+	// `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup
+	// or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes
+	// completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved
+	// on-chain post closure. Of the three, only the last is problematic to lose prior to a reload.
+	//
+	// Here we test that losing `MonitorEvent`s that contain HTLC resolution preimages does not
+	// cause us to lose funds or miss a `PaymentSent` event.
+	let mut cfg = test_default_channel_config();
+	cfg.manually_accept_inbound_channels = true;
+	cfg.channel_handshake_config.negotiate_anchors_zero_fee_htlc_tx = true;
+	let cfgs = [Some(cfg.clone()), Some(cfg.clone()), Some(cfg.clone())];
+
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let persister;
+	let new_chain_mon;
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &cfgs);
+	let node_b_reload;
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let coinbase_tx = provide_anchor_reserves(&nodes);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+
+	let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0).2;
+	let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0).2;
+
+	let (preimage_a, hash_a, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+	let (preimage_b, hash_b, ..) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
+
+	nodes[1].node.peer_disconnected(nodes[2].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	nodes[2].node.claim_funds(preimage_a);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_a, 1_000_000);
+	nodes[2].node.claim_funds(preimage_b);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], hash_b, 1_000_000);
+
+	// Force-close the channel, confirming a commitment transaction then letting C claim the HTLCs.
+	let message = "Closed".to_owned();
+	nodes[2]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_b_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[2], 1);
+	let c_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[2], 1, c_reason, [node_b_id], 1_000_000);
+	check_closed_broadcast!(nodes[2], true);
+
+	handle_bump_events(&nodes[2], true, 0);
+	let cs_commit_tx = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(cs_commit_tx.len(), 1);
+
+	let message = "Closed".to_owned();
+	nodes[1]
+		.node
+		.force_close_broadcasting_latest_txn(&chan_b, &node_c_id, message.clone())
+		.unwrap();
+	check_added_monitors(&nodes[1], 1);
+	let b_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true), message };
+	check_closed_event!(nodes[1], 1, b_reason, [node_c_id], 1_000_000);
+	check_closed_broadcast!(nodes[1], true);
+
+	handle_bump_events(&nodes[1], true, 0);
+	let bs_commit_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	assert_eq!(bs_commit_tx.len(), 1);
+
+	let selected_commit_tx = if on_counterparty_tx {
+		&cs_commit_tx[0]
+	} else {
+		&bs_commit_tx[0]
+	};
+
+	mine_transaction(&nodes[2], selected_commit_tx);
+	let mut cs_transactions = nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	let c_replays_commitment = nodes[2].connect_style.borrow().updates_best_block_first();
+	let cs_htlc_claims = if on_counterparty_tx {
+		assert_eq!(cs_transactions.len(), 0);
+
+		handle_bump_events(&nodes[2], c_replays_commitment, 1);
+		let mut cs_transactions =
+			nodes[2].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		if c_replays_commitment {
+			assert_eq!(cs_transactions.len(), 3);
+			check_spends!(cs_transactions[1], cs_transactions[0], coinbase_tx);
+		} else {
+			assert_eq!(cs_transactions.len(), 1);
+		}
+		vec![cs_transactions.pop().unwrap()]
+	} else {
+		assert_eq!(cs_transactions.len(), 1);
+		cs_transactions
+	};
+
+	assert_eq!(cs_htlc_claims.len(), 1);
+	check_spends!(cs_htlc_claims[0], selected_commit_tx, coinbase_tx);
+
+	// Now replay the claims on node B, which should generate preimage-containing `MonitorUpdate`s.
+	mine_transaction(&nodes[1], selected_commit_tx);
+	mine_transaction(&nodes[1], &cs_htlc_claims[0]);
+
+	// Now simulate a restart where the B<->C ChannelMonitor has been persisted (i.e. because we
+	// just processed a new block) but the ChannelManager was not. This should be exceedingly rare
+	// given we have to be connecting a block at the right moment and not manage to get a
+	// ChannelManager persisted after it does a thing that should immediately precede persistence,
+	// but with async persist it is more common.
+	//
+	// We do this by wiping the `MonitorEvent`s from the monitors and then reloading with the
+	// latest state.
+	let mon_events = nodes[1].chain_monitor.chain_monitor.release_pending_monitor_events();
+	assert_eq!(mon_events.len(), 1);
+	assert_eq!(mon_events[0].2.len(), 2);
+
+	let node_ser = nodes[1].node.encode();
+	let mon_a_ser = get_monitor!(nodes[1], chan_a).encode();
+	let mon_b_ser = get_monitor!(nodes[1], chan_b).encode();
+	let mons = &[&mon_a_ser[..], &mon_b_ser[..]];
+	reload_node!(nodes[1], cfg, &node_ser, mons, persister, new_chain_mon, node_b_reload);
+
+	let preimage_events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(preimage_events.len(), 2, "{preimage_events:?}");
+	for ev in preimage_events {
+		match ev {
+			Event::PaymentSent { payment_hash, .. } => {
+				assert_eq!(payment_hash, hash_b);
+			},
+			Event::PaymentForwarded { claim_from_onchain_tx, .. } => {
+				assert!(claim_from_onchain_tx);
+			},
+			_ => panic!("Wrong event {ev:?}"),
+		}
+	}
+
+	// After the background events are processed in `get_and_clear_pending_events`, above, node B
+	// will create the requisite `ChannelMonitorUpdate` for claiming the forwarded payment back.
+	// The HTLC, however, is added to the holding cell for replay after the peer connects, below.
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.peer_disconnected(nodes[1].node.get_our_node_id());
+
+	let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
+	reconnect_args.pending_cell_htlc_claims = (1, 0);
+	reconnect_nodes(reconnect_args);
+	expect_payment_sent(&nodes[0], preimage_a, None, true, true);
+}
+
+#[test]
+fn test_lost_preimage_monitor_events() {
+	do_test_lost_preimage_monitor_events(true);
+	do_test_lost_preimage_monitor_events(false);
+}
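
Assuming the usual rust-lightning workspace layout, the new test can presumably be exercised with something like `cargo test -p lightning lost_preimage_monitor_events`.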
