
Commit 674b047

Block the mon update removing a preimage until upstream mon writes
When we forward a payment and receive an `update_fulfill_htlc` message from the downstream channel, we immediately claim the HTLC on the upstream channel, before even doing a `commitment_signed` dance on the downstream channel. This implies that our `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money by writing the preimage down, then we write the update that resolves giving money on the downstream node.

This is safe as long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of course looking forward we want to support asynchronous updates, which may complete in any order.

Thus, here, we enforce the correct ordering by blocking the downstream `ChannelMonitorUpdate` until the upstream one completes. Like the `PaymentSent` event handling, we do so only for the `revoke_and_ack` `ChannelMonitorUpdate`, ensuring the preimage-containing upstream update has a full RTT to complete before we actually manage to slow anything down.
1 parent 94f5213 commit 674b047
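
For readers unfamiliar with the ordering hazard described above, here is a minimal, self-contained sketch of the idea (hypothetical types and names, not LDK's actual `RAAMonitorUpdateBlockingAction` / `ChannelMonitorUpdate` machinery): the downstream channel's `revoke_and_ack`-time monitor update is held until the upstream, preimage-carrying update has durably completed, so two asynchronous writes cannot land in the inverted order.

// Minimal sketch (hypothetical, not LDK's API): the downstream (RAA) monitor
// update is only released once the upstream (preimage-carrying) write is done.
use std::collections::HashSet;

// Hypothetical identifier for a pending monitor write.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct UpdateId(u64);

#[derive(Default)]
struct MonitorUpdateBlocker {
	// Upstream updates (those persisting an HTLC preimage) still in flight.
	pending_upstream: HashSet<UpdateId>,
}

impl MonitorUpdateBlocker {
	fn upstream_generated(&mut self, id: UpdateId) {
		self.pending_upstream.insert(id);
	}
	fn upstream_completed(&mut self, id: UpdateId) {
		self.pending_upstream.remove(&id);
	}
	// The downstream RAA monitor update may only go out once the upstream
	// update it is blocked on has completed.
	fn can_release_downstream(&self, blocked_on: UpdateId) -> bool {
		!self.pending_upstream.contains(&blocked_on)
	}
}

fn main() {
	let mut blocker = MonitorUpdateBlocker::default();
	let upstream = UpdateId(1);

	// B writes the preimage upstream first; the downstream RAA update is held.
	blocker.upstream_generated(upstream);
	assert!(!blocker.can_release_downstream(upstream));

	// Only once the upstream write is durable does the downstream update go out.
	blocker.upstream_completed(upstream);
	assert!(blocker.can_release_downstream(upstream));
}

In the actual change, the blocking is tracked per forwarded HTLC via `RAAMonitorUpdateBlockingAction::from_prev_hop_data` and released when the upstream channel's monitor update completes, as the diff below shows.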

3 files changed, +193 -28 lines changed

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 139 additions & 6 deletions
@@ -3035,8 +3035,8 @@ fn test_blocked_chan_preimage_release() {
 	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
 	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
 
-	create_announced_chan_between_nodes(&nodes, 0, 1).2;
-	create_announced_chan_between_nodes(&nodes, 1, 2).2;
+	create_announced_chan_between_nodes(&nodes, 0, 1);
+	let chan_id_2 = create_announced_chan_between_nodes(&nodes, 1, 2).2;
 
 	send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 5_000_000);
 
@@ -3065,20 +3065,29 @@ fn test_blocked_chan_preimage_release() {
 	let as_htlc_fulfill_updates = get_htlc_update_msgs!(nodes[0], nodes[1].node.get_our_node_id());
 	nodes[1].node.handle_update_fulfill_htlc(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.update_fulfill_htlcs[0]);
 	check_added_monitors(&nodes[1], 1); // We generate only a preimage monitor update
+	assert!(get_monitor!(nodes[1], chan_id_2).get_stored_preimages().contains_key(&payment_hash_2));
 	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 
-	// Finish the CS dance between nodes[0] and nodes[1].
-	do_commitment_signed_dance(&nodes[1], &nodes[0], &as_htlc_fulfill_updates.commitment_signed, false, false);
+	// Finish the CS dance between nodes[0] and nodes[1]. Note that until the event handling, the
+	// update_fulfill_htlc + CS is held, even though the preimage is already on disk for the
+	// channel.
+	nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.commitment_signed);
+	check_added_monitors(&nodes[1], 1);
+	let (a, raa) = do_main_commitment_signed_dance(&nodes[1], &nodes[0], false);
+	assert!(a.is_none());
+
+	nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &raa);
 	check_added_monitors(&nodes[1], 0);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 
 	let events = nodes[1].node.get_and_clear_pending_events();
 	assert_eq!(events.len(), 3);
 	if let Event::PaymentSent { .. } = events[0] {} else { panic!(); }
 	if let Event::PaymentPathSuccessful { .. } = events[2] {} else { panic!(); }
 	if let Event::PaymentForwarded { .. } = events[1] {} else { panic!(); }
 
-	// The event processing should release the last RAA update.
-	check_added_monitors(&nodes[1], 1);
+	// The event processing should release the last RAA updates on both channels.
+	check_added_monitors(&nodes[1], 2);
 
 	// When we fetch the next update the message getter will generate the next update for nodes[2],
 	// generating a further monitor update.
@@ -3089,3 +3098,127 @@ fn test_blocked_chan_preimage_release() {
 	do_commitment_signed_dance(&nodes[2], &nodes[1], &bs_htlc_fulfill_updates.commitment_signed, false, false);
 	expect_payment_sent(&nodes[2], payment_preimage_2, None, true, true);
 }
+
+fn do_test_inverted_mon_completion_order(complete_bc_commitment_dance: bool) {
+	// When we forward a payment and receive an `update_fulfill_htlc` message from the downstream
+	// channel, we immediately claim the HTLC on the upstream channel, before even doing a
+	// `commitment_signed` dance on the downstream channel. This implies that our
+	// `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money,
+	// then we write the update that resolves giving money on the downstream node. This is safe as
+	// long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of
+	// course this may not be the case. For asynchronous update writes, we have to ensure monitor
+	// updates can block each other, preventing the inversion all together.
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+
+	let persister;
+	let new_chain_monitor;
+	let nodes_1_deserialized;
+
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2;
+	let chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2;
+
+	// Route a payment from A, through B, to C, then claim it on C. Once we pass B the
+	// `update_fulfill_htlc` we have a monitor update for both of B's channels. We complete the one
+	// on the B<->C channel but leave the A<->B monitor update pending, then reload B.
+	let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000);
+
+	let mon_ab = get_monitor!(nodes[1], chan_id_ab).encode();
+
+	nodes[2].node.claim_funds(payment_preimage);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], payment_hash, 100_000);
+
+	chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	let cs_updates = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());
+	nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]);
+
+	// B generates a new monitor update for the A <-> B channel, but doesn't send the new messages
+	// for it since the monitor update is marked in-progress.
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now step the Commitment Signed Dance between B and C forward a bit (or fully), ensuring we
+	// won't get the preimage when the nodes reconnect and we have to get it from the
+	// ChannelMonitor.
+	nodes[1].node.handle_commitment_signed(&nodes[2].node.get_our_node_id(), &cs_updates.commitment_signed);
+	check_added_monitors(&nodes[1], 1);
+	if complete_bc_commitment_dance {
+		let (bs_revoke_and_ack, bs_commitment_signed) = get_revoke_commit_msgs!(nodes[1], nodes[2].node.get_our_node_id());
+		nodes[2].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
+		check_added_monitors(&nodes[2], 1);
+		nodes[2].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_commitment_signed);
+		check_added_monitors(&nodes[2], 1);
+		let cs_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
+
+		// At this point node B still hasn't persisted the `ChannelMonitorUpdate` with the
+		// preimage in the A <-> B channel, which will prevent it from persisting the
+		// `ChannelMonitorUpdate` here to avoid "losing" the preimage.
+		nodes[1].node.handle_revoke_and_ack(&nodes[2].node.get_our_node_id(), &cs_raa);
+		check_added_monitors(&nodes[1], 0);
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+	}
+
+	// Now reload node B
+	let manager_b = nodes[1].node.encode();
+
+	let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode();
+	reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, new_chain_monitor, nodes_1_deserialized);
+
+	nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+
+	// If we used the latest ChannelManager to reload from, we should have both channels still
+	// live. The B <-> C channel's final RAA ChannelMonitorUpdate must still be blocked as
+	// before - the ChannelMonitorUpdate for the A <-> B channel hasn't completed.
+	// When we call `timer_tick_occurred` we will get that monitor update back, which we'll
+	// complete after reconnecting to our peers.
+	persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	nodes[1].node.timer_tick_occurred();
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now reconnect B to both A and C. If the B <-> C commitment signed dance wasn't run to
+	// the end go ahead and do that, though the -2 in `reconnect_nodes` indicates that we
+	// expect to *not* receive the final RAA ChannelMonitorUpdate.
+	if complete_bc_commitment_dance {
+		reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[2]));
+	} else {
+		let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
+		reconnect_args.pending_responding_commitment_signed.1 = true;
+		reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
+		reconnect_args.pending_raa = (false, true);
+		reconnect_nodes(reconnect_args);
+	}
+
+	reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1]));
+
+	// (Finally) complete the A <-> B ChannelMonitorUpdate, ensuring the preimage is durably on
+	// disk in the proper ChannelMonitor, unblocking the B <-> C ChannelMonitor updating
+	// process.
+	let (outpoint, _, ab_update_id) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone();
+	nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, ab_update_id).unwrap();
+
+	// When we fetch B's HTLC update messages here (now that the ChannelMonitorUpdate has
+	// completed), it will also release the final RAA ChannelMonitorUpdate on the B <-> C
+	// channel.
+	let bs_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
+	do_commitment_signed_dance(&nodes[0], &nodes[1], &bs_updates.commitment_signed, false, false);
+
+	expect_payment_forwarded!(nodes[1], &nodes[0], &nodes[2], Some(1_000), false, false);
+
+	// Finally, check that the payment was, ultimately, seen as sent by node A.
+	expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
+}
+
+#[test]
+fn test_inverted_mon_completion_order() {
+	do_test_inverted_mon_completion_order(true);
+	do_test_inverted_mon_completion_order(false);
+}

lightning/src/ln/channelmanager.rs

Lines changed: 53 additions & 8 deletions
@@ -655,7 +655,6 @@ pub(crate) enum RAAMonitorUpdateBlockingAction {
 }
 
 impl RAAMonitorUpdateBlockingAction {
-	#[allow(unused)]
 	fn from_prev_hop_data(prev_hop: &HTLCPreviousHopData) -> Self {
 		Self::ForwardedPaymentInboundClaim {
 			channel_id: prev_hop.outpoint.to_channel_id(),
@@ -5127,11 +5126,17 @@ where
 		self.pending_outbound_payments.finalize_claims(sources, &self.pending_events);
 	}
 
-	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_outpoint: OutPoint) {
+	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage,
+		forwarded_htlc_value_msat: Option<u64>, from_onchain: bool,
+		next_channel_counterparty_node_id: Option<PublicKey>, next_channel_outpoint: OutPoint
+	) {
 		match source {
 			HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } => {
 				debug_assert!(self.background_events_processed_since_startup.load(Ordering::Acquire),
 					"We don't support claim_htlc claims during startup - monitors may not be available yet");
+				if let Some(pubkey) = next_channel_counterparty_node_id {
+					debug_assert_eq!(pubkey, path.hops[0].pubkey);
+				}
 				let ev_completion_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate {
 					channel_funding_outpoint: next_channel_outpoint,
 					counterparty_node_id: path.hops[0].pubkey,
@@ -5142,6 +5147,7 @@ where
 			},
 			HTLCSource::PreviousHopData(hop_data) => {
 				let prev_outpoint = hop_data.outpoint;
+				let completed_blocker = RAAMonitorUpdateBlockingAction::from_prev_hop_data(&hop_data);
 				let res = self.claim_funds_from_hop(hop_data, payment_preimage,
 					|htlc_claim_value_msat| {
 						if let Some(forwarded_htlc_value) = forwarded_htlc_value_msat {
@@ -5157,7 +5163,17 @@ where
 								next_channel_id: Some(next_channel_outpoint.to_channel_id()),
 								outbound_amount_forwarded_msat: forwarded_htlc_value_msat,
 							},
-							downstream_counterparty_and_funding_outpoint: None,
+							downstream_counterparty_and_funding_outpoint:
+								if let Some(node_id) = next_channel_counterparty_node_id {
+									Some((node_id, next_channel_outpoint, completed_blocker))
+								} else {
+									// We can only get `None` here if we are processing a
+									// `ChannelMonitor`-originated event, in which case we
+									// don't care about ensuring we wake the downstream
+									// channel's monitor updating - the channel is already
+									// closed.
+									None
+								},
 						})
 					} else { None }
 				});
@@ -5953,13 +5969,24 @@ where
 			match peer_state.channel_by_id.entry(msg.channel_id) {
 				hash_map::Entry::Occupied(mut chan) => {
 					let res = try_chan_entry!(self, chan.get_mut().update_fulfill_htlc(&msg), chan);
+					if let HTLCSource::PreviousHopData(prev_hop) = &res.0 {
+						peer_state.actions_blocking_raa_monitor_updates.entry(msg.channel_id)
+							.or_insert_with(Vec::new)
+							.push(RAAMonitorUpdateBlockingAction::from_prev_hop_data(&prev_hop));
+					}
+					// Note that we do not need to push an `actions_blocking_raa_monitor_updates`
+					// entry here, even though we *do* need to block the next RAA monitor update.
+					// We do this instead in the `claim_funds_internal` by attaching a
+					// `ReleaseRAAChannelMonitorUpdate` action to the event generated when the
+					// outbound HTLC is claimed. This is guaranteed to all complete before we
+					// process the RAA as messages are processed from single peers serially.
 					funding_txo = chan.get().context.get_funding_txo().expect("We won't accept a fulfill until funded");
 					res
 				},
 				hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
 			}
 		};
-		self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, funding_txo);
+		self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, Some(*counterparty_node_id), funding_txo);
 		Ok(())
 	}
 
@@ -6146,6 +6173,23 @@ where
 		})
 	}
 
+	#[cfg(any(test, feature = "_test_utils"))]
+	pub(crate) fn test_raa_monitor_updates_held(&self,
+		counterparty_node_id: PublicKey, channel_id: ChannelId
+	) -> bool {
+		let per_peer_state = self.per_peer_state.read().unwrap();
+		if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
+			let mut peer_state_lck = peer_state_mtx.lock().unwrap();
+			let peer_state = &mut *peer_state_lck;
+
+			if let Some(chan) = peer_state.channel_by_id.get(&channel_id) {
+				return self.raa_monitor_updates_held(&peer_state.actions_blocking_raa_monitor_updates,
+					chan.context.get_funding_txo().unwrap(), counterparty_node_id);
+			}
+		}
+		false
+	}
+
 	fn internal_revoke_and_ack(&self, counterparty_node_id: &PublicKey, msg: &msgs::RevokeAndACK) -> Result<(), MsgHandleErrInternal> {
 		let (htlcs_to_fail, res) = {
 			let per_peer_state = self.per_peer_state.read().unwrap();
@@ -6342,8 +6386,8 @@ where
 				match monitor_event {
 					MonitorEvent::HTLCEvent(htlc_update) => {
 						if let Some(preimage) = htlc_update.payment_preimage {
-							log_trace!(self.logger, "Claiming HTLC with preimage {} from our monitor", &preimage);
-							self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, funding_outpoint);
+							log_trace!(self.logger, "Claiming HTLC with preimage {} from our monitor", preimage);
+							self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, counterparty_node_id, funding_outpoint);
 						} else {
 							log_trace!(self.logger, "Failing HTLC with hash {} from our monitor", &htlc_update.payment_hash);
 							let receiver = HTLCDestination::NextHopChannel { node_id: counterparty_node_id, channel_id: funding_outpoint.to_channel_id() };
@@ -9135,6 +9179,7 @@ where
 							// downstream chan is closed (because we don't have a
 							// channel_id -> peer map entry).
 							counterparty_opt.is_none(),
+							counterparty_opt.cloned().or(monitor.get_counterparty_node_id()),
 							monitor.get_funding_txo().0))
 					} else { None }
 				} else {
@@ -9406,12 +9451,12 @@ where
 		channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
 	}
 
-	for (source, preimage, downstream_value, downstream_closed, downstream_funding) in pending_claims_to_replay {
+	for (source, preimage, downstream_value, downstream_closed, downstream_node_id, downstream_funding) in pending_claims_to_replay {
 		// We use `downstream_closed` in place of `from_onchain` here just as a guess - we
 		// don't remember in the `ChannelMonitor` where we got a preimage from, but if the
 		// channel is closed we just assume that it probably came from an on-chain claim.
 		channel_manager.claim_funds_internal(source, preimage, Some(downstream_value),
-			downstream_closed, downstream_funding);
+			downstream_closed, downstream_node_id, downstream_funding);
 	}
 
 	//TODO: Broadcast channel update for closed channels, but only after we've made a

lightning/src/ln/functional_test_utils.rs

Lines changed: 1 addition & 14 deletions
@@ -1794,20 +1794,7 @@ pub fn do_commitment_signed_dance(node_a: &Node<'_, '_, '_>, node_b: &Node<'_, '
 	check_added_monitors!(node_a, 1);
 
 	// If this commitment signed dance was due to a claim, don't check for an RAA monitor update.
-	let got_claim = node_a.node.pending_events.lock().unwrap().iter().any(|(ev, action)| {
-		let matching_action = if let Some(channelmanager::EventCompletionAction::ReleaseRAAChannelMonitorUpdate
-			{ channel_funding_outpoint, counterparty_node_id }) = action
-		{
-			if channel_funding_outpoint.to_channel_id() == commitment_signed.channel_id {
-				assert_eq!(*counterparty_node_id, node_b.node.get_our_node_id());
-				true
-			} else { false }
-		} else { false };
-		if matching_action {
-			if let Event::PaymentSent { .. } = ev {} else { panic!(); }
-		}
-		matching_action
-	});
+	let got_claim = node_a.node.test_raa_monitor_updates_held(node_b.node.get_our_node_id(), commitment_signed.channel_id);
 	if fail_backwards { assert!(!got_claim); }
 	commitment_signed_dance!(node_a, node_b, (), fail_backwards, true, false, got_claim);
