Commit 35a2b24
Block the mon update removing a preimage until upstream mon writes
When we forward a payment and receive an `update_fulfill_htlc` message from the downstream channel, we immediately claim the HTLC on the upstream channel, before even doing a `commitment_signed` dance on the downstream channel. This implies that our `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money by writing the preimage down, then we write the update that resolves giving money on the downstream node. This is safe as long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of course looking forward we want to support asynchronous updates, which may complete in any order.

Thus, here, we enforce the correct ordering by blocking the downstream `ChannelMonitorUpdate` until the upstream one completes.

Like the `PaymentSent` event handling, we do so only for the `revoke_and_ack` `ChannelMonitorUpdate`, ensuring the preimage-containing upstream update has a full RTT to complete before we actually manage to slow anything down.
1 parent: b479969
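To make the ordering requirement concrete, here is a minimal, self-contained Rust sketch of the idea - hold a dependent monitor update until its prerequisite write completes. The names (`UpdateId`, `MonitorUpdateTracker`) are invented for illustration and are not LDK's actual types:

	// Minimal sketch of the ordering problem described above. NOT LDK's actual
	// API: a dependent monitor update is held until its prerequisite (the
	// preimage-containing upstream update) has been durably persisted.
	use std::collections::HashSet;

	#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
	enum UpdateId {
		UpstreamPreimage, // writes the preimage to the upstream (A <-> B) monitor
		DownstreamRaa,    // the downstream (B <-> C) RAA update we must hold back
	}

	struct MonitorUpdateTracker {
		completed: HashSet<UpdateId>,
		// (blocked update, prerequisite that must complete first)
		blocked: Vec<(UpdateId, UpdateId)>,
	}

	impl MonitorUpdateTracker {
		fn new() -> Self {
			Self {
				completed: HashSet::new(),
				blocked: vec![(UpdateId::DownstreamRaa, UpdateId::UpstreamPreimage)],
			}
		}

		// Called when an asynchronous persist finishes; returns any updates
		// that are now free to go out.
		fn update_completed(&mut self, id: UpdateId) -> Vec<UpdateId> {
			self.completed.insert(id);
			let mut released = Vec::new();
			self.blocked.retain(|(blocked, prereq)| {
				if *prereq == id { released.push(*blocked); false } else { true }
			});
			released
		}
	}

	fn main() {
		let mut tracker = MonitorUpdateTracker::new();
		// No matter which write the async persister finishes first, the downstream
		// RAA update is only released once the upstream preimage write is durable.
		assert_eq!(tracker.update_completed(UpdateId::UpstreamPreimage), vec![UpdateId::DownstreamRaa]);
	}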

File tree

3 files changed: +217, -40 lines

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 133 additions & 4 deletions
@@ -3047,18 +3047,27 @@ fn test_blocked_chan_preimage_release() {
 	check_added_monitors(&nodes[1], 1); // We generate only a preimage monitor update
 	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 
-	// Finish the CS dance between nodes[0] and nodes[1].
-	commitment_signed_dance!(nodes[1], nodes[0], as_htlc_fulfill_updates.commitment_signed, false);
+	// Finish the CS dance between nodes[0] and nodes[1]. Note that the final RAA CS is held
+	// until the full set of `ChannelMonitorUpdate`s on the nodes[1] <-> nodes[2] channel are
+	// complete; while the preimage that we care about ensuring is on disk did make it there above,
+	// the holding logic doesn't care about the type of update, it just cares that there is one.
+	nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.commitment_signed);
+	check_added_monitors(&nodes[1], 1);
+	let (a, raa) = do_main_commitment_signed_dance(&nodes[1], &nodes[0], false);
+	assert!(a.is_none());
+
+	nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &raa);
 	check_added_monitors(&nodes[1], 0);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 
 	let events = nodes[1].node.get_and_clear_pending_events();
 	assert_eq!(events.len(), 3);
 	if let Event::PaymentSent { .. } = events[0] {} else { panic!(); }
 	if let Event::PaymentPathSuccessful { .. } = events[2] {} else { panic!(); }
 	if let Event::PaymentForwarded { .. } = events[1] {} else { panic!(); }
 
-	// The event processing should release the last RAA update.
-	check_added_monitors(&nodes[1], 1);
+	// The event processing should release the last RAA updates on both channels.
+	check_added_monitors(&nodes[1], 2);
 
 	// When we fetch the next update the message getter will generate the next update for nodes[2],
 	// generating a further monitor update.
@@ -3069,3 +3078,123 @@ fn test_blocked_chan_preimage_release() {
 	commitment_signed_dance!(nodes[2], nodes[1], bs_htlc_fulfill_updates.commitment_signed, false);
 	expect_payment_sent(&nodes[2], payment_preimage_2, None, true, true);
 }
+
+fn do_test_inverted_mon_completion_order(complete_bc_commitment_dance: bool) {
+	// When we forward a payment and receive an `update_fulfill_htlc` message from the downstream
+	// channel, we immediately claim the HTLC on the upstream channel, before even doing a
+	// `commitment_signed` dance on the downstream channel. This implies that our
+	// `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money,
+	// then we write the update that resolves giving money on the downstream node. This is safe as
+	// long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of
+	// course this may not be the case. For asynchronous update writes, we have to ensure monitor
+	// updates can block each other, preventing the inversion altogether.
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+
+	let persister;
+	let new_chain_monitor;
+	let nodes_1_deserialized;
+
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2;
+	let chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2;
+
+	// Route a payment from A, through B, to C, then claim it on C. Once we pass B the
+	// `update_fulfill_htlc` we have a monitor update for both of B's channels. We complete the one
+	// on the B<->C channel but leave the A<->B monitor update pending, then reload B.
+	let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000);
+
+	let mon_ab = get_monitor!(nodes[1], chan_id_ab).encode();
+
+	nodes[2].node.claim_funds(payment_preimage);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], payment_hash, 100_000);
+
+	chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	let cs_updates = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());
+	nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]);
+
+	// B generates a new monitor update for the A <-> B channel, but doesn't send the new messages
+	// for it since the monitor update is marked in-progress.
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now step the Commitment Signed Dance between B and C forward a bit (or fully), ensuring we
+	// won't get the preimage when the nodes reconnect, at which point we have to ensure we get it
+	// from the ChannelMonitor.
+	nodes[1].node.handle_commitment_signed(&nodes[2].node.get_our_node_id(), &cs_updates.commitment_signed);
+	check_added_monitors(&nodes[1], 1);
+	if complete_bc_commitment_dance {
+		let (bs_revoke_and_ack, bs_commitment_signed) = get_revoke_commit_msgs!(nodes[1], nodes[2].node.get_our_node_id());
+		nodes[2].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
+		check_added_monitors(&nodes[2], 1);
+		nodes[2].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_commitment_signed);
+		check_added_monitors(&nodes[2], 1);
+		let cs_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
+
+		// At this point node B still hasn't persisted the `ChannelMonitorUpdate` with the
+		// preimage in the A <-> B channel, which will prevent it from persisting the
+		// `ChannelMonitorUpdate` here to avoid "losing" the preimage.
+		nodes[1].node.handle_revoke_and_ack(&nodes[2].node.get_our_node_id(), &cs_raa);
+		check_added_monitors(&nodes[1], 0);
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+	}
+
+	// Now reload node B
+	let manager_b = nodes[1].node.encode();
+
+	let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode();
+	reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, new_chain_monitor, nodes_1_deserialized);
+
+	nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+
+	// If we used the latest ChannelManager to reload from, we should have both channels still
+	// live. The B <-> C channel's final RAA ChannelMonitorUpdate must still be blocked as
+	// before - the ChannelMonitorUpdate for the A <-> B channel hasn't completed.
+	// When we call `timer_tick_occurred` we will get that monitor update back, which we'll
+	// complete after reconnecting to our peers.
+	persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	nodes[1].node.timer_tick_occurred();
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now reconnect B to both A and C. If the B <-> C commitment signed dance wasn't run to
+	// the end go ahead and do that, though the -2 in `reconnect_nodes` indicates that we
+	// expect to *not* receive the final RAA ChannelMonitorUpdate.
+	if complete_bc_commitment_dance {
+		reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+	} else {
+		reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, -2), (0, 0), (0, 0), (0, 0), (0, 0), (false, true));
+	}
+
+	reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+
+	// (Finally) complete the A <-> B ChannelMonitorUpdate, ensuring the preimage is durably on
+	// disk in the proper ChannelMonitor, unblocking the B <-> C ChannelMonitor updating
+	// process.
+	let (outpoint, _, ab_update_id) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone();
+	nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, ab_update_id).unwrap();
+
+	// When we fetch B's HTLC update messages here (now that the ChannelMonitorUpdate has
+	// completed), it will also release the final RAA ChannelMonitorUpdate on the B <-> C
+	// channel.
+	let bs_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
+	do_commitment_signed_dance(&nodes[0], &nodes[1], &bs_updates.commitment_signed, false, false);
+
+	expect_payment_forwarded!(nodes[1], &nodes[0], &nodes[2], Some(1_000), false, false);
+
+	// Finally, check that the payment was, ultimately, seen as sent by node A.
+	expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
+}
+
+#[test]
+fn test_inverted_mon_completion_order() {
+	do_test_inverted_mon_completion_order(true);
+	do_test_inverted_mon_completion_order(false);
+}

lightning/src/ln/channelmanager.rs

Lines changed: 56 additions & 13 deletions
@@ -591,7 +591,6 @@ pub(crate) enum RAAMonitorUpdateBlockingAction {
 }
 
 impl RAAMonitorUpdateBlockingAction {
-	#[allow(unused)]
 	fn from_prev_hop_data(prev_hop: &HTLCPreviousHopData) -> Self {
 		Self::ForwardedPaymentInboundClaim {
 			channel_id: prev_hop.outpoint.to_channel_id(),

@@ -4726,10 +4725,13 @@ where
 		self.pending_outbound_payments.finalize_claims(sources, &self.pending_events);
 	}
 
-	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_outpoint: OutPoint, during_init: bool) {
+	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_counterparty_node_id: Option<PublicKey>, next_channel_outpoint: OutPoint, during_init: bool) {
 		match source {
 			HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } => {
 				debug_assert!(!during_init);
+				if let Some(pubkey) = next_channel_counterparty_node_id {
+					debug_assert_eq!(pubkey, path.hops[0].pubkey);
+				}
 				let ev_completion_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate {
 					channel_funding_outpoint: next_channel_outpoint,
 					counterparty_node_id: path.hops[0].pubkey,

@@ -4740,6 +4742,7 @@ where
 			},
 			HTLCSource::PreviousHopData(hop_data) => {
 				let prev_outpoint = hop_data.outpoint;
+				let completed_blocker = RAAMonitorUpdateBlockingAction::from_prev_hop_data(&hop_data);
 				let res = self.claim_funds_from_hop(hop_data, payment_preimage,
 					|htlc_claim_value_msat| {
 						if let Some(forwarded_htlc_value) = forwarded_htlc_value_msat {

@@ -4755,7 +4758,17 @@ where
 								next_channel_id: Some(next_channel_outpoint.to_channel_id()),
 								outbound_amount_forwarded_msat: forwarded_htlc_value_msat,
 							},
-							downstream_counterparty_and_funding_outpoint: None,
+							downstream_counterparty_and_funding_outpoint:
+								if let Some(node_id) = next_channel_counterparty_node_id {
+									Some((node_id, next_channel_outpoint, completed_blocker))
+								} else {
+									// We can only get `None` here if we are processing a
+									// `ChannelMonitor`-originated event, in which case we
+									// don't care about ensuring we wake the downstream
+									// channel's monitor updating - the channel is already
+									// closed.
+									None
+								},
 						})
 					} else { None }
 				}, during_init);

@@ -5493,13 +5506,27 @@ where
 			match peer_state.channel_by_id.entry(msg.channel_id) {
 				hash_map::Entry::Occupied(mut chan) => {
 					let res = try_chan_entry!(self, chan.get_mut().update_fulfill_htlc(&msg), chan);
+					if let HTLCSource::PreviousHopData(prev_hop) = &res.0 {
+						peer_state.actions_blocking_raa_monitor_updates.entry(msg.channel_id)
+							.or_insert_with(Vec::new)
+							.push(RAAMonitorUpdateBlockingAction::from_prev_hop_data(&prev_hop));
+					}
+					// Note that we do not need to push an `actions_blocking_raa_monitor_updates`
+					// entry here, even though we *do* need to block the next RAA coming in from
+					// generating a monitor update which we let fly. We do this instead in the
+					// `claim_funds_internal` by attaching a `ReleaseRAAChannelMonitorUpdate`
+					// action to the event generated when we "claim" the sent payment. This is
+					// guaranteed to all complete before we process the RAA even though there is no
+					// lock held through that point as we aren't allowed to see another P2P message
+					// from the counterparty until we return, but `claim_funds_internal` runs
+					// first.
					funding_txo = chan.get().context.get_funding_txo().expect("We won't accept a fulfill until funded");
					res
				},
				hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
			}
		};
-		self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, funding_txo, false);
+		self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, Some(*counterparty_node_id), funding_txo, false);
 		Ok(())
 	}
 

@@ -5680,6 +5707,23 @@ where
 		})
 	}
 
+	#[cfg(any(test, feature = "_test_utils"))]
+	pub(crate) fn test_raa_monitor_updates_held(&self, counterparty_node_id: PublicKey,
+		channel_id: [u8; 32])
+	-> bool {
+		let per_peer_state = self.per_peer_state.read().unwrap();
+		if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
+			let mut peer_state_lck = peer_state_mtx.lock().unwrap();
+			let peer_state = &mut *peer_state_lck;
+
+			if let Some(chan) = peer_state.channel_by_id.get(&channel_id) {
+				return self.raa_monitor_updates_held(&peer_state.actions_blocking_raa_monitor_updates,
+					chan.context.get_funding_txo().unwrap(), counterparty_node_id);
+			}
+		}
+		false
+	}
+
 	fn internal_revoke_and_ack(&self, counterparty_node_id: &PublicKey, msg: &msgs::RevokeAndACK) -> Result<(), MsgHandleErrInternal> {
 		let (htlcs_to_fail, res) = {
 			let per_peer_state = self.per_peer_state.read().unwrap();

@@ -5692,12 +5736,10 @@ where
 			match peer_state.channel_by_id.entry(msg.channel_id) {
 				hash_map::Entry::Occupied(mut chan) => {
 					let funding_txo = chan.get().context.get_funding_txo();
-					let mon_update_blocked = self.pending_events.lock().unwrap().iter().any(|(_, action)| {
-						action == &Some(EventCompletionAction::ReleaseRAAChannelMonitorUpdate {
-							channel_funding_outpoint: funding_txo.expect("We won't accept an RAA until funded"),
-							counterparty_node_id: *counterparty_node_id,
-						})
-					});
+					let mon_update_blocked = self.raa_monitor_updates_held(
+						&peer_state.actions_blocking_raa_monitor_updates,
+						chan.get().context.get_funding_txo().expect("We won't accept an RAA until funded"),
+						*counterparty_node_id);
 					let (htlcs_to_fail, monitor_update_opt) = try_chan_entry!(self,
 						chan.get_mut().revoke_and_ack(&msg, &self.logger, mon_update_blocked), chan);
 					let res = if let Some(monitor_update) = monitor_update_opt {

@@ -5876,7 +5918,7 @@ where
 				MonitorEvent::HTLCEvent(htlc_update) => {
 					if let Some(preimage) = htlc_update.payment_preimage {
 						log_trace!(self.logger, "Claiming HTLC with preimage {} from our monitor", log_bytes!(preimage.0));
-						self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, funding_outpoint, false);
+						self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, counterparty_node_id, funding_outpoint, false);
 					} else {
 						log_trace!(self.logger, "Failing HTLC with hash {} from our monitor", log_bytes!(htlc_update.payment_hash.0));
 						let receiver = HTLCDestination::NextHopChannel { node_id: counterparty_node_id, channel_id: funding_outpoint.to_channel_id() };

@@ -8563,6 +8605,7 @@ where
 							if let Some(payment_preimage) = preimage_opt {
 								Some((htlc_source, payment_preimage, htlc.amount_msat,
 									counterparty_opt.is_none(), // i.e. the downstream chan is closed
+									counterparty_opt.cloned().or(monitor.get_counterparty_node_id()),
 									monitor.get_funding_txo().0))
 							} else { None }
 						} else {

@@ -8827,9 +8870,9 @@ where
 			channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
 		}
 
-		for (source, preimage, downstream_value, downstream_closed, downstream_funding) in pending_claims_to_replay {
+		for (source, preimage, downstream_value, downstream_closed, downstream_node_id, downstream_funding) in pending_claims_to_replay {
 			channel_manager.claim_funds_internal(source, preimage, Some(downstream_value),
-				downstream_closed, downstream_funding, true);
+				downstream_closed, downstream_node_id, downstream_funding, true);
 		}
 
 		//TODO: Broadcast channel update for closed channels, but only after we've made a

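As a usage note, the new `test_raa_monitor_updates_held` accessor lets functional tests observe whether a channel's RAA monitor updates are currently being held. A hypothetical assertion (assuming the harness from `do_test_inverted_mon_completion_order` above, where `chan_id_bc` is the B <-> C channel id) might look like:

	// Hypothetical test-side check: B should still be holding the B <-> C RAA
	// monitor update while the A <-> B preimage update remains incomplete.
	assert!(nodes[1].node.test_raa_monitor_updates_held(
		nodes[2].node.get_our_node_id(), chan_id_bc));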