Skip to content

Commit ade1f34

Browse files
Fix race condition causing async payment failure
As the LSP of an async sender, when we receive an update_add with the hold_htlc flag set, after its onion is decoded we transition the pending HTLC to the ChannelManager::pending_intercepted_htlcs. However, if we receive the release_held_htlc message from the receiver *before* we've had a chance to make this transition, we'll fail to release the HTLC and it will sit in the pending intercepts map until it is failed backwards. To fix this race condition, if we receive release_held_htlc from the recipient we'll not only check the pending_intercepted_htlcs map for the presence of this HTLC but also check the map where we keep HTLCs prior to their onions being decoded.
1 parent d323b9c commit ade1f34

File tree

4 files changed

+131
-9
lines changed

4 files changed

+131
-9
lines changed

lightning/src/blinded_path/message.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,20 @@ pub enum AsyncPaymentsContext {
587587
/// An identifier for the HTLC that should be released by us as the sender's always-online
588588
/// channel counterparty to the often-offline recipient.
589589
intercept_id: InterceptId,
590+
/// The short channel id alias corresponding to the to-be-released inbound HTLC, to help locate
591+
/// the HTLC internally if the [`ReleaseHeldHtlc`] races our node decoding the held HTLC's
592+
/// onion.
593+
///
594+
/// We use the outbound scid alias because it is stable even if the channel splices, unlike
595+
/// regular short channel ids.
596+
///
597+
/// [`ReleaseHeldHtlc`]: crate::onion_message::async_payments::ReleaseHeldHtlc
598+
prev_outbound_scid_alias: u64,
599+
/// The id of the to-be-released HTLC, to help locate the HTLC internally if the
600+
/// [`ReleaseHeldHtlc`] races our node decoding the held HTLC's onion.
601+
///
602+
/// [`ReleaseHeldHtlc`]: crate::onion_message::async_payments::ReleaseHeldHtlc
603+
htlc_id: u64,
590604
},
591605
}
592606

@@ -645,6 +659,8 @@ impl_writeable_tlv_based_enum!(AsyncPaymentsContext,
645659
},
646660
(6, ReleaseHeldHtlc) => {
647661
(0, intercept_id, required),
662+
(2, prev_outbound_scid_alias, required),
663+
(4, htlc_id, required),
648664
},
649665
);
650666

lightning/src/ln/async_payments_tests.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3349,3 +3349,70 @@ fn fail_held_htlcs_when_cfg_unset() {
33493349
PaymentFailureReason::RetriesExhausted,
33503350
);
33513351
}
3352+
3353+
#[test]
3354+
fn release_htlc_races_htlc_onion_decode() {
3355+
// Test that an async sender's LSP will release held HTLCs even if they receive the
3356+
// release_held_htlc message before they have a chance to process the held HTLC's onion. This was
3357+
// previously broken.
3358+
let chanmon_cfgs = create_chanmon_cfgs(4);
3359+
let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
3360+
3361+
let (sender_cfg, recipient_cfg) = (often_offline_node_cfg(), often_offline_node_cfg());
3362+
let mut sender_lsp_cfg = test_default_channel_config();
3363+
sender_lsp_cfg.enable_htlc_hold = true;
3364+
let mut invoice_server_cfg = test_default_channel_config();
3365+
invoice_server_cfg.accept_forwards_to_priv_channels = true;
3366+
3367+
let node_chanmgrs = create_node_chanmgrs(
3368+
4,
3369+
&node_cfgs,
3370+
&[Some(sender_cfg), Some(sender_lsp_cfg), Some(invoice_server_cfg), Some(recipient_cfg)],
3371+
);
3372+
let nodes = create_network(4, &node_cfgs, &node_chanmgrs);
3373+
create_unannounced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 0);
3374+
create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 0);
3375+
create_unannounced_chan_between_nodes_with_value(&nodes, 2, 3, 1_000_000, 0);
3376+
unify_blockheight_across_nodes(&nodes);
3377+
let sender = &nodes[0];
3378+
let sender_lsp = &nodes[1];
3379+
let invoice_server = &nodes[2];
3380+
let recipient = &nodes[3];
3381+
3382+
let amt_msat = 5000;
3383+
let (static_invoice, peer_id, static_invoice_om) =
3384+
build_async_offer_and_init_payment(amt_msat, &nodes);
3385+
let payment_hash =
3386+
lock_in_htlc_for_static_invoice(&static_invoice_om, peer_id, sender, sender_lsp);
3387+
3388+
// The LSP has not transitioned the HTLC to the intercepts map internally because
3389+
// process_pending_htlc_forwards has not been called.
3390+
let (peer_id, held_htlc_om) =
3391+
extract_held_htlc_available_oms(sender, &[sender_lsp, invoice_server, recipient])
3392+
.pop()
3393+
.unwrap();
3394+
recipient.onion_messenger.handle_onion_message(peer_id, &held_htlc_om);
3395+
3396+
// Extract the release_htlc_om and ensure the sender's LSP will release the HTLC on the next call
3397+
// to process_pending_htlc_forwards, even though the HTLC was not yet officially intercepted when
3398+
// the release message arrived.
3399+
let (peer_id, release_htlc_om) =
3400+
extract_release_htlc_oms(recipient, &[sender, sender_lsp, invoice_server]).pop().unwrap();
3401+
sender_lsp.onion_messenger.handle_onion_message(peer_id, &release_htlc_om);
3402+
3403+
sender_lsp.node.process_pending_htlc_forwards();
3404+
let mut events = sender_lsp.node.get_and_clear_pending_msg_events();
3405+
assert_eq!(events.len(), 1);
3406+
let ev = remove_first_msg_event_to_node(&invoice_server.node.get_our_node_id(), &mut events);
3407+
check_added_monitors!(sender_lsp, 1);
3408+
3409+
let path: &[&Node] = &[invoice_server, recipient];
3410+
let args = PassAlongPathArgs::new(sender_lsp, path, amt_msat, payment_hash, ev);
3411+
let claimable_ev = do_pass_along_path(args).unwrap();
3412+
3413+
let route: &[&[&Node]] = &[&[sender_lsp, invoice_server, recipient]];
3414+
let keysend_preimage = extract_payment_preimage(&claimable_ev);
3415+
let (res, _) =
3416+
claim_payment_along_route(ClaimAlongRouteArgs::new(sender, route, keysend_preimage));
3417+
assert_eq!(res, Some(PaidBolt12Invoice::StaticInvoice(static_invoice)));
3418+
}

lightning/src/ln/channelmanager.rs

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3509,6 +3509,7 @@ macro_rules! emit_initial_channel_ready_event {
35093509
macro_rules! handle_monitor_update_completion {
35103510
($self: ident, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr) => { {
35113511
let channel_id = $chan.context.channel_id();
3512+
let outbound_scid_alias = $chan.context().outbound_scid_alias();
35123513
let counterparty_node_id = $chan.context.get_counterparty_node_id();
35133514
#[cfg(debug_assertions)]
35143515
{
@@ -3521,7 +3522,7 @@ macro_rules! handle_monitor_update_completion {
35213522
let mut updates = $chan.monitor_updating_restored(&&logger,
35223523
&$self.node_signer, $self.chain_hash, &*$self.config.read().unwrap(),
35233524
$self.best_block.read().unwrap().height,
3524-
|htlc_id| $self.path_for_release_held_htlc(htlc_id, &channel_id, &counterparty_node_id));
3525+
|htlc_id| $self.path_for_release_held_htlc(htlc_id, outbound_scid_alias, &channel_id, &counterparty_node_id));
35253526
let channel_update = if updates.channel_ready.is_some()
35263527
&& $chan.context.is_usable()
35273528
&& $peer_state.is_connected
@@ -5623,11 +5624,17 @@ where
56235624
/// [`HeldHtlcAvailable`] onion message, so the recipient's [`ReleaseHeldHtlc`] response will be
56245625
/// received to our node.
56255626
fn path_for_release_held_htlc(
5626-
&self, htlc_id: u64, channel_id: &ChannelId, counterparty_node_id: &PublicKey,
5627+
&self, htlc_id: u64, prev_outbound_scid_alias: u64, channel_id: &ChannelId,
5628+
counterparty_node_id: &PublicKey,
56275629
) -> BlindedMessagePath {
56285630
let intercept_id =
56295631
InterceptId::from_htlc_id_and_chan_id(htlc_id, channel_id, counterparty_node_id);
5630-
self.flow.path_for_release_held_htlc(intercept_id, &*self.entropy_source)
5632+
self.flow.path_for_release_held_htlc(
5633+
intercept_id,
5634+
prev_outbound_scid_alias,
5635+
htlc_id,
5636+
&*self.entropy_source,
5637+
)
56315638
}
56325639

56335640
/// Signals that no further attempts for the given payment should occur. Useful if you have a
@@ -11302,14 +11309,15 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1130211309
// disconnect, so Channel's reestablish will never hand us any holding cell
1130311310
// freed HTLCs to fail backwards. If in the future we no longer drop pending
1130411311
// add-HTLCs on disconnect, we may be handed HTLCs to fail backwards here.
11312+
let outbound_scid_alias = chan.context.outbound_scid_alias();
1130511313
let res = chan.channel_reestablish(
1130611314
msg,
1130711315
&&logger,
1130811316
&self.node_signer,
1130911317
self.chain_hash,
1131011318
&self.config.read().unwrap(),
1131111319
&*self.best_block.read().unwrap(),
11312-
|htlc_id| self.path_for_release_held_htlc(htlc_id, &msg.channel_id, counterparty_node_id)
11320+
|htlc_id| self.path_for_release_held_htlc(htlc_id, outbound_scid_alias, &msg.channel_id, counterparty_node_id)
1131311321
);
1131411322
let responses = try_channel_entry!(self, peer_state, res, chan_entry);
1131511323
let mut channel_update = None;
@@ -11786,11 +11794,12 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1178611794
// Returns whether we should remove this channel as it's just been closed.
1178711795
let unblock_chan = |chan: &mut Channel<SP>, pending_msg_events: &mut Vec<MessageSendEvent>| -> Option<ShutdownResult> {
1178811796
let channel_id = chan.context().channel_id();
11797+
let outbound_scid_alias = chan.context().outbound_scid_alias();
1178911798
let logger = WithChannelContext::from(&self.logger, &chan.context(), None);
1179011799
let node_id = chan.context().get_counterparty_node_id();
1179111800
if let Some(msgs) = chan.signer_maybe_unblocked(
1179211801
self.chain_hash, &&logger,
11793-
|htlc_id| self.path_for_release_held_htlc(htlc_id, &channel_id, &node_id)
11802+
|htlc_id| self.path_for_release_held_htlc(htlc_id, outbound_scid_alias, &channel_id, &node_id)
1179411803
) {
1179511804
if chan.context().is_connected() {
1179611805
if let Some(msg) = msgs.open_channel {
@@ -15029,7 +15038,34 @@ where
1502915038
);
1503015039
}
1503115040
},
15032-
AsyncPaymentsContext::ReleaseHeldHtlc { intercept_id } => {
15041+
AsyncPaymentsContext::ReleaseHeldHtlc {
15042+
intercept_id,
15043+
prev_outbound_scid_alias,
15044+
htlc_id,
15045+
} => {
15046+
// It's possible the release_held_htlc message raced ahead of us transitioning the pending
15047+
// update_add to `Self::pending_intercept_htlcs`. If that's the case, update the pending
15048+
// update_add to indicate that the HTLC should be released immediately.
15049+
//
15050+
// Check for the HTLC here before checking `pending_intercept_htlcs` to avoid a different
15051+
// race where the HTLC gets transitioned to `pending_intercept_htlcs` after we drop that
15052+
// map's lock but before acquiring the `decode_update_add_htlcs` lock.
15053+
let mut decode_update_add_htlcs = self.decode_update_add_htlcs.lock().unwrap();
15054+
if let Some(htlcs) = decode_update_add_htlcs.get_mut(&prev_outbound_scid_alias) {
15055+
for update_add in htlcs.iter_mut() {
15056+
if update_add.htlc_id == htlc_id {
15057+
log_trace!(
15058+
self.logger,
15059+
"Marking held htlc with intercept_id {} as ready to release",
15060+
intercept_id
15061+
);
15062+
update_add.hold_htlc.take();
15063+
return;
15064+
}
15065+
}
15066+
}
15067+
core::mem::drop(decode_update_add_htlcs);
15068+
1503315069
let mut htlc = {
1503415070
let mut pending_intercept_htlcs =
1503515071
self.pending_intercepted_htlcs.lock().unwrap();

lightning/src/offers/flow.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,14 +1227,17 @@ where
12271227
///
12281228
/// [`ReleaseHeldHtlc`]: crate::onion_message::async_payments::ReleaseHeldHtlc
12291229
pub fn path_for_release_held_htlc<ES: Deref>(
1230-
&self, intercept_id: InterceptId, entropy: ES,
1230+
&self, intercept_id: InterceptId, prev_outbound_scid_alias: u64, htlc_id: u64, entropy: ES,
12311231
) -> BlindedMessagePath
12321232
where
12331233
ES::Target: EntropySource,
12341234
{
12351235
// In the future, we should support multi-hop paths here.
1236-
let context =
1237-
MessageContext::AsyncPayments(AsyncPaymentsContext::ReleaseHeldHtlc { intercept_id });
1236+
let context = MessageContext::AsyncPayments(AsyncPaymentsContext::ReleaseHeldHtlc {
1237+
intercept_id,
1238+
prev_outbound_scid_alias,
1239+
htlc_id,
1240+
});
12381241
let num_dummy_hops = PADDED_PATH_LENGTH.saturating_sub(1);
12391242
BlindedMessagePath::new_with_dummy_hops(
12401243
&[],

0 commit comments

Comments
 (0)