Skip to content

Commit 103f197

Browse files
committed
Track message type for messages we are expecting to receive
This was previously not considered because we only ever expected to receive a specific message at a time. This will change with quiescence, as we may be waiting for the counterparty's `stfu` in response to ours, while at the same time waiting for a counterparty's `revoke_and_ack` to irrevocably commit any pending updates. If this were unchanged, we could potentially overwrite our expectation to receive `stfu` with the `revoke_and_ack` and the channel would become stuck (until a disconnect) if the counterparty chooses to never send the `stfu`.
1 parent a91196b commit 103f197

File tree

1 file changed

+50
-28
lines changed

1 file changed

+50
-28
lines changed

lightning/src/ln/channel.rs

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ use crate::io;
6868
use crate::prelude::*;
6969
use core::{cmp,mem,fmt};
7070
use core::ops::Deref;
71+
use alloc::collections::BTreeMap;
7172
#[cfg(any(test, fuzzing, debug_assertions))]
7273
use crate::sync::Mutex;
7374
use crate::sign::type_resolver::ChannelSignerType;
@@ -1112,12 +1113,17 @@ pub(crate) const MIN_AFFORDABLE_HTLC_COUNT: usize = 4;
11121113
pub(crate) const EXPIRE_PREV_CONFIG_TICKS: usize = 5;
11131114

11141115
/// The number of ticks that may elapse while we're waiting for a response to a
1115-
/// [`msgs::RevokeAndACK`] or [`msgs::ChannelReestablish`] message before we attempt to disconnect
1116-
/// them.
1116+
/// [`AwaitingResponseType`] message before we attempt to disconnect them.
11171117
///
11181118
/// See [`ChannelContext::sent_message_awaiting_response`] for more information.
11191119
pub(crate) const DISCONNECT_PEER_AWAITING_RESPONSE_TICKS: usize = 2;
11201120

1121+
#[derive(Eq, PartialEq, Ord, PartialOrd)]
1122+
pub(crate) enum AwaitingResponseType {
1123+
ChannelReestablish,
1124+
RevokeAndAck,
1125+
}
1126+
11211127
/// The number of ticks that may elapse while we're waiting for an unfunded outbound/inbound channel
11221128
/// to be promoted to a [`FundedChannel`] since the unfunded channel was created. An unfunded channel
11231129
/// exceeding this age limit will be force-closed and purged from memory.
@@ -1802,10 +1808,7 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
18021808
/// This behavior is mostly motivated by a lnd bug in which we don't receive a message we expect
18031809
/// to in a timely manner, which may lead to channels becoming unusable and/or force-closed. An
18041810
/// example of such can be found at <https://github.com/lightningnetwork/lnd/issues/7682>.
1805-
///
1806-
/// This is currently only used when waiting for a [`msgs::ChannelReestablish`] or
1807-
/// [`msgs::RevokeAndACK`] message from the counterparty.
1808-
sent_message_awaiting_response: Option<usize>,
1811+
sent_message_awaiting_response: BTreeMap<AwaitingResponseType, usize>,
18091812

18101813
/// This channel's type, as negotiated during channel open
18111814
channel_type: ChannelTypeFeatures,
@@ -2517,7 +2520,7 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
25172520
next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
25182521

25192522
workaround_lnd_bug_4006: None,
2520-
sent_message_awaiting_response: None,
2523+
sent_message_awaiting_response: BTreeMap::new(),
25212524

25222525
latest_inbound_scid_alias: None,
25232526
outbound_scid_alias: 0,
@@ -2747,7 +2750,7 @@ impl<SP: Deref> ChannelContext<SP> where SP::Target: SignerProvider {
27472750
next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
27482751

27492752
workaround_lnd_bug_4006: None,
2750-
sent_message_awaiting_response: None,
2753+
sent_message_awaiting_response: BTreeMap::new(),
27512754

27522755
latest_inbound_scid_alias: None,
27532756
outbound_scid_alias,
@@ -5775,7 +5778,7 @@ impl<SP: Deref> FundedChannel<SP> where
57755778
// OK, we step the channel here and *then* if the new generation fails we can fail the
57765779
// channel based on that, but stepping stuff here should be safe either way.
57775780
self.context.channel_state.clear_awaiting_remote_revoke();
5778-
self.context.sent_message_awaiting_response = None;
5781+
self.mark_response_received(AwaitingResponseType::RevokeAndAck);
57795782
self.context.counterparty_prev_commitment_point = self.context.counterparty_cur_commitment_point;
57805783
self.context.counterparty_cur_commitment_point = Some(msg.next_per_commitment_point);
57815784
self.context.cur_counterparty_commitment_transaction_number -= 1;
@@ -6141,6 +6144,10 @@ impl<SP: Deref> FundedChannel<SP> where
61416144
return Err(())
61426145
}
61436146

6147+
// We only clear `peer_disconnected` if we were able to reestablish the channel. We always
6148+
// clear our awaiting responses in case we failed reestablishment and are disconnecting.
6149+
self.context.sent_message_awaiting_response.clear();
6150+
61446151
if self.context.channel_state.is_peer_disconnected() {
61456152
// While the below code should be idempotent, it's simpler to just return early, as
61466153
// redundant disconnect events can fire, though they should be rare.
@@ -6201,8 +6208,6 @@ impl<SP: Deref> FundedChannel<SP> where
62016208
}
62026209
}
62036210

6204-
self.context.sent_message_awaiting_response = None;
6205-
62066211
self.context.channel_state.set_peer_disconnected();
62076212
log_trace!(logger, "Peer disconnection resulted in {} remote-announced HTLC drops on channel {}", inbound_drop_count, &self.context.channel_id());
62086213
Ok(())
@@ -6319,7 +6324,7 @@ impl<SP: Deref> FundedChannel<SP> where
63196324
}
63206325

63216326
if commitment_update.is_some() {
6322-
self.mark_awaiting_response();
6327+
self.mark_awaiting_response(AwaitingResponseType::RevokeAndAck);
63236328
}
63246329

63256330
self.context.monitor_pending_revoke_and_ack = false;
@@ -6424,6 +6429,10 @@ impl<SP: Deref> FundedChannel<SP> where
64246429
commitment_update = None;
64256430
}
64266431

6432+
if commitment_update.is_some() {
6433+
self.mark_awaiting_response(AwaitingResponseType::RevokeAndAck);
6434+
}
6435+
64276436
let (closing_signed, signed_closing_tx, shutdown_result) = if self.context.signer_pending_closing {
64286437
debug_assert!(self.context.last_sent_closing_fee.is_some());
64296438
if let Some((fee, skip_remote_output, fee_range, holder_sig)) = self.context.last_sent_closing_fee.clone() {
@@ -6675,7 +6684,7 @@ impl<SP: Deref> FundedChannel<SP> where
66756684
// Go ahead and unmark PeerDisconnected as various calls we may make check for it (and all
66766685
// remaining cases either succeed or ErrorMessage-fail).
66776686
self.context.channel_state.clear_peer_disconnected();
6678-
self.context.sent_message_awaiting_response = None;
6687+
self.mark_response_received(AwaitingResponseType::ChannelReestablish);
66796688

66806689
let shutdown_msg = self.get_outbound_shutdown();
66816690

@@ -6731,9 +6740,6 @@ impl<SP: Deref> FundedChannel<SP> where
67316740
// AwaitingRemoteRevoke set, which indicates we sent a commitment_signed but haven't gotten
67326741
// the corresponding revoke_and_ack back yet.
67336742
let is_awaiting_remote_revoke = self.context.channel_state.is_awaiting_remote_revoke();
6734-
if is_awaiting_remote_revoke && !self.is_awaiting_monitor_update() {
6735-
self.mark_awaiting_response();
6736-
}
67376743
let next_counterparty_commitment_number = INITIAL_COMMITMENT_NUMBER - self.context.cur_counterparty_commitment_transaction_number + if is_awaiting_remote_revoke { 1 } else { 0 };
67386744

67396745
let channel_ready = if msg.next_local_commitment_number == 1 && INITIAL_COMMITMENT_NUMBER - self.holder_commitment_point.transaction_number() == 1 {
@@ -6748,6 +6754,11 @@ impl<SP: Deref> FundedChannel<SP> where
67486754
log_debug!(logger, "Reconnected channel {} with no loss", &self.context.channel_id());
67496755
}
67506756

6757+
if is_awaiting_remote_revoke {
6758+
// We don't have a `commitment_signed` to send, but they do owe us a `revoke_and_ack`.
6759+
self.mark_awaiting_response(AwaitingResponseType::RevokeAndAck);
6760+
}
6761+
67516762
Ok(ReestablishResponses {
67526763
channel_ready, shutdown_msg, announcement_sigs,
67536764
raa: required_revoke,
@@ -6785,6 +6796,12 @@ impl<SP: Deref> FundedChannel<SP> where
67856796
} else {
67866797
required_revoke
67876798
};
6799+
6800+
if is_awaiting_remote_revoke && !self.context.signer_pending_commitment_update {
6801+
// We only expect a `revoke_and_ack` once we're ready to send `commitment_signed`.
6802+
self.mark_awaiting_response(AwaitingResponseType::RevokeAndAck);
6803+
}
6804+
67886805
Ok(ReestablishResponses {
67896806
channel_ready, shutdown_msg, announcement_sigs,
67906807
raa, commitment_update,
@@ -6921,23 +6938,28 @@ impl<SP: Deref> FundedChannel<SP> where
69216938
// Marks a channel as waiting for a response from the counterparty. If it's not received
69226939
// [`DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`] after sending our own to them, then we'll attempt
69236940
// a reconnection.
6924-
fn mark_awaiting_response(&mut self) {
6925-
self.context.sent_message_awaiting_response = Some(0);
6941+
fn mark_awaiting_response(&mut self, typ: AwaitingResponseType) {
6942+
let existing_ticks = self.context.sent_message_awaiting_response.insert(typ, 0);
6943+
debug_assert!(existing_ticks.is_none(), "We should always receive the response or give up before awaiting again");
6944+
}
6945+
6946+
fn mark_response_received(&mut self, typ: AwaitingResponseType) {
6947+
let awaiting_response_ticks = self.context.sent_message_awaiting_response.remove(&typ);
6948+
debug_assert!(awaiting_response_ticks.is_some(), "We should always await this response type");
69266949
}
69276950

69286951
/// Determines whether we should disconnect the counterparty due to not receiving a response
69296952
/// within our expected timeframe.
69306953
///
69316954
/// This should be called on every [`super::channelmanager::ChannelManager::timer_tick_occurred`].
69326955
pub fn should_disconnect_peer_awaiting_response(&mut self) -> bool {
6933-
let ticks_elapsed = if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
6934-
ticks_elapsed
6935-
} else {
6936-
// Don't disconnect when we're not waiting on a response.
6937-
return false;
6938-
};
6939-
*ticks_elapsed += 1;
6940-
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
6956+
for ticks_elapsed in self.context.sent_message_awaiting_response.values_mut() {
6957+
*ticks_elapsed += 1;
6958+
if *ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS {
6959+
return true;
6960+
}
6961+
}
6962+
false
69416963
}
69426964

69436965
pub fn shutdown(
@@ -8087,7 +8109,7 @@ impl<SP: Deref> FundedChannel<SP> where
80878109
log_info!(logger, "Sending a data_loss_protect with no previous remote per_commitment_secret for channel {}", &self.context.channel_id());
80888110
[0;32]
80898111
};
8090-
self.mark_awaiting_response();
8112+
self.mark_awaiting_response(AwaitingResponseType::ChannelReestablish);
80918113
msgs::ChannelReestablish {
80928114
channel_id: self.context.channel_id(),
80938115
// The protocol has two different commitment number concepts - the "commitment
@@ -10427,7 +10449,7 @@ impl<'a, 'b, 'c, ES: Deref, SP: Deref> ReadableArgs<(&'a ES, &'b SP, u32, &'c Ch
1042710449
next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
1042810450

1042910451
workaround_lnd_bug_4006: None,
10430-
sent_message_awaiting_response: None,
10452+
sent_message_awaiting_response: BTreeMap::new(),
1043110453

1043210454
latest_inbound_scid_alias,
1043310455
// Later in the ChannelManager deserialization phase we scan for channels and assign scid aliases if its missing

0 commit comments

Comments
 (0)