Skip to content

Commit 3ea1394

Browse files
committed
Track message timeout ticks based on internal states
With the introduction of `has_pending_channel_update`, we can now determine whether any messages are owed to irrevocably commit HTLC updates based on the current channel state. We prefer using the channel state, over manually tracking as previously done, to have a single source of truth. We also gain the ability to expect to receive multiple messages at once, which will become relevant with the quiescence protocol, where we may be waiting on a counterparty `revoke_and_ack` and `stfu`.
1 parent 2a5a4cb commit 3ea1394

File tree

2 files changed

+49
-48
lines changed

2 files changed

+49
-48
lines changed

lightning/src/ln/channel.rs

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,9 +1143,8 @@ pub(crate) const MIN_AFFORDABLE_HTLC_COUNT: usize = 4;
11431143
/// * `EXPIRE_PREV_CONFIG_TICKS` = convergence_delay / tick_interval
11441144
pub(crate) const EXPIRE_PREV_CONFIG_TICKS: usize = 5;
11451145

1146-
/// The number of ticks that may elapse while we're waiting for a response to a
1147-
/// [`msgs::RevokeAndACK`] or [`msgs::ChannelReestablish`] message before we attempt to disconnect
1148-
/// them.
1146+
/// The number of ticks that may elapse while we're waiting for a response before we attempt to
1147+
/// disconnect them.
11491148
///
11501149
/// See [`ChannelContext::sent_message_awaiting_response`] for more information.
11511150
pub(crate) const DISCONNECT_PEER_AWAITING_RESPONSE_TICKS: usize = 2;
@@ -1827,16 +1826,14 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
18271826
pub workaround_lnd_bug_4006: Option<msgs::ChannelReady>,
18281827

18291828
/// An option set when we wish to track how many ticks have elapsed while waiting for a response
1830-
/// from our counterparty after sending a message. If the peer has yet to respond after reaching
1831-
/// `DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`, a reconnection should be attempted to try to
1832-
/// unblock the state machine.
1829+
/// from our counterparty after entering specific states. If the peer has yet to respond after
1830+
/// reaching `DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`, a reconnection should be attempted to
1831+
/// try to unblock the state machine.
18331832
///
1834-
/// This behavior is mostly motivated by a lnd bug in which we don't receive a message we expect
1835-
/// to in a timely manner, which may lead to channels becoming unusable and/or force-closed. An
1836-
/// example of such can be found at <https://github.com/lightningnetwork/lnd/issues/7682>.
1837-
///
1838-
/// This is currently only used when waiting for a [`msgs::ChannelReestablish`] or
1839-
/// [`msgs::RevokeAndACK`] message from the counterparty.
1833+
/// This behavior was initially motivated by a lnd bug in which we don't receive a message we
1834+
/// expect to in a timely manner, which may lead to channels becoming unusable and/or
1835+
/// force-closed. An example of such can be found at
1836+
/// <https://github.com/lightningnetwork/lnd/issues/7682>.
18401837
sent_message_awaiting_response: Option<usize>,
18411838

18421839
/// This channel's type, as negotiated during channel open
@@ -5896,7 +5893,7 @@ impl<SP: Deref> FundedChannel<SP> where
58965893
// OK, we step the channel here and *then* if the new generation fails we can fail the
58975894
// channel based on that, but stepping stuff here should be safe either way.
58985895
self.context.channel_state.clear_awaiting_remote_revoke();
5899-
self.context.sent_message_awaiting_response = None;
5896+
self.mark_response_received();
59005897
self.context.counterparty_prev_commitment_point = self.context.counterparty_cur_commitment_point;
59015898
self.context.counterparty_cur_commitment_point = Some(msg.next_per_commitment_point);
59025899
self.context.cur_counterparty_commitment_transaction_number -= 1;
@@ -6262,6 +6259,10 @@ impl<SP: Deref> FundedChannel<SP> where
62626259
return Err(())
62636260
}
62646261

6262+
// We only clear `peer_disconnected` if we were able to reestablish the channel. We always
6263+
// reset our awaiting response in case we failed reestablishment and are disconnecting.
6264+
self.context.sent_message_awaiting_response = None;
6265+
62656266
if self.context.channel_state.is_peer_disconnected() {
62666267
// While the below code should be idempotent, it's simpler to just return early, as
62676268
// redundant disconnect events can fire, though they should be rare.
@@ -6322,8 +6323,6 @@ impl<SP: Deref> FundedChannel<SP> where
63226323
}
63236324
}
63246325

6325-
self.context.sent_message_awaiting_response = None;
6326-
63276326
// Reset any quiescence-related state as it is implicitly terminated once disconnected.
63286327
if matches!(self.context.channel_state, ChannelState::ChannelReady(_)) {
63296328
self.context.channel_state.clear_awaiting_quiescence();
@@ -6448,10 +6447,6 @@ impl<SP: Deref> FundedChannel<SP> where
64486447
commitment_update = None;
64496448
}
64506449

6451-
if commitment_update.is_some() {
6452-
self.mark_awaiting_response();
6453-
}
6454-
64556450
self.context.monitor_pending_revoke_and_ack = false;
64566451
self.context.monitor_pending_commitment_signed = false;
64576452
let order = self.context.resend_order.clone();
@@ -6808,7 +6803,7 @@ impl<SP: Deref> FundedChannel<SP> where
68086803
// Go ahead and unmark PeerDisconnected as various calls we may make check for it (and all
68096804
// remaining cases either succeed or ErrorMessage-fail).
68106805
self.context.channel_state.clear_peer_disconnected();
6811-
self.context.sent_message_awaiting_response = None;
6806+
self.mark_response_received();
68126807

68136808
let shutdown_msg = self.get_outbound_shutdown();
68146809

@@ -6864,9 +6859,6 @@ impl<SP: Deref> FundedChannel<SP> where
68646859
// AwaitingRemoteRevoke set, which indicates we sent a commitment_signed but haven't gotten
68656860
// the corresponding revoke_and_ack back yet.
68666861
let is_awaiting_remote_revoke = self.context.channel_state.is_awaiting_remote_revoke();
6867-
if is_awaiting_remote_revoke && !self.is_awaiting_monitor_update() {
6868-
self.mark_awaiting_response();
6869-
}
68706862
let next_counterparty_commitment_number = INITIAL_COMMITMENT_NUMBER - self.context.cur_counterparty_commitment_transaction_number + if is_awaiting_remote_revoke { 1 } else { 0 };
68716863

68726864
let channel_ready = if msg.next_local_commitment_number == 1 && INITIAL_COMMITMENT_NUMBER - self.holder_commitment_point.transaction_number() == 1 {
@@ -7051,26 +7043,34 @@ impl<SP: Deref> FundedChannel<SP> where
70517043
Ok((closing_signed, None, None))
70527044
}
70537045

7054-
// Marks a channel as waiting for a response from the counterparty. If it's not received
7055-
// [`DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`] after sending our own to them, then we'll attempt
7056-
// a reconnection.
7057-
fn mark_awaiting_response(&mut self) {
7058-
self.context.sent_message_awaiting_response = Some(0);
7046+
fn mark_response_received(&mut self) {
7047+
self.context.sent_message_awaiting_response = None;
70597048
}
70607049

70617050
/// Determines whether we should disconnect the counterparty due to not receiving a response
70627051
/// within our expected timeframe.
70637052
///
7064-
/// This should be called on every [`super::channelmanager::ChannelManager::timer_tick_occurred`].
7053+
/// This should be called for peers with an active socket on every
7054+
/// [`super::channelmanager::ChannelManager::timer_tick_occurred`].
7055+
#[allow(clippy::assertions_on_constants)]
70657056
pub fn should_disconnect_peer_awaiting_response(&mut self) -> bool {
7066-
let ticks_elapsed = if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
7067-
ticks_elapsed
7057+
if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
7058+
*ticks_elapsed += 1;
7059+
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
7060+
} else if
7061+
// Cleared upon receiving `channel_reestablish`.
7062+
self.context.channel_state.is_peer_disconnected()
7063+
// Cleared upon receiving `revoke_and_ack`.
7064+
|| self.context.has_pending_channel_update()
7065+
{
7066+
// This is the first tick we've seen after expecting to make forward progress.
7067+
self.context.sent_message_awaiting_response = Some(1);
7068+
debug_assert!(DISCONNECT_PEER_AWAITING_RESPONSE_TICKS > 1);
7069+
false
70687070
} else {
70697071
// Don't disconnect when we're not waiting on a response.
7070-
return false;
7071-
};
7072-
*ticks_elapsed += 1;
7073-
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
7072+
false
7073+
}
70747074
}
70757075

70767076
pub fn shutdown(
@@ -8233,7 +8233,6 @@ impl<SP: Deref> FundedChannel<SP> where
82338233
log_info!(logger, "Sending a data_loss_protect with no previous remote per_commitment_secret for channel {}", &self.context.channel_id());
82348234
[0;32]
82358235
};
8236-
self.mark_awaiting_response();
82378236
msgs::ChannelReestablish {
82388237
channel_id: self.context.channel_id(),
82398238
// The protocol has two different commitment number concepts - the "commitment

lightning/src/ln/channelmanager.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6532,19 +6532,21 @@ where
65326532

65336533
funded_chan.context.maybe_expire_prev_config();
65346534

6535-
if funded_chan.should_disconnect_peer_awaiting_response() {
6536-
let logger = WithChannelContext::from(&self.logger, &funded_chan.context, None);
6537-
log_debug!(logger, "Disconnecting peer {} due to not making any progress on channel {}",
6538-
counterparty_node_id, chan_id);
6539-
pending_msg_events.push(MessageSendEvent::HandleError {
6540-
node_id: counterparty_node_id,
6541-
action: msgs::ErrorAction::DisconnectPeerWithWarning {
6542-
msg: msgs::WarningMessage {
6543-
channel_id: *chan_id,
6544-
data: "Disconnecting due to timeout awaiting response".to_owned(),
6535+
if peer_state.is_connected {
6536+
if funded_chan.should_disconnect_peer_awaiting_response() {
6537+
let logger = WithChannelContext::from(&self.logger, &funded_chan.context, None);
6538+
log_debug!(logger, "Disconnecting peer {} due to not making any progress on channel {}",
6539+
counterparty_node_id, chan_id);
6540+
pending_msg_events.push(MessageSendEvent::HandleError {
6541+
node_id: counterparty_node_id,
6542+
action: msgs::ErrorAction::DisconnectPeerWithWarning {
6543+
msg: msgs::WarningMessage {
6544+
channel_id: *chan_id,
6545+
data: "Disconnecting due to timeout awaiting response".to_owned(),
6546+
},
65456547
},
6546-
},
6547-
});
6548+
});
6549+
}
65486550
}
65496551

65506552
true

0 commit comments

Comments
 (0)