Skip to content

Commit 2892f98

Browse files
committed
Track message timeout ticks based on internal states
With the introduction of `has_pending_channel_update`, we can now determine whether any messages are owed to irrevocably commit HTLC updates based on the current channel state. We prefer using the channel state, over manually tracking as previously done, to have a single source of truth. We also gain the ability to expect to receive multiple messages at once, which will become relevant with the quiescence protocol, where we may be waiting on a counterparty `revoke_and_ack` and `stfu`.
1 parent 4832364 commit 2892f98

File tree

2 files changed

+49
-48
lines changed

2 files changed

+49
-48
lines changed

lightning/src/ln/channel.rs

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,9 +1142,8 @@ pub(crate) const MIN_AFFORDABLE_HTLC_COUNT: usize = 4;
11421142
/// * `EXPIRE_PREV_CONFIG_TICKS` = convergence_delay / tick_interval
11431143
pub(crate) const EXPIRE_PREV_CONFIG_TICKS: usize = 5;
11441144

1145-
/// The number of ticks that may elapse while we're waiting for a response to a
1146-
/// [`msgs::RevokeAndACK`] or [`msgs::ChannelReestablish`] message before we attempt to disconnect
1147-
/// them.
1145+
/// The number of ticks that may elapse while we're waiting for a response before we attempt to
1146+
/// disconnect them.
11481147
///
11491148
/// See [`ChannelContext::sent_message_awaiting_response`] for more information.
11501149
pub(crate) const DISCONNECT_PEER_AWAITING_RESPONSE_TICKS: usize = 2;
@@ -1826,16 +1825,14 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
18261825
pub workaround_lnd_bug_4006: Option<msgs::ChannelReady>,
18271826

18281827
/// An option set when we wish to track how many ticks have elapsed while waiting for a response
1829-
/// from our counterparty after sending a message. If the peer has yet to respond after reaching
1830-
/// `DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`, a reconnection should be attempted to try to
1831-
/// unblock the state machine.
1828+
/// from our counterparty after entering specific states. If the peer has yet to respond after
1829+
/// reaching `DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`, a reconnection should be attempted to
1830+
/// try to unblock the state machine.
18321831
///
1833-
/// This behavior is mostly motivated by a lnd bug in which we don't receive a message we expect
1834-
/// to in a timely manner, which may lead to channels becoming unusable and/or force-closed. An
1835-
/// example of such can be found at <https://github.com/lightningnetwork/lnd/issues/7682>.
1836-
///
1837-
/// This is currently only used when waiting for a [`msgs::ChannelReestablish`] or
1838-
/// [`msgs::RevokeAndACK`] message from the counterparty.
1832+
/// This behavior was initially motivated by a lnd bug in which we don't receive a message we
1833+
/// expect to in a timely manner, which may lead to channels becoming unusable and/or
1834+
/// force-closed. An example of such can be found at
1835+
/// <https://github.com/lightningnetwork/lnd/issues/7682>.
18391836
sent_message_awaiting_response: Option<usize>,
18401837

18411838
/// This channel's type, as negotiated during channel open
@@ -5928,7 +5925,7 @@ impl<SP: Deref> FundedChannel<SP> where
59285925
// OK, we step the channel here and *then* if the new generation fails we can fail the
59295926
// channel based on that, but stepping stuff here should be safe either way.
59305927
self.context.channel_state.clear_awaiting_remote_revoke();
5931-
self.context.sent_message_awaiting_response = None;
5928+
self.mark_response_received();
59325929
self.context.counterparty_prev_commitment_point = self.context.counterparty_cur_commitment_point;
59335930
self.context.counterparty_cur_commitment_point = Some(msg.next_per_commitment_point);
59345931
self.context.cur_counterparty_commitment_transaction_number -= 1;
@@ -6306,6 +6303,10 @@ impl<SP: Deref> FundedChannel<SP> where
63066303
return Err(())
63076304
}
63086305

6306+
// We only clear `peer_disconnected` if we were able to reestablish the channel. We always
6307+
// reset our awaiting response in case we failed reestablishment and are disconnecting.
6308+
self.context.sent_message_awaiting_response = None;
6309+
63096310
if self.context.channel_state.is_peer_disconnected() {
63106311
// While the below code should be idempotent, it's simpler to just return early, as
63116312
// redundant disconnect events can fire, though they should be rare.
@@ -6366,8 +6367,6 @@ impl<SP: Deref> FundedChannel<SP> where
63666367
}
63676368
}
63686369

6369-
self.context.sent_message_awaiting_response = None;
6370-
63716370
// Reset any quiescence-related state as it is implicitly terminated once disconnected.
63726371
match self.context.channel_state {
63736372
ChannelState::ChannelReady(_) => {
@@ -6500,10 +6499,6 @@ impl<SP: Deref> FundedChannel<SP> where
65006499
commitment_update = None;
65016500
}
65026501

6503-
if commitment_update.is_some() {
6504-
self.mark_awaiting_response();
6505-
}
6506-
65076502
self.context.monitor_pending_revoke_and_ack = false;
65086503
self.context.monitor_pending_commitment_signed = false;
65096504
let order = self.context.resend_order.clone();
@@ -6862,7 +6857,7 @@ impl<SP: Deref> FundedChannel<SP> where
68626857
// Go ahead and unmark PeerDisconnected as various calls we may make check for it (and all
68636858
// remaining cases either succeed or ErrorMessage-fail).
68646859
self.context.channel_state.clear_peer_disconnected();
6865-
self.context.sent_message_awaiting_response = None;
6860+
self.mark_response_received();
68666861

68676862
let shutdown_msg = self.get_outbound_shutdown();
68686863

@@ -6918,9 +6913,6 @@ impl<SP: Deref> FundedChannel<SP> where
69186913
// AwaitingRemoteRevoke set, which indicates we sent a commitment_signed but haven't gotten
69196914
// the corresponding revoke_and_ack back yet.
69206915
let is_awaiting_remote_revoke = self.context.channel_state.is_awaiting_remote_revoke();
6921-
if is_awaiting_remote_revoke && !self.is_awaiting_monitor_update() {
6922-
self.mark_awaiting_response();
6923-
}
69246916
let next_counterparty_commitment_number = INITIAL_COMMITMENT_NUMBER - self.context.cur_counterparty_commitment_transaction_number + if is_awaiting_remote_revoke { 1 } else { 0 };
69256917

69266918
let channel_ready = if msg.next_local_commitment_number == 1 && INITIAL_COMMITMENT_NUMBER - self.holder_commitment_point.transaction_number() == 1 {
@@ -7105,26 +7097,34 @@ impl<SP: Deref> FundedChannel<SP> where
71057097
Ok((closing_signed, None, None))
71067098
}
71077099

7108-
// Marks a channel as waiting for a response from the counterparty. If it's not received
7109-
// [`DISCONNECT_PEER_AWAITING_RESPONSE_TICKS`] after sending our own to them, then we'll attempt
7110-
// a reconnection.
7111-
fn mark_awaiting_response(&mut self) {
7112-
self.context.sent_message_awaiting_response = Some(0);
7100+
fn mark_response_received(&mut self) {
7101+
self.context.sent_message_awaiting_response = None;
71137102
}
71147103

71157104
/// Determines whether we should disconnect the counterparty due to not receiving a response
71167105
/// within our expected timeframe.
71177106
///
7118-
/// This should be called on every [`super::channelmanager::ChannelManager::timer_tick_occurred`].
7107+
/// This should be called for peers with an active socket on every
7108+
/// [`super::channelmanager::ChannelManager::timer_tick_occurred`].
71197109
pub fn should_disconnect_peer_awaiting_response(&mut self) -> bool {
7120-
let ticks_elapsed = if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
7121-
ticks_elapsed
7110+
if let Some(ticks_elapsed) = self.context.sent_message_awaiting_response.as_mut() {
7111+
*ticks_elapsed += 1;
7112+
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
7113+
} else if
7114+
// Cleared upon receiving `channel_reestablish`.
7115+
self.context.channel_state.is_peer_disconnected()
7116+
// Cleared upon receiving `revoke_and_ack`.
7117+
|| self.context.has_pending_channel_update(false)
7118+
|| self.context.has_pending_channel_update(true)
7119+
{
7120+
// This is the first tick we've seen after expecting to make forward progress.
7121+
self.context.sent_message_awaiting_response = Some(1);
7122+
debug_assert!(DISCONNECT_PEER_AWAITING_RESPONSE_TICKS > 1);
7123+
false
71227124
} else {
71237125
// Don't disconnect when we're not waiting on a response.
7124-
return false;
7125-
};
7126-
*ticks_elapsed += 1;
7127-
*ticks_elapsed >= DISCONNECT_PEER_AWAITING_RESPONSE_TICKS
7126+
false
7127+
}
71287128
}
71297129

71307130
pub fn shutdown(
@@ -8290,7 +8290,6 @@ impl<SP: Deref> FundedChannel<SP> where
82908290
log_info!(logger, "Sending a data_loss_protect with no previous remote per_commitment_secret for channel {}", &self.context.channel_id());
82918291
[0;32]
82928292
};
8293-
self.mark_awaiting_response();
82948293
msgs::ChannelReestablish {
82958294
channel_id: self.context.channel_id(),
82968295
// The protocol has two different commitment number concepts - the "commitment

lightning/src/ln/channelmanager.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6520,19 +6520,21 @@ where
65206520

65216521
funded_chan.context.maybe_expire_prev_config();
65226522

6523-
if funded_chan.should_disconnect_peer_awaiting_response() {
6524-
let logger = WithChannelContext::from(&self.logger, &funded_chan.context, None);
6525-
log_debug!(logger, "Disconnecting peer {} due to not making any progress on channel {}",
6526-
counterparty_node_id, chan_id);
6527-
pending_msg_events.push(MessageSendEvent::HandleError {
6528-
node_id: counterparty_node_id,
6529-
action: msgs::ErrorAction::DisconnectPeerWithWarning {
6530-
msg: msgs::WarningMessage {
6531-
channel_id: *chan_id,
6532-
data: "Disconnecting due to timeout awaiting response".to_owned(),
6523+
if peer_state.is_connected {
6524+
if funded_chan.should_disconnect_peer_awaiting_response() {
6525+
let logger = WithChannelContext::from(&self.logger, &funded_chan.context, None);
6526+
log_debug!(logger, "Disconnecting peer {} due to not making any progress on channel {}",
6527+
counterparty_node_id, chan_id);
6528+
pending_msg_events.push(MessageSendEvent::HandleError {
6529+
node_id: counterparty_node_id,
6530+
action: msgs::ErrorAction::DisconnectPeerWithWarning {
6531+
msg: msgs::WarningMessage {
6532+
channel_id: *chan_id,
6533+
data: "Disconnecting due to timeout awaiting response".to_owned(),
6534+
},
65336535
},
6534-
},
6535-
});
6536+
});
6537+
}
65366538
}
65376539

65386540
true

0 commit comments

Comments
 (0)