Commit abdb9f6

Always remove disconnected peers with no channels
When a peer disconnects but still has channels, the peer's `peer_state` entry in the `per_peer_state` map is not removed by the `peer_disconnected` function. If the channels to that peer are later closed while it is still disconnected (i.e. force closed), we therefore need to remove the peer's `peer_state` entry separately. To do so, we push such peers onto a separate HashSet of peers awaiting removal, and remove them on a timer to limit the negative effects on parallelism as much as possible.
1 parent ec7b1c5 commit abdb9f6
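
In isolation, the mechanism reads roughly like the sketch below. This is a simplified illustration, not LDK's actual API: `PeerRegistry`, `mark_for_removal_if_idle`, `sweep`, and the `String` peer ids are hypothetical stand-ins for `ChannelManager`, its `per_peer_state` map, and `PublicKey`.

```rust
use std::collections::{HashMap, HashSet};
use std::sync::{Mutex, RwLock};

struct PeerState {
    connected: bool,
    channel_ids: HashSet<u64>,
}

struct PeerRegistry {
    // The real code uses a FairRwLock over per-peer Mutexes; a plain RwLock
    // is enough to illustrate the locking trade-off.
    per_peer_state: RwLock<HashMap<String, Mutex<PeerState>>>,
    pending_peers_awaiting_removal: Mutex<HashSet<String>>,
}

impl PeerRegistry {
    /// Called where a channel is removed, with the peer's state already
    /// locked. Only *marks* the peer; no `per_peer_state` write lock needed.
    fn mark_for_removal_if_idle(&self, peer_id: &str, state: &PeerState) {
        if !state.connected && state.channel_ids.is_empty() {
            self.pending_peers_awaiting_removal.lock().unwrap().insert(peer_id.to_owned());
        }
    }

    /// Called on a timer. Takes the write lock once for the whole batch, and
    /// re-checks each peer in case it reconnected or regained channels since
    /// it was marked.
    fn sweep(&self) {
        let pending = std::mem::take(&mut *self.pending_peers_awaiting_removal.lock().unwrap());
        if pending.is_empty() { return; }
        let mut peers = self.per_peer_state.write().unwrap();
        for peer_id in pending {
            let still_removable = peers.get(&peer_id).map_or(false, |m| {
                let state = m.lock().unwrap();
                !state.connected && state.channel_ids.is_empty()
            });
            if still_removable {
                peers.remove(&peer_id);
            }
        }
    }
}
```

Marking is cheap and happens under locks that are already held at the call sites; only the periodic sweep pays for the exclusive `write` lock.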


lightning/src/ln/channelmanager.rs

Lines changed: 120 additions & 17 deletions
@@ -464,6 +464,8 @@ pub(super) struct PeerState<Signer: Sign> {
 	/// Messages to send to the peer - pushed to in the same lock that they are generated in (except
 	/// for broadcast messages, where ordering isn't as strict).
 	pub(super) pending_msg_events: Vec<MessageSendEvent>,
+	/// Represents whether we're connected to the node or not.
+	connected: bool,
 }
 
 /// Stores a PaymentSecret and any other data we may need to validate an inbound payment is
@@ -593,6 +595,8 @@ pub type SimpleRefChannelManager<'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, M, T, F, L> = C
 // | |
 // | |__`best_block`
 // | |
+// | |__`pending_peers_awaiting_removal`
+// | |
 // | |__`pending_events`
 // | |
 // | |__`pending_background_events`
@@ -760,6 +764,16 @@ where
 
 	/// See `ChannelManager` struct-level documentation for lock order requirements.
 	pending_events: Mutex<Vec<events::Event>>,
+	/// When a peer disconnects but still has channels, the peer's `peer_state` entry in the
+	/// `per_peer_state` is not removed by the `peer_disconnected` function. If the channels
+	/// to that peer are later closed while still being disconnected (i.e. force closed), we
+	/// therefore need to remove the peer's `peer_state` entry separately.
+	/// To avoid having to take the `per_peer_state` `write` lock once the channels are closed, we
+	/// instead store such peers awaiting removal in this field, and remove them on a timer to
+	/// limit the negative effects on parallelism as much as possible.
+	///
+	/// See `ChannelManager` struct-level documentation for lock order requirements.
+	pending_peers_awaiting_removal: Mutex<HashSet<PublicKey>>,
 	/// See `ChannelManager` struct-level documentation for lock order requirements.
 	pending_background_events: Mutex<Vec<BackgroundEvent>>,
 	/// Used when we have to take a BIG lock to make sure everything is self-consistent.
@@ -1290,10 +1304,11 @@ macro_rules! try_chan_entry {
 }
 
 macro_rules! remove_channel {
-	($self: expr, $entry: expr) => {
+	($self: expr, $entry: expr, $peer_state: expr) => {
 		{
 			let channel = $entry.remove_entry().1;
 			update_maps_on_chan_removal!($self, channel);
+			$self.add_pending_peer_to_be_removed(channel.get_counterparty_node_id(), $peer_state);
 			channel
 		}
 	}
@@ -1466,6 +1481,7 @@ where
 			per_peer_state: FairRwLock::new(HashMap::new()),
 
 			pending_events: Mutex::new(Vec::new()),
+			pending_peers_awaiting_removal: Mutex::new(HashSet::new()),
 			pending_background_events: Mutex::new(Vec::new()),
 			total_consistency_lock: RwLock::new(()),
 			persistence_notifier: Notifier::new(),
@@ -1704,7 +1720,7 @@ where
 				let (result, is_permanent) =
 					handle_monitor_update_res!(self, update_res, chan_entry.get_mut(), RAACommitmentOrder::CommitmentFirst, chan_entry.key(), NO_UPDATE);
 				if is_permanent {
-					remove_channel!(self, chan_entry);
+					remove_channel!(self, chan_entry, peer_state);
 					break result;
 				}
 			}
@@ -1715,7 +1731,7 @@ where
 				});
 
 				if chan_entry.get().is_shutdown() {
-					let channel = remove_channel!(self, chan_entry);
+					let channel = remove_channel!(self, chan_entry, peer_state);
 					if let Ok(channel_update) = self.get_channel_update_for_broadcast(&channel) {
 						peer_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
 							msg: channel_update
@@ -1818,7 +1834,7 @@ where
 			} else {
 				self.issue_channel_close_events(chan.get(),ClosureReason::HolderForceClosed);
 			}
-			remove_channel!(self, chan)
+			remove_channel!(self, chan, peer_state)
 		} else {
 			return Err(APIError::ChannelUnavailable{ err: format!("Channel with id {} not found for the passed counterparty node_id {}", log_bytes!(*channel_id), peer_node_id) });
 		}
@@ -1857,6 +1873,13 @@ where
 		}
 	}
 
+	fn add_pending_peer_to_be_removed(&self, counterparty_node_id: PublicKey, peer_state: &mut PeerState<<SP::Target as SignerProvider>::Signer>) {
+		let peer_should_be_removed = !peer_state.connected && peer_state.channel_by_id.len() == 0;
+		if peer_should_be_removed {
+			self.pending_peers_awaiting_removal.lock().unwrap().insert(counterparty_node_id);
+		}
+	}
+
 	/// Force closes a channel, immediately broadcasting the latest local transaction(s) and
 	/// rejecting new HTLCs on the given channel. Fails if `channel_id` is unknown to
 	/// the manager, or if the `counterparty_node_id` isn't the counterparty of the corresponding
@@ -3281,6 +3304,34 @@ where
 		true
 	}
 
+	/// Removes peers which have been added to `pending_peers_awaiting_removal` and are
+	/// still disconnected and have no channels left.
+	///
+	/// Must be called without the `per_peer_state` lock acquired.
+	fn remove_peers_awaiting_removal(&self) {
+		let mut pending_peers_awaiting_removal = HashSet::new();
+		mem::swap(&mut *self.pending_peers_awaiting_removal.lock().unwrap(), &mut pending_peers_awaiting_removal);
+		if pending_peers_awaiting_removal.len() > 0 {
+			let mut per_peer_state = self.per_peer_state.write().unwrap();
+			for counterparty_node_id in pending_peers_awaiting_removal.drain() {
+				match per_peer_state.entry(counterparty_node_id) {
+					hash_map::Entry::Occupied(entry) => {
+						// Remove the entry if the peer is still disconnected and we still
+						// have no channels to the peer.
+						let remove_entry = {
+							let peer_state = entry.get().lock().unwrap();
+							!peer_state.connected && peer_state.channel_by_id.len() == 0
+						};
+						if remove_entry {
+							entry.remove_entry();
+						}
+					},
+					hash_map::Entry::Vacant(_) => { /* The PeerState has already been removed */ }
+				}
+			}
+		}
+	}
+
 	#[cfg(any(test, feature = "_test_utils"))]
 	/// Process background events, for functional testing
 	pub fn test_process_background_events(&self) {
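
A note on the `mem::swap` at the top of `remove_peers_awaiting_removal`: it empties the pending set while holding its `Mutex` only momentarily, so the heavier `per_peer_state` `write` lock is never taken while the pending-set lock is held. The re-check under each peer's own lock then guards against the peer having reconnected, or having gained a new channel, between being marked and the sweep running.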
@@ -3359,13 +3410,14 @@ where
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
+				let counterparty_node_id = *counterparty_node_id;
 				peer_state.channel_by_id.retain(|chan_id, chan| {
 					let chan_needs_persist = self.update_channel_fee(chan_id, chan, new_feerate);
 					if chan_needs_persist == NotifyOption::DoPersist { should_persist = NotifyOption::DoPersist; }
 
 					if let Err(e) = chan.timer_check_closing_negotiation_progress() {
 						let (needs_close, err) = convert_chan_err!(self, e, chan, chan_id);
-						handle_errors.push((Err(err), *counterparty_node_id));
+						handle_errors.push((Err(err), counterparty_node_id));
 						if needs_close { return false; }
 					}
 
@@ -3399,8 +3451,10 @@ where
 
 					true
 				});
+				self.add_pending_peer_to_be_removed(counterparty_node_id, peer_state);
 			}
 		}
+		self.remove_peers_awaiting_removal();
 
 		self.claimable_payments.lock().unwrap().claimable_htlcs.retain(|payment_hash, (_, htlcs)| {
 			if htlcs.is_empty() {
@@ -4136,7 +4190,7 @@ where
 				}
 			};
 			peer_state.pending_msg_events.push(send_msg_err_event);
-			let _ = remove_channel!(self, channel);
+			let _ = remove_channel!(self, channel, peer_state);
 			return Err(APIError::APIMisuseError { err: "Please use accept_inbound_channel_from_trusted_peer_0conf to accept channels with zero confirmations.".to_owned() });
 		}
 
@@ -4422,7 +4476,7 @@ where
 				let (result, is_permanent) =
 					handle_monitor_update_res!(self, update_res, chan_entry.get_mut(), RAACommitmentOrder::CommitmentFirst, chan_entry.key(), NO_UPDATE);
 				if is_permanent {
-					remove_channel!(self, chan_entry);
+					remove_channel!(self, chan_entry, peer_state);
 					break result;
 				}
 			}
@@ -4471,7 +4525,7 @@ where
 					// also implies there are no pending HTLCs left on the channel, so we can
 					// fully delete it from tracking (the channel monitor is still around to
 					// watch for old state broadcasts)!
-					(tx, Some(remove_channel!(self, chan_entry)))
+					(tx, Some(remove_channel!(self, chan_entry, peer_state)))
 				} else { (tx, None) }
 			},
 			hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
@@ -4974,12 +5028,11 @@ where
 		if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
 			let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 			let peer_state = &mut *peer_state_lock;
-			let pending_msg_events = &mut peer_state.pending_msg_events;
 			if let hash_map::Entry::Occupied(chan_entry) = peer_state.channel_by_id.entry(funding_outpoint.to_channel_id()) {
-				let mut chan = remove_channel!(self, chan_entry);
+				let mut chan = remove_channel!(self, chan_entry, peer_state);
 				failed_channels.push(chan.force_shutdown(false));
 				if let Ok(update) = self.get_channel_update_for_broadcast(&chan) {
-					pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
+					peer_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
 						msg: update
 					});
 				}
@@ -4989,7 +5042,7 @@ where
 					ClosureReason::CommitmentTxConfirmed
 				};
 				self.issue_channel_close_events(&chan, reason);
-				pending_msg_events.push(events::MessageSendEvent::HandleError {
+				peer_state.pending_msg_events.push(events::MessageSendEvent::HandleError {
 					node_id: chan.get_counterparty_node_id(),
 					action: msgs::ErrorAction::SendErrorMessage {
 						msg: msgs::ErrorMessage { channel_id: chan.channel_id(), data: "Channel force-closed".to_owned() }
@@ -5031,7 +5084,7 @@ where
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();
 
-			for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+			for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -5071,6 +5124,7 @@ where
 					}
 				}
 			});
+			self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 		}
 	}
 
@@ -5095,7 +5149,7 @@ where
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();
 
-			for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+			for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -5133,6 +5187,7 @@ where
 					}
 				}
 			});
+			self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 		}
 	}
 
@@ -5696,7 +5751,7 @@ where
 		let mut timed_out_htlcs = Vec::new();
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();
-			for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+			for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 				let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 				let peer_state = &mut *peer_state_lock;
 				let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -5780,6 +5835,7 @@ where
 					}
 					true
 				});
+				self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 			}
 		}
 
@@ -6026,6 +6082,7 @@ where
 			let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 			let peer_state = &mut *peer_state_lock;
 			let pending_msg_events = &mut peer_state.pending_msg_events;
+			peer_state.connected = false;
 			peer_state.channel_by_id.retain(|_, chan| {
 				chan.remove_uncommitted_htlcs_and_mark_paused(&self.logger);
 				if chan.is_shutdown() {
@@ -6091,17 +6148,20 @@ where
 					channel_by_id: HashMap::new(),
 					latest_features: init_msg.features.clone(),
 					pending_msg_events: Vec::new(),
+					connected: true,
 				}));
 			},
 			hash_map::Entry::Occupied(e) => {
-				e.get().lock().unwrap().latest_features = init_msg.features.clone();
+				let mut peer_state = e.get().lock().unwrap();
+				peer_state.latest_features = init_msg.features.clone();
+				peer_state.connected = true;
 			},
 		}
 	}
 
 	let per_peer_state = self.per_peer_state.read().unwrap();
 
-	for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+	for (cp_id, peer_state_mutex) in per_peer_state.iter() {
 		let mut peer_state_lock = peer_state_mutex.lock().unwrap();
 		let peer_state = &mut *peer_state_lock;
 		let pending_msg_events = &mut peer_state.pending_msg_events;
@@ -6133,6 +6193,7 @@ where
 				}
 				retain
 			});
+			self.add_pending_peer_to_be_removed(*cp_id, peer_state);
 		}
 		//TODO: Also re-broadcast announcement_signatures
 		Ok(())
@@ -6646,6 +6707,8 @@ where
 
 		write_ver_prefix!(writer, SERIALIZATION_VERSION, MIN_SERIALIZATION_VERSION);
 
+		self.remove_peers_awaiting_removal();
+
 		self.genesis_hash.write(writer)?;
 		{
 			let best_block = self.best_block.read().unwrap();
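
Note that the sweep is not only driven by the timer: as the hunk above shows, `write` also calls `remove_peers_awaiting_removal` before serializing, so peers that are disconnected and channel-less are dropped rather than persisted.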
@@ -7113,6 +7176,7 @@ where
 				channel_by_id: peer_channels.remove(&peer_pubkey).unwrap_or(HashMap::new()),
 				latest_features: Readable::read(reader)?,
 				pending_msg_events: Vec::new(),
+				connected: false,
 			};
 			per_peer_state.insert(peer_pubkey, Mutex::new(peer_state));
 		}
@@ -7466,6 +7530,7 @@ where
 			per_peer_state: FairRwLock::new(per_peer_state),
 
 			pending_events: Mutex::new(pending_events_read),
+			pending_peers_awaiting_removal: Mutex::new(HashSet::new()),
 			pending_background_events: Mutex::new(pending_background_events_read),
 			total_consistency_lock: RwLock::new(()),
 			persistence_notifier: Notifier::new(),
@@ -7933,6 +7998,44 @@ mod tests {
 		}
 	}
 
+	#[test]
+	fn test_drop_disconnected_peers_when_removing_channels() {
+		let chanmon_cfgs = create_chanmon_cfgs(2);
+		let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+		let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+		let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+		let chan = create_announced_chan_between_nodes(&nodes, 0, 1);
+
+		nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
+		nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
+
+		nodes[0].node.force_close_broadcasting_latest_txn(&chan.2, &nodes[1].node.get_our_node_id()).unwrap();
+		check_closed_broadcast!(nodes[0], true);
+		check_added_monitors!(nodes[0], 1);
+		check_closed_event!(nodes[0], 1, ClosureReason::HolderForceClosed);
+
+		{
+			// Assert that nodes[1] is awaiting removal for nodes[0] once nodes[1] has been
+			// disconnected and the channel between them has been force closed.
+			let nodes_0_per_peer_state = nodes[0].node.per_peer_state.read().unwrap();
+			let nodes_0_pending_peers_awaiting_removal = nodes[0].node.pending_peers_awaiting_removal.lock().unwrap();
+			assert_eq!(nodes_0_pending_peers_awaiting_removal.len(), 1);
+			assert!(nodes_0_pending_peers_awaiting_removal.get(&nodes[1].node.get_our_node_id()).is_some());
+			// Assert that nodes[1] isn't removed before `timer_tick_occurred` has been executed.
+			assert_eq!(nodes_0_per_peer_state.len(), 1);
+			assert!(nodes_0_per_peer_state.get(&nodes[1].node.get_our_node_id()).is_some());
+		}
+
+		nodes[0].node.timer_tick_occurred();
+
+		{
+			// Assert that nodes[1] has now been removed.
+			assert_eq!(nodes[0].node.per_peer_state.read().unwrap().len(), 0);
+			assert_eq!(nodes[0].node.pending_peers_awaiting_removal.lock().unwrap().len(), 0);
+		}
+	}
+
 	#[test]
 	fn bad_inbound_payment_hash() {
 		// Add coverage for checking that a user-provided payment hash matches the payment secret.
