Skip to content

Commit b5a3f81

Browse files
refactor(swarm)!: don't share event buffer for established connections (#3188)
Currently, we only have a single channel for all established connections. This requires us to construct the channel ahead of time, before we even have a connection. As it turns out, sharing this buffer across all connections actually has downsides. In particular, this means a single, very busy connection can starve others by filling up this buffer, forcing other connections to wait until they can emit an event.
1 parent a34411c commit b5a3f81

File tree

3 files changed

+45
-37
lines changed

3 files changed

+45
-37
lines changed

swarm/CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
- Update to `libp2p-core` `v0.39.0`.
44

55
- Removed deprecated Swarm constructors. For transition notes see [0.41.0](#0.41.0). See [PR 3170].
6+
67
- Deprecate functions on `PollParameters` in preparation for `PollParameters` to be removed entirely eventually. See [PR 3153].
78

89
- Add `estblished_in` to `SwarmEvent::ConnectionEstablished`. See [PR 3134].
@@ -15,11 +16,18 @@
1516
- Remove type parameter from `PendingOutboundConnectionError` and `PendingInboundConnectionError`.
1617
These two types are always used with `std::io::Error`. See [PR 3272].
1718

19+
- Replace `SwarmBuilder::connection_event_buffer_size` with `SwarmBuilder::per_connection_event_buffer_size` .
20+
The configured value now applies _per_ connection.
21+
The default values remains 7.
22+
If you have previously set `connection_event_buffer_size` you should re-evaluate what a good size for a _per connection_ buffer is.
23+
See [PR 3188].
24+
1825
[PR 3170]: https://github.com/libp2p/rust-libp2p/pull/3170
1926
[PR 3134]: https://github.com/libp2p/rust-libp2p/pull/3134
2027
[PR 3153]: https://github.com/libp2p/rust-libp2p/pull/3153
2128
[PR 3264]: https://github.com/libp2p/rust-libp2p/pull/3264
2229
[PR 3272]: https://github.com/libp2p/rust-libp2p/pull/3272
30+
[PR 3188]: https://github.com/libp2p/rust-libp2p/pull/3188
2331

2432
# 0.41.1
2533

swarm/src/connection/pool.rs

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::{
3232
use concurrent_dial::ConcurrentDial;
3333
use fnv::FnvHashMap;
3434
use futures::prelude::*;
35+
use futures::stream::SelectAll;
3536
use futures::{
3637
channel::{mpsc, oneshot},
3738
future::{poll_fn, BoxFuture, Either},
@@ -41,6 +42,7 @@ use futures::{
4142
use instant::Instant;
4243
use libp2p_core::connection::Endpoint;
4344
use libp2p_core::muxing::{StreamMuxerBox, StreamMuxerExt};
45+
use std::task::Waker;
4446
use std::{
4547
collections::{hash_map, HashMap},
4648
convert::TryFrom as _,
@@ -117,6 +119,9 @@ where
117119
/// See [`Connection::max_negotiating_inbound_streams`].
118120
max_negotiating_inbound_streams: usize,
119121

122+
/// How many [`task::EstablishedConnectionEvent`]s can be buffered before the connection is back-pressured.
123+
per_connection_event_buffer_size: usize,
124+
120125
/// The executor to use for running connection tasks. Can either be a global executor
121126
/// or a local queue.
122127
executor: ExecSwitch,
@@ -128,14 +133,12 @@ where
128133
/// Receiver for events reported from pending tasks.
129134
pending_connection_events_rx: mpsc::Receiver<task::PendingConnectionEvent>,
130135

131-
/// Sender distributed to established tasks for reporting events back
132-
/// to the pool.
133-
established_connection_events_tx:
134-
mpsc::Sender<task::EstablishedConnectionEvent<THandler::Handler>>,
136+
/// Waker in case we haven't established any connections yet.
137+
no_established_connections_waker: Option<Waker>,
135138

136-
/// Receiver for events reported from established tasks.
137-
established_connection_events_rx:
138-
mpsc::Receiver<task::EstablishedConnectionEvent<THandler::Handler>>,
139+
/// Receivers for events reported from established connections.
140+
established_connection_events:
141+
SelectAll<mpsc::Receiver<task::EstablishedConnectionEvent<THandler::Handler>>>,
139142
}
140143

141144
#[derive(Debug)]
@@ -315,8 +318,6 @@ where
315318
/// Creates a new empty `Pool`.
316319
pub fn new(local_id: PeerId, config: PoolConfig, limits: ConnectionLimits) -> Self {
317320
let (pending_connection_events_tx, pending_connection_events_rx) = mpsc::channel(0);
318-
let (established_connection_events_tx, established_connection_events_rx) =
319-
mpsc::channel(config.task_event_buffer_size);
320321
let executor = match config.executor {
321322
Some(exec) => ExecSwitch::Executor(exec),
322323
None => ExecSwitch::LocalSpawn(Default::default()),
@@ -331,11 +332,12 @@ where
331332
dial_concurrency_factor: config.dial_concurrency_factor,
332333
substream_upgrade_protocol_override: config.substream_upgrade_protocol_override,
333334
max_negotiating_inbound_streams: config.max_negotiating_inbound_streams,
335+
per_connection_event_buffer_size: config.per_connection_event_buffer_size,
334336
executor,
335337
pending_connection_events_tx,
336338
pending_connection_events_rx,
337-
established_connection_events_tx,
338-
established_connection_events_rx,
339+
no_established_connections_waker: None,
340+
established_connection_events: Default::default(),
339341
}
340342
}
341343

@@ -547,9 +549,11 @@ where
547549
//
548550
// Note that established connections are polled before pending connections, thus
549551
// prioritizing established connections over pending connections.
550-
match self.established_connection_events_rx.poll_next_unpin(cx) {
552+
match self.established_connection_events.poll_next_unpin(cx) {
551553
Poll::Pending => {}
552-
Poll::Ready(None) => unreachable!("Pool holds both sender and receiver."),
554+
Poll::Ready(None) => {
555+
self.no_established_connections_waker = Some(cx.waker().clone());
556+
}
553557

554558
Poll::Ready(Some(task::EstablishedConnectionEvent::Notify { id, peer_id, event })) => {
555559
return Poll::Ready(PoolEvent::ConnectionEvent { peer_id, id, event });
@@ -750,27 +754,35 @@ where
750754

751755
let (command_sender, command_receiver) =
752756
mpsc::channel(self.task_command_buffer_size);
757+
let (event_sender, event_receiver) =
758+
mpsc::channel(self.per_connection_event_buffer_size);
759+
753760
conns.insert(
754761
id,
755762
EstablishedConnection {
756763
endpoint: endpoint.clone(),
757764
sender: command_sender,
758765
},
759766
);
767+
self.established_connection_events.push(event_receiver);
768+
if let Some(waker) = self.no_established_connections_waker.take() {
769+
waker.wake();
770+
}
760771

761772
let connection = Connection::new(
762773
muxer,
763774
handler.into_handler(&obtained_peer_id, &endpoint),
764775
self.substream_upgrade_protocol_override,
765776
self.max_negotiating_inbound_streams,
766777
);
778+
767779
self.spawn(
768780
task::new_for_established_connection(
769781
id,
770782
obtained_peer_id,
771783
connection,
772784
command_receiver,
773-
self.established_connection_events_tx.clone(),
785+
event_sender,
774786
)
775787
.boxed(),
776788
);
@@ -1069,7 +1081,7 @@ pub struct PoolConfig {
10691081

10701082
/// Size of the pending connection task event buffer and the established connection task event
10711083
/// buffer.
1072-
pub task_event_buffer_size: usize,
1084+
pub per_connection_event_buffer_size: usize,
10731085

10741086
/// Number of addresses concurrently dialed for a single outbound connection attempt.
10751087
pub dial_concurrency_factor: NonZeroU8,
@@ -1088,7 +1100,7 @@ impl PoolConfig {
10881100
Self {
10891101
executor,
10901102
task_command_buffer_size: 32,
1091-
task_event_buffer_size: 7,
1103+
per_connection_event_buffer_size: 7,
10921104
dial_concurrency_factor: NonZeroU8::new(8).expect("8 > 0"),
10931105
substream_upgrade_protocol_override: None,
10941106
max_negotiating_inbound_streams: 128,
@@ -1113,8 +1125,8 @@ impl PoolConfig {
11131125
/// When the buffer is full, the background tasks of all connections will stall.
11141126
/// In this way, the consumers of network events exert back-pressure on
11151127
/// the network connection I/O.
1116-
pub fn with_connection_event_buffer_size(mut self, n: usize) -> Self {
1117-
self.task_event_buffer_size = n;
1128+
pub fn with_per_connection_event_buffer_size(mut self, n: usize) -> Self {
1129+
self.per_connection_event_buffer_size = n;
11181130
self
11191131
}
11201132

swarm/src/lib.rs

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,31 +1489,19 @@ where
14891489
self
14901490
}
14911491

1492-
/// Configures the number of extra events from the [`ConnectionHandler`] in
1493-
/// destination to the [`NetworkBehaviour`] that can be buffered before
1494-
/// the [`ConnectionHandler`] has to go to sleep.
1495-
///
1496-
/// There exists a buffer of events received from [`ConnectionHandler`]s
1497-
/// that the [`NetworkBehaviour`] has yet to process. This buffer is
1498-
/// shared between all instances of [`ConnectionHandler`]. Each instance of
1499-
/// [`ConnectionHandler`] is guaranteed one slot in this buffer, meaning
1500-
/// that delivering an event for the first time is guaranteed to be
1501-
/// instantaneous. Any extra event delivery, however, must wait for that
1502-
/// first event to be delivered or for an "extra slot" to be available.
1492+
/// Configures the size of the buffer for events sent by a [`ConnectionHandler`] to the
1493+
/// [`NetworkBehaviour`].
15031494
///
1504-
/// This option configures the number of such "extra slots" in this
1505-
/// shared buffer. These extra slots are assigned in a first-come,
1506-
/// first-served basis.
1495+
/// Each connection has its own buffer.
15071496
///
1508-
/// The ideal value depends on the executor used, the CPU speed, the
1509-
/// average number of connections, and the volume of events. If this value
1510-
/// is too low, then the [`ConnectionHandler`]s will be sleeping more often
1497+
/// The ideal value depends on the executor used, the CPU speed and the volume of events.
1498+
/// If this value is too low, then the [`ConnectionHandler`]s will be sleeping more often
15111499
/// than necessary. Increasing this value increases the overall memory
15121500
/// usage, and more importantly the latency between the moment when an
15131501
/// event is emitted and the moment when it is received by the
15141502
/// [`NetworkBehaviour`].
1515-
pub fn connection_event_buffer_size(mut self, n: usize) -> Self {
1516-
self.pool_config = self.pool_config.with_connection_event_buffer_size(n);
1503+
pub fn per_connection_event_buffer_size(mut self, n: usize) -> Self {
1504+
self.pool_config = self.pool_config.with_per_connection_event_buffer_size(n);
15171505
self
15181506
}
15191507

0 commit comments

Comments
 (0)