Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions protocols/autonat/tests/autonatv2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,6 @@ async fn dial_back_to_not_supporting() {
let (bob_done_tx, bob_done_rx) = oneshot::channel();

let hannes = new_dummy().await;
let hannes_peer_id = *hannes.local_peer_id();
let unreachable_address = hannes.external_addresses().next().unwrap().clone();
let bob_unreachable_address = unreachable_address.clone();
bob.behaviour_mut()
Expand All @@ -338,7 +337,7 @@ async fn dial_back_to_not_supporting() {
let handler = tokio::spawn(async { hannes.loop_on_next().await });

let alice_task = async {
let (alice_dialing_peer, alice_conn_id) = alice
let (alice_dialing_peer, _) = alice
.wait(|event| match event {
SwarmEvent::Dialing {
peer_id,
Expand All @@ -350,15 +349,9 @@ async fn dial_back_to_not_supporting() {
alice
.wait(|event| match event {
SwarmEvent::OutgoingConnectionError {
connection_id,
peer_id: Some(peer_id),
error: DialError::WrongPeerId { obtained, .. },
} if connection_id == alice_conn_id
&& peer_id == alice_dialing_peer
&& obtained == hannes_peer_id =>
{
Some(())
}
error: DialError::Transport(_),
..
} => Some(()),
_ => None,
})
.await;
Expand Down
2 changes: 1 addition & 1 deletion swarm/src/connection/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ where
self.executor.spawn(
task::new_for_pending_outgoing_connection(
connection_id,
ConcurrentDial::new(dials, concurrency_factor),
ConcurrentDial::new(peer, dials, concurrency_factor),
abort_receiver,
self.pending_connection_events_tx.clone(),
)
Expand Down
14 changes: 12 additions & 2 deletions swarm/src/connection/pool/concurrent_dial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type Dial = BoxFuture<
>;

pub(crate) struct ConcurrentDial {
peer_id: Option<PeerId>,
dials: FuturesUnordered<Dial>,
pending_dials: Box<dyn Iterator<Item = Dial> + Send>,
errors: Vec<(Multiaddr, TransportError<std::io::Error>)>,
Expand All @@ -51,7 +52,11 @@ pub(crate) struct ConcurrentDial {
impl Unpin for ConcurrentDial {}

impl ConcurrentDial {
pub(crate) fn new(pending_dials: Vec<Dial>, concurrency_factor: NonZeroU8) -> Self {
pub(crate) fn new(
peer_id: Option<PeerId>,
pending_dials: Vec<Dial>,
concurrency_factor: NonZeroU8,
) -> Self {
let mut pending_dials = pending_dials.into_iter();

let dials = FuturesUnordered::new();
Expand All @@ -63,6 +68,7 @@ impl ConcurrentDial {
}

Self {
peer_id,
dials,
errors: Default::default(),
pending_dials: Box::new(pending_dials),
Expand All @@ -86,10 +92,14 @@ impl Future for ConcurrentDial {
fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
loop {
match ready!(self.dials.poll_next_unpin(cx)) {
Some((addr, Ok(output))) => {
Some((addr, Ok(output))) if self.peer_id.is_none_or(|id| output.0 == id) => {
let errors = std::mem::take(&mut self.errors);
return Poll::Ready(Ok((addr, output, errors)));
}
Some((addr, Ok(_))) => {
let e = TransportError::Other(std::io::ErrorKind::PermissionDenied.into());
self.errors.push((addr, e));
}
Comment on lines +99 to +102
Copy link
Member

@elenaf9 elenaf9 Jul 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PermissionDenied is a bit misleading since it implies that the user doesn't have sufficient OS permissions to perform the action. Unfortunately there isn't really a good matching ErrorKind in this scenario. Maybe HostUnreachable, or just ErrorKind::Other..

But either way, with this change we'd never surface a DialError::WrongPeerId error anymore, even if all addresses belong to the same wrong peer-id (e.g. because the remote changed their identity), right? I am not sure if we should really change that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed - this is the patch we are floating on our fork to get connections flowing again. Connections to 127, 192, and 10 subnets were always winning the race against the relayed connection which gave a near 100% connection failure rate.

I was having a hard time finding a “proper” fix for this without a much bigger change because WrongPeerId is not a transport error and “Other” at the transport level made it difficult to determine the cause of the error higher up in the call stack.

The only solutions I can come up with:

  • give ConcurrentDial its own error enum
  • use the parent’s enum and return it from ConcurrentDial

Some((addr, Err(e))) => {
self.errors.push((addr, e));
if let Some(dial) = self.pending_dials.next() {
Expand Down
13 changes: 9 additions & 4 deletions swarm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1758,6 +1758,8 @@

#[cfg(test)]
mod tests {
use std::io::ErrorKind;

use libp2p_core::{
multiaddr,
multiaddr::multiaddr,
Expand Down Expand Up @@ -2153,10 +2155,13 @@
.await;
assert_eq!(peer_id.unwrap(), other_id);
match error {
DialError::WrongPeerId { obtained, address } => {
assert_eq!(obtained, *swarm1.local_peer_id());
assert_eq!(address, other_addr);
}
DialError::Transport(e) => match &e.get(0).unwrap().1 {

Check failure on line 2158 in swarm/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy (1.83.0)

accessing first element with `e.get(0)`

Check failure on line 2158 in swarm/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy (beta)

accessing first element with `e.get(0)`
TransportError::Other(e) => match e.kind() {
ErrorKind::PermissionDenied => {}
_ => panic!("wrong error {e:?}"),
},
_ => panic!("wrong error {e:?}"),
},
x => panic!("wrong error {x:?}"),
}
}
Expand Down
Loading