Skip to content

Commit bcc2bf1

Browse files
lidel authored and MarcoPolo committed
fix(basic_host): set read deadline before multistream Close to prevent blocking
streamWrapper.Close() can block indefinitely when the remote peer is slow or unresponsive during the multistream-select handshake completion. The lazy multistream protocol negotiation defers reading the handshake response until Close() is called. If the remote peer doesn't respond, the read blocks forever, causing goroutine leaks. This is particularly problematic for bitswap servers where taskWorkers can get stuck trying to close streams after sending blocks. The fix sets a read deadline (using DefaultNegotiationTimeout) before calling the multistream Close(), ensuring the operation will time out rather than block indefinitely. Related: multiformats/go-multistream#47 Related: multiformats/go-multistream#48
1 parent 479b24b commit bcc2bf1

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

p2p/host/basic/basic_host.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,11 @@ func (s *streamWrapper) Write(b []byte) (int, error) {
683683
}
684684

685685
// Close closes the wrapped multistream read-writer.
//
// A read deadline of DefaultNegotiationTimeout is armed on the underlying
// stream first, so that Close cannot block indefinitely waiting for the
// multistream-select handshake to complete when the remote peer is slow or
// unresponsive.
// See: https://github.com/multiformats/go-multistream/issues/47
func (s *streamWrapper) Close() error {
	deadline := time.Now().Add(DefaultNegotiationTimeout)
	// The error from setting the deadline is discarded: the close below is
	// attempted regardless of whether the deadline could be applied.
	_ = s.Stream.SetReadDeadline(deadline)
	return s.rw.Close()
}
688693

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
//go:build go1.25
2+
3+
package basichost_test
4+
5+
import (
6+
"testing"
7+
"testing/synctest"
8+
"time"
9+
10+
"github.com/libp2p/go-libp2p/core/network"
11+
basichost "github.com/libp2p/go-libp2p/p2p/host/basic"
12+
"github.com/libp2p/go-libp2p/x/simlibp2p"
13+
14+
"github.com/stretchr/testify/require"
15+
)
16+
17+
// TestStreamCloseDoesNotHangOnUnresponsivePeer verifies that stream.Close()
18+
// returns within DefaultNegotiationTimeout even when the remote peer never
19+
// completes the multistream handshake. Without the read deadline fix in
20+
// streamWrapper.Close(), this would hang indefinitely.
21+
func TestStreamCloseDoesNotHangOnUnresponsivePeer_synctest(t *testing.T) {
22+
synctest.Test(t, func(t *testing.T) {
23+
ctx := t.Context()
24+
25+
h1, h2 := simlibp2p.GetBasicHostPair(t)
26+
defer h1.Close()
27+
defer h2.Close()
28+
29+
const testProto = "/test/hang"
30+
31+
// Manually add protocol to peerstore so h1 thinks h2 supports it.
32+
// This makes NewStream use lazy multistream (skipping negotiation until Close).
33+
h1.Peerstore().AddProtocols(h2.ID(), testProto)
34+
35+
// h2 accepts streams at the network level but never responds to
36+
// multistream protocol negotiation, simulating an unresponsive peer.
37+
h2.Network().SetStreamHandler(func(s network.Stream) {
38+
// Read incoming data but never write back - simulates unresponsive peer
39+
buf := make([]byte, 1024)
40+
for {
41+
_, err := s.Read(buf)
42+
if err != nil {
43+
return
44+
}
45+
}
46+
})
47+
48+
// Open stream to h2 - uses lazy multistream because protocol is "known"
49+
s, err := h1.NewStream(ctx, h2.ID(), testProto)
50+
require.NoError(t, err)
51+
52+
// Trigger the lazy handshake by writing data.
53+
// The write succeeds (buffered), but the read handshake will block
54+
// because h2 never sends a response.
55+
_, err = s.Write([]byte("trigger handshake"))
56+
require.NoError(t, err)
57+
58+
// Close() should return within DefaultNegotiationTimeout because the fix
59+
// sets a read deadline before calling the underlying Close().
60+
// Without the fix, this would hang indefinitely.
61+
elapsedCh := make(chan time.Duration)
62+
go func() {
63+
start := time.Now()
64+
_ = s.Close()
65+
elapsedCh <- time.Since(start)
66+
}()
67+
68+
maxExpected := basichost.DefaultNegotiationTimeout
69+
var elapsed time.Duration
70+
select {
71+
case elapsed = <-elapsedCh:
72+
case <-time.After(maxExpected + time.Second):
73+
t.Fatal("timeout waiting for Close()")
74+
}
75+
76+
require.Equal(t, elapsed, maxExpected,
77+
"Close() took %v, expected < %v (DefaultNegotiationTimeout + margin)", elapsed, maxExpected)
78+
79+
t.Logf("Close() returned in %v (limit: %v)", elapsed, maxExpected)
80+
})
81+
}

0 commit comments

Comments
 (0)