Skip to content

Commit 7bac4b1

Browse files
committed
TUN-5719: Re-attempt connection to edge with QUIC despite network error when there is no fallback
We have made 2 changes in the past that caused an unexpected edge case: 1. when faced with QUIC "no network activity", give up re-attempts and fall-back 2. when a protocol is chosen explicitly, rather than using auto (the default), do not fallback The reasoning for 1. was to fallback quickly in situations where the user may not have chosen QUIC, and simply got it because we auto-chose it (with the TXT DNS record), but the users' environment does not allow egress via UDP. The reasoning for 2. was to avoid falling back if the user explicitly chooses a protocol. E.g., if the user chooses QUIC, she may want to do UDP proxying, so if we fallback to HTTP2 protocol that will be unexpected since it does not support UDP (and same applies for HTTP2 falling back to h2mux and TCP proxying). This PR fixes the edge case that happens when both those changes 1. and 2. are put together: when faced with a QUIC "no network activity", we should only try to fallback if there is a possible fallback. Otherwise, we should exhaust the retries as normal.
1 parent 8a5343d commit 7bac4b1

File tree

2 files changed

+48
-21
lines changed

2 files changed

+48
-21
lines changed

origin/tunnel.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,12 +185,11 @@ func ServeTunnelLoop(
185185
case <-gracefulShutdownC:
186186
return nil
187187
case <-protocolFallback.BackoffTimer():
188-
var idleTimeoutError *quic.IdleTimeoutError
189188
if !selectNextProtocol(
190189
connLog.Logger(),
191190
protocolFallback,
192191
config.ProtocolSelector,
193-
errors.As(err, &idleTimeoutError),
192+
err,
194193
) {
195194
return err
196195
}
@@ -223,9 +222,13 @@ func selectNextProtocol(
223222
connLog *zerolog.Logger,
224223
protocolBackoff *protocolFallback,
225224
selector connection.ProtocolSelector,
226-
isNetworkActivityTimeout bool,
225+
cause error,
227226
) bool {
228-
if protocolBackoff.ReachedMaxRetries() || isNetworkActivityTimeout {
227+
var idleTimeoutError *quic.IdleTimeoutError
228+
isNetworkActivityTimeout := errors.As(cause, &idleTimeoutError)
229+
_, hasFallback := selector.Fallback()
230+
231+
if protocolBackoff.ReachedMaxRetries() || (hasFallback && isNetworkActivityTimeout) {
229232
fallback, hasFallback := selector.Fallback()
230233
if !hasFallback {
231234
return false

origin/tunnel_test.go

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"testing"
55
"time"
66

7+
"github.com/lucas-clemente/quic-go"
78
"github.com/rs/zerolog"
89
"github.com/stretchr/testify/assert"
910

@@ -53,48 +54,71 @@ func TestWaitForBackoffFallback(t *testing.T) {
5354
initProtocol := protocolSelector.Current()
5455
assert.Equal(t, connection.HTTP2, initProtocol)
5556

56-
protocolFallback := &protocolFallback{
57+
protoFallback := &protocolFallback{
5758
backoff,
5859
initProtocol,
5960
false,
6061
}
6162

6263
// Retry #0 and #1. At retry #2, we switch protocol, so the fallback loop has one more retry than this
6364
for i := 0; i < int(maxRetries-1); i++ {
64-
protocolFallback.BackoffTimer() // simulate retry
65-
ok := selectNextProtocol(&log, protocolFallback, protocolSelector, false)
65+
protoFallback.BackoffTimer() // simulate retry
66+
ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil)
6667
assert.True(t, ok)
67-
assert.Equal(t, initProtocol, protocolFallback.protocol)
68+
assert.Equal(t, initProtocol, protoFallback.protocol)
6869
}
6970

7071
// Retry fallback protocol
7172
for i := 0; i < int(maxRetries); i++ {
72-
protocolFallback.BackoffTimer() // simulate retry
73-
ok := selectNextProtocol(&log, protocolFallback, protocolSelector, false)
73+
protoFallback.BackoffTimer() // simulate retry
74+
ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil)
7475
assert.True(t, ok)
7576
fallback, ok := protocolSelector.Fallback()
7677
assert.True(t, ok)
77-
assert.Equal(t, fallback, protocolFallback.protocol)
78+
assert.Equal(t, fallback, protoFallback.protocol)
7879
}
7980

8081
currentGlobalProtocol := protocolSelector.Current()
8182
assert.Equal(t, initProtocol, currentGlobalProtocol)
8283

8384
// No protocol to fallback, return error
84-
protocolFallback.BackoffTimer() // simulate retry
85-
ok := selectNextProtocol(&log, protocolFallback, protocolSelector, false)
85+
protoFallback.BackoffTimer() // simulate retry
86+
ok := selectNextProtocol(&log, protoFallback, protocolSelector, nil)
8687
assert.False(t, ok)
8788

88-
protocolFallback.reset()
89-
protocolFallback.BackoffTimer() // simulate retry
90-
ok = selectNextProtocol(&log, protocolFallback, protocolSelector, false)
89+
protoFallback.reset()
90+
protoFallback.BackoffTimer() // simulate retry
91+
ok = selectNextProtocol(&log, protoFallback, protocolSelector, nil)
9192
assert.True(t, ok)
92-
assert.Equal(t, initProtocol, protocolFallback.protocol)
93+
assert.Equal(t, initProtocol, protoFallback.protocol)
9394

94-
protocolFallback.reset()
95-
protocolFallback.BackoffTimer() // simulate retry
96-
ok = selectNextProtocol(&log, protocolFallback, protocolSelector, true)
95+
protoFallback.reset()
96+
protoFallback.BackoffTimer() // simulate retry
97+
ok = selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{})
9798
// Check that we get a true after the first try itself when this flag is true. This allows us to immediately
98-
// switch protocols.
99+
// switch protocols when there is a fallback.
99100
assert.True(t, ok)
101+
102+
// But if there is no fallback available, then we exhaust the retries despite the type of error.
103+
// The reason why there's no fallback available is because we pick a specific protocol instead of letting it be auto.
104+
protocolSelector, err = connection.NewProtocolSelector(
105+
"quic",
106+
warpRoutingEnabled,
107+
namedTunnel,
108+
mockFetcher.fetch(),
109+
resolveTTL,
110+
&log,
111+
)
112+
assert.NoError(t, err)
113+
protoFallback = &protocolFallback{backoff, protocolSelector.Current(), false}
114+
for i := 0; i < int(maxRetries-1); i++ {
115+
protoFallback.BackoffTimer() // simulate retry
116+
ok := selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{})
117+
assert.True(t, ok)
118+
assert.Equal(t, connection.QUIC, protoFallback.protocol)
119+
}
120+
// And finally it fails as it should, with no fallback.
121+
protoFallback.BackoffTimer()
122+
ok = selectNextProtocol(&log, protoFallback, protocolSelector, &quic.IdleTimeoutError{})
123+
assert.False(t, ok)
100124
}

0 commit comments

Comments
 (0)