Skip to content

Commit cfef0e7

Browse files
committed
TUN-6720: Remove forcibly closing connection during reconnect signal
Previously allowing the reconnect signal forcibly close the connection caused a race condition on which error was returned by the errgroup in the tunnel connection. Allowing the signal to return and provide a context cancel to the connection provides a safer shutdown of the tunnel for this test-only scenario.
1 parent 8ec0f77 commit cfef0e7

File tree

5 files changed

+22
-19
lines changed

5 files changed

+22
-19
lines changed

cmd/cloudflared/tunnel/cmd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ func StartServer(
375375
errC <- metrics.ServeMetrics(metricsListener, ctx.Done(), readinessServer, quickTunnelURL, orchestrator, log)
376376
}()
377377

378-
reconnectCh := make(chan supervisor.ReconnectSignal, 1)
378+
reconnectCh := make(chan supervisor.ReconnectSignal, c.Int("ha-connections"))
379379
if c.IsSet("stdin-control") {
380380
log.Info().Msg("Enabling control through stdin")
381381
go stdinControl(reconnectCh, log)

component-tests/test_reconnect.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,12 @@ def send_reconnect(self, cloudflared, secs):
4747
cloudflared.stdin.flush()
4848

4949
def assert_reconnect(self, config, cloudflared, repeat):
50-
wait_tunnel_ready(tunnel_url=config.get_url(), require_min_connections=self.default_ha_conns)
50+
wait_tunnel_ready(tunnel_url=config.get_url(),
51+
require_min_connections=self.default_ha_conns)
5152
for _ in range(repeat):
52-
for i in range(self.default_ha_conns):
53+
for _ in range(self.default_ha_conns):
5354
self.send_reconnect(cloudflared, self.default_reconnect_secs)
54-
expect_connections = self.default_ha_conns-i-1
55-
if expect_connections > 0:
56-
# Don't check if tunnel returns 200 here because there is a race condition between wait_tunnel_ready
57-
# retrying to get 200 response and reconnecting
58-
wait_tunnel_ready(require_min_connections=expect_connections)
59-
else:
60-
check_tunnel_not_connected()
61-
55+
check_tunnel_not_connected()
6256
sleep(self.default_reconnect_secs * 2)
63-
wait_tunnel_ready(tunnel_url=config.get_url(), require_min_connections=self.default_ha_conns)
57+
wait_tunnel_ready(tunnel_url=config.get_url(),
58+
require_min_connections=self.default_ha_conns)

component-tests/util.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
LOGGER = logging.getLogger(__name__)
1717

18+
1819
def select_platform(plat):
1920
return pytest.mark.skipif(
2021
platform.system() != plat, reason=f"Only runs on {plat}")
@@ -108,13 +109,15 @@ def _log_cloudflared_logs(cfd_logs):
108109
LOGGER.warning(line)
109110

110111

111-
@retry(stop_max_attempt_number=MAX_RETRIES * BACKOFF_SECS, wait_fixed=1000)
112+
@retry(stop_max_attempt_number=MAX_RETRIES, wait_fixed=BACKOFF_SECS * 1000)
112113
def check_tunnel_not_connected():
113114
url = f'http://localhost:{METRICS_PORT}/ready'
114115

115116
try:
116-
resp = requests.get(url, timeout=1)
117+
resp = requests.get(url, timeout=BACKOFF_SECS)
117118
assert resp.status_code == 503, f"Expect {url} returns 503, got {resp.status_code}"
119+
assert resp.json()[
120+
"readyConnections"] == 0, "Expected all connections to be terminated (pending reconnect)"
118121
# cloudflared might already terminate
119122
except requests.exceptions.ConnectionError as e:
120123
LOGGER.warning(f"Failed to connect to {url}, error: {e}")

supervisor/supervisor.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,7 @@ func (s *Supervisor) initialize(
295295
s.config.ProtocolSelector.Current(),
296296
false,
297297
}
298-
ch := signal.New(make(chan struct{}))
299-
go s.startTunnel(ctx, i, ch)
298+
go s.startTunnel(ctx, i, s.newConnectedTunnelSignal(i))
300299
time.Sleep(registrationInterval)
301300
}
302301
return nil

supervisor/tunnel.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,13 @@ func (e *EdgeTunnelServer) serveH2mux(
546546
})
547547

548548
errGroup.Go(func() error {
549-
return listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
549+
err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
550+
if err != nil {
551+
// forcefully break the connection (this is only used for testing)
552+
// errgroup will return context canceled for the handler.ServeClassicTunnel
553+
connLog.Logger().Debug().Msg("Forcefully breaking h2mux connection")
554+
}
555+
return err
550556
})
551557

552558
return errGroup.Wait()
@@ -580,8 +586,8 @@ func (e *EdgeTunnelServer) serveHTTP2(
580586
err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
581587
if err != nil {
582588
// forcefully break the connection (this is only used for testing)
589+
// errgroup will return context canceled for the h2conn.Serve
583590
connLog.Logger().Debug().Msg("Forcefully breaking http2 connection")
584-
_ = tlsServerConn.Close()
585591
}
586592
return err
587593
})
@@ -636,8 +642,8 @@ func (e *EdgeTunnelServer) serveQUIC(
636642
err := listenReconnect(serveCtx, e.reconnectCh, e.gracefulShutdownC)
637643
if err != nil {
638644
// forcefully break the connection (this is only used for testing)
645+
// errgroup will return context canceled for the quicConn.Serve
639646
connLogger.Logger().Debug().Msg("Forcefully breaking quic connection")
640-
quicConn.Close()
641647
}
642648
return err
643649
})

0 commit comments

Comments
 (0)