Skip to content

Commit bf4954e

Browse files
committed
TUN-8861: Add session limiter to UDP session manager
## Summary In order to make cloudflared behavior more predictable and prevent an exhaustion of resources, we have decided to add session limits that can be configured by the user. This first commit introduces the session limiter and adds it to the UDP handling path. For now the limiter is set to run only in unlimited mode.
1 parent 8918b67 commit bf4954e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+3409
-1184
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,7 @@ fmt-check:
265265
.PHONY: lint
266266
lint:
267267
@golangci-lint run
268+
269+
.PHONY: mocks
270+
mocks:
271+
go generate mocks/mockgen.go

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ For example, as of January 2023 Cloudflare will support cloudflared version 2023
6767
- [capnpc-go](https://pkg.go.dev/zombiezen.com/go/capnproto2/capnpc-go)
6868
- [goimports](https://pkg.go.dev/golang.org/x/tools/cmd/goimports)
6969
- [golangci-lint](https://github.com/golangci/golangci-lint)
70+
- [gomocks](https://pkg.go.dev/go.uber.org/mock)
7071

7172
### Build
7273
To build cloudflared locally run `make cloudflared`
@@ -76,3 +77,6 @@ To locally run the tests run `make test`
7677

7778
### Linting
7879
To format the code and keep a good code quality use `make fmt` and `make lint`
80+
81+
### Mocks
82+
After changes on interfaces you might need to regenerate the mocks, so run `make mock`

connection/quic_connection_test.go

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import (
2828
"github.com/stretchr/testify/require"
2929
"golang.org/x/net/nettest"
3030

31+
cfdsession "github.com/cloudflare/cloudflared/session"
32+
3133
"github.com/cloudflare/cloudflared/datagramsession"
3234
"github.com/cloudflare/cloudflared/ingress"
3335
"github.com/cloudflare/cloudflared/packet"
@@ -53,7 +55,8 @@ var _ ReadWriteAcker = (*streamReadWriteAcker)(nil)
5355
func TestQUICServer(t *testing.T) {
5456
// This is simply a sample websocket frame message.
5557
wsBuf := &bytes.Buffer{}
56-
wsutil.WriteClientBinary(wsBuf, []byte("Hello"))
58+
err := wsutil.WriteClientBinary(wsBuf, []byte("Hello"))
59+
require.NoError(t, err)
5760

5861
var tests = []struct {
5962
desc string
@@ -158,17 +161,19 @@ func TestQUICServer(t *testing.T) {
158161

159162
serverDone := make(chan struct{})
160163
go func() {
164+
// nolint: testifylint
161165
quicServer(
162166
ctx, t, quicListener, test.dest, test.connectionType, test.metadata, test.message, test.expectedResponse,
163167
)
164168
close(serverDone)
165169
}()
166170

171+
// nolint: gosec
167172
tunnelConn, _ := testTunnelConnection(t, netip.MustParseAddrPort(udpListener.LocalAddr().String()), uint8(i))
168173

169174
connDone := make(chan struct{})
170175
go func() {
171-
tunnelConn.Serve(ctx)
176+
_ = tunnelConn.Serve(ctx)
172177
close(connDone)
173178
}()
174179

@@ -254,14 +259,14 @@ func (moc *mockOriginProxyWithRequest) ProxyHTTP(w ResponseWriter, tr *tracing.T
254259
case "/ok":
255260
originRespEndpoint(w, http.StatusOK, []byte(http.StatusText(http.StatusOK)))
256261
case "/slow_echo_body":
257-
time.Sleep(5)
262+
time.Sleep(5 * time.Nanosecond)
258263
fallthrough
259264
case "/echo_body":
260265
resp := &http.Response{
261266
StatusCode: http.StatusOK,
262267
}
263268
_ = w.WriteRespHeaders(resp.StatusCode, resp.Header)
264-
io.Copy(w, r.Body)
269+
_, _ = io.Copy(w, r.Body)
265270
case "/error":
266271
return fmt.Errorf("Failed to proxy to origin")
267272
default:
@@ -493,16 +498,16 @@ func TestBuildHTTPRequest(t *testing.T) {
493498
test := test // capture range variable
494499
t.Run(test.name, func(t *testing.T) {
495500
req, err := buildHTTPRequest(context.Background(), test.connectRequest, test.body, 0, &log)
496-
assert.NoError(t, err)
501+
require.NoError(t, err)
497502
test.req = test.req.WithContext(req.Context())
498-
assert.Equal(t, test.req, req.Request)
503+
require.Equal(t, test.req, req.Request)
499504
})
500505
}
501506
}
502507

503508
func (moc *mockOriginProxyWithRequest) ProxyTCP(ctx context.Context, rwa ReadWriteAcker, tcpRequest *TCPRequest) error {
504-
rwa.AckConnection("")
505-
io.Copy(rwa, rwa)
509+
_ = rwa.AckConnection("")
510+
_, _ = io.Copy(rwa, rwa)
506511
return nil
507512
}
508513

@@ -520,16 +525,19 @@ func TestServeUDPSession(t *testing.T) {
520525
edgeQUICSessionChan := make(chan quic.Connection)
521526
go func() {
522527
earlyListener, err := quic.Listen(udpListener, testTLSServerConfig, testQUICConfig)
523-
require.NoError(t, err)
528+
assert.NoError(t, err)
524529

525530
edgeQUICSession, err := earlyListener.Accept(ctx)
526-
require.NoError(t, err)
531+
assert.NoError(t, err)
532+
527533
edgeQUICSessionChan <- edgeQUICSession
528534
}()
529535

530536
// Random index to avoid reusing port
531537
tunnelConn, datagramConn := testTunnelConnection(t, netip.MustParseAddrPort(udpListener.LocalAddr().String()), 28)
532-
go tunnelConn.Serve(ctx)
538+
go func() {
539+
_ = tunnelConn.Serve(ctx)
540+
}()
533541

534542
edgeQUICSession := <-edgeQUICSessionChan
535543

@@ -545,14 +553,14 @@ func TestNopCloserReadWriterCloseBeforeEOF(t *testing.T) {
545553

546554
n, err := readerWriter.Read(buffer)
547555
require.NoError(t, err)
548-
require.Equal(t, n, 5)
556+
require.Equal(t, 5, n)
549557

550558
// close
551559
require.NoError(t, readerWriter.Close())
552560

553561
// read should get error
554562
n, err = readerWriter.Read(buffer)
555-
require.Equal(t, n, 0)
563+
require.Equal(t, 0, n)
556564
require.Equal(t, err, fmt.Errorf("closed by handler"))
557565
}
558566

@@ -562,7 +570,7 @@ func TestNopCloserReadWriterCloseAfterEOF(t *testing.T) {
562570

563571
n, err := readerWriter.Read(buffer)
564572
require.NoError(t, err)
565-
require.Equal(t, n, 9)
573+
require.Equal(t, 9, n)
566574

567575
// force another read to read eof
568576
_, err = readerWriter.Read(buffer)
@@ -573,7 +581,7 @@ func TestNopCloserReadWriterCloseAfterEOF(t *testing.T) {
573581

574582
// read should get EOF still
575583
n, err = readerWriter.Read(buffer)
576-
require.Equal(t, n, 0)
584+
require.Equal(t, 0, n)
577585
require.Equal(t, err, io.EOF)
578586
}
579587

@@ -669,6 +677,7 @@ func serveSession(ctx context.Context, datagramConn *datagramV2Connection, edgeQ
669677
unregisterReason: expectedReason,
670678
calledUnregisterChan: unregisterFromEdgeChan,
671679
}
680+
// nolint: testifylint
672681
go runRPCServer(ctx, edgeQUICSession, sessionRPCServer, nil, t)
673682

674683
<-unregisterFromEdgeChan
@@ -729,6 +738,7 @@ func (s mockSessionRPCServer) UnregisterUdpSession(ctx context.Context, sessionI
729738

730739
func testTunnelConnection(t *testing.T, serverAddr netip.AddrPort, index uint8) (TunnelConnection, *datagramV2Connection) {
731740
tlsClientConfig := &tls.Config{
741+
// nolint: gosec
732742
InsecureSkipVerify: true,
733743
NextProtos: []string{"argotunnel"},
734744
}
@@ -747,6 +757,7 @@ func testTunnelConnection(t *testing.T, serverAddr netip.AddrPort, index uint8)
747757
index,
748758
&log,
749759
)
760+
require.NoError(t, err)
750761

751762
// Start a session manager for the connection
752763
sessionDemuxChan := make(chan *packet.Session, 4)
@@ -757,7 +768,9 @@ func testTunnelConnection(t *testing.T, serverAddr netip.AddrPort, index uint8)
757768

758769
datagramConn := &datagramV2Connection{
759770
conn,
771+
index,
760772
sessionManager,
773+
cfdsession.NewLimiter(0),
761774
datagramMuxer,
762775
packetRouter,
763776
15 * time.Second,
@@ -796,6 +809,7 @@ func (m *mockReaderNoopWriter) Close() error {
796809

797810
// GenerateTLSConfig sets up a bare-bones TLS config for a QUIC server
798811
func GenerateTLSConfig() *tls.Config {
812+
// nolint: gosec
799813
key, err := rsa.GenerateKey(rand.Reader, 1024)
800814
if err != nil {
801815
panic(err)
@@ -812,6 +826,7 @@ func GenerateTLSConfig() *tls.Config {
812826
if err != nil {
813827
panic(err)
814828
}
829+
// nolint: gosec
815830
return &tls.Config{
816831
Certificates: []tls.Certificate{tlsCert},
817832
NextProtos: []string{"argotunnel"},

connection/quic_datagram_v2.go

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@ import (
77
"time"
88

99
"github.com/google/uuid"
10+
pkgerrors "github.com/pkg/errors"
1011
"github.com/quic-go/quic-go"
1112
"github.com/rs/zerolog"
1213
"go.opentelemetry.io/otel/attribute"
1314
"go.opentelemetry.io/otel/trace"
1415
"golang.org/x/sync/errgroup"
1516

17+
cfdsession "github.com/cloudflare/cloudflared/session"
18+
1619
"github.com/cloudflare/cloudflared/datagramsession"
1720
"github.com/cloudflare/cloudflared/ingress"
1821
"github.com/cloudflare/cloudflared/management"
@@ -38,10 +41,14 @@ type DatagramSessionHandler interface {
3841
}
3942

4043
type datagramV2Connection struct {
41-
conn quic.Connection
44+
conn quic.Connection
45+
index uint8
4246

4347
// sessionManager tracks active sessions. It receives datagrams from quic connection via datagramMuxer
4448
sessionManager datagramsession.Manager
49+
// sessionLimiter tracks active sessions across the tunnel and limits new sessions if they are above the limit.
50+
sessionLimiter cfdsession.Limiter
51+
4552
// datagramMuxer mux/demux datagrams from quic connection
4653
datagramMuxer *cfdquic.DatagramMuxerV2
4754
packetRouter *ingress.PacketRouter
@@ -58,6 +65,7 @@ func NewDatagramV2Connection(ctx context.Context,
5865
index uint8,
5966
rpcTimeout time.Duration,
6067
streamWriteTimeout time.Duration,
68+
sessionLimiter cfdsession.Limiter,
6169
logger *zerolog.Logger,
6270
) DatagramSessionHandler {
6371
sessionDemuxChan := make(chan *packet.Session, demuxChanCapacity)
@@ -66,13 +74,15 @@ func NewDatagramV2Connection(ctx context.Context,
6674
packetRouter := ingress.NewPacketRouter(icmpRouter, datagramMuxer, index, logger)
6775

6876
return &datagramV2Connection{
69-
conn,
70-
sessionManager,
71-
datagramMuxer,
72-
packetRouter,
73-
rpcTimeout,
74-
streamWriteTimeout,
75-
logger,
77+
conn: conn,
78+
index: index,
79+
sessionManager: sessionManager,
80+
sessionLimiter: sessionLimiter,
81+
datagramMuxer: datagramMuxer,
82+
packetRouter: packetRouter,
83+
rpcTimeout: rpcTimeout,
84+
streamWriteTimeout: streamWriteTimeout,
85+
logger: logger,
7686
}
7787
}
7888

@@ -109,12 +119,23 @@ func (q *datagramV2Connection) RegisterUdpSession(ctx context.Context, sessionID
109119
attribute.String("dst", fmt.Sprintf("%s:%d", dstIP, dstPort)),
110120
))
111121
log := q.logger.With().Int(management.EventTypeKey, int(management.UDP)).Logger()
122+
123+
// Try to start a new session
124+
if err := q.sessionLimiter.Acquire(management.UDP.String()); err != nil {
125+
log.Warn().Msgf("Too many concurrent sessions being handled, rejecting udp proxy to %s:%d", dstIP, dstPort)
126+
127+
err := pkgerrors.Wrap(err, "failed to start udp session due to rate limiting")
128+
tracing.EndWithErrorStatus(registerSpan, err)
129+
return nil, err
130+
}
131+
112132
// Each session is a series of datagram from an eyeball to a dstIP:dstPort.
113133
// (src port, dst IP, dst port) uniquely identifies a session, so it needs a dedicated connected socket.
114134
originProxy, err := ingress.DialUDP(dstIP, dstPort)
115135
if err != nil {
116136
log.Err(err).Msgf("Failed to create udp proxy to %s:%d", dstIP, dstPort)
117137
tracing.EndWithErrorStatus(registerSpan, err)
138+
q.sessionLimiter.Release()
118139
return nil, err
119140
}
120141
registerSpan.SetAttributes(
@@ -127,10 +148,14 @@ func (q *datagramV2Connection) RegisterUdpSession(ctx context.Context, sessionID
127148
originProxy.Close()
128149
log.Err(err).Str(datagramsession.LogFieldSessionID, datagramsession.FormatSessionID(sessionID)).Msgf("Failed to register udp session")
129150
tracing.EndWithErrorStatus(registerSpan, err)
151+
q.sessionLimiter.Release()
130152
return nil, err
131153
}
132154

133-
go q.serveUDPSession(session, closeAfterIdleHint)
155+
go func() {
156+
defer q.sessionLimiter.Release() // we do the release here, instead of inside the `serveUDPSession` just to keep all acquire/release calls in the same method.
157+
q.serveUDPSession(session, closeAfterIdleHint)
158+
}()
134159

135160
log.Debug().
136161
Str(datagramsession.LogFieldSessionID, datagramsession.FormatSessionID(sessionID)).
@@ -170,7 +195,7 @@ func (q *datagramV2Connection) serveUDPSession(session *datagramsession.Session,
170195

171196
// closeUDPSession first unregisters the session from session manager, then it tries to unregister from edge
172197
func (q *datagramV2Connection) closeUDPSession(ctx context.Context, sessionID uuid.UUID, message string) {
173-
q.sessionManager.UnregisterSession(ctx, sessionID, message, false)
198+
_ = q.sessionManager.UnregisterSession(ctx, sessionID, message, false)
174199
quicStream, err := q.conn.OpenStream()
175200
if err != nil {
176201
// Log this at debug because this is not an error if session was closed due to lost connection

0 commit comments

Comments
 (0)