Skip to content

Commit d4bb97a

Browse files
[8.18](backport #5372) Restore connection-level limiter (#5376)
* Restore connection-level limiter (#5372) Restore connection level limiter to prevent OOM incidents. (removed in #4402) This limiter is used in addition to the request-level throttle so that once our in-flight requests reaches max_connections a 429 is returned, but if the total connections the server uses is over max_connections*1.1 the server drops the connection before the TLS handshake. (cherry picked from commit 39199ef) # Conflicts: # NOTICE-fips.txt # NOTICE.txt # go.mod # internal/pkg/api/server.go * Fix merge * Fix file --------- Co-authored-by: Michel Laterman <[email protected]> Co-authored-by: michel-laterman <[email protected]>
1 parent 30cbbac commit d4bb97a

File tree

6 files changed

+272
-38
lines changed

6 files changed

+272
-38
lines changed

NOTICE.txt

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5035,6 +5035,43 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
50355035
THE SOFTWARE.
50365036

50375037

5038+
--------------------------------------------------------------------------------
5039+
Dependency : golang.org/x/net
5040+
Version: v0.41.0
5041+
Licence type (autodetected): BSD-3-Clause
5042+
--------------------------------------------------------------------------------
5043+
5044+
Contents of probable licence file $GOMODCACHE/golang.org/x/[email protected]/LICENSE:
5045+
5046+
Copyright 2009 The Go Authors.
5047+
5048+
Redistribution and use in source and binary forms, with or without
5049+
modification, are permitted provided that the following conditions are
5050+
met:
5051+
5052+
* Redistributions of source code must retain the above copyright
5053+
notice, this list of conditions and the following disclaimer.
5054+
* Redistributions in binary form must reproduce the above
5055+
copyright notice, this list of conditions and the following disclaimer
5056+
in the documentation and/or other materials provided with the
5057+
distribution.
5058+
* Neither the name of Google LLC nor the names of its
5059+
contributors may be used to endorse or promote products derived from
5060+
this software without specific prior written permission.
5061+
5062+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
5063+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
5064+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
5065+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
5066+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5067+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
5068+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
5069+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
5070+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
5071+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
5072+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5073+
5074+
50385075
--------------------------------------------------------------------------------
50395076
Dependency : golang.org/x/sync
50405077
Version: v0.15.0
@@ -21859,43 +21896,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2185921896
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2186021897

2186121898

21862-
--------------------------------------------------------------------------------
21863-
Dependency : golang.org/x/net
21864-
Version: v0.41.0
21865-
Licence type (autodetected): BSD-3-Clause
21866-
--------------------------------------------------------------------------------
21867-
21868-
Contents of probable licence file $GOMODCACHE/golang.org/x/[email protected]/LICENSE:
21869-
21870-
Copyright 2009 The Go Authors.
21871-
21872-
Redistribution and use in source and binary forms, with or without
21873-
modification, are permitted provided that the following conditions are
21874-
met:
21875-
21876-
* Redistributions of source code must retain the above copyright
21877-
notice, this list of conditions and the following disclaimer.
21878-
* Redistributions in binary form must reproduce the above
21879-
copyright notice, this list of conditions and the following disclaimer
21880-
in the documentation and/or other materials provided with the
21881-
distribution.
21882-
* Neither the name of Google LLC nor the names of its
21883-
contributors may be used to endorse or promote products derived from
21884-
this software without specific prior written permission.
21885-
21886-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21887-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21888-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21889-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21890-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21891-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21892-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21893-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21894-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21895-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
21896-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21897-
21898-
2189921899
--------------------------------------------------------------------------------
2190021900
Dependency : golang.org/x/oauth2
2190121901
Version: v0.30.0
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: Restore connection limiter
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
description: |
20+
Restore connection level limiter to prevent OOM incidents.
21+
This limiter is used in addition to the request-level throttle so that once
22+
our in-flight requests reaches max_connections a 429 is returned, but if the
23+
total connections the server uses is over max_connections*1.1 the server drops
24+
the connection before the TLS handshake.
25+
26+
27+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
28+
component: fleet-server
29+
30+
# PR URL; optional; the PR number that added the changeset.
31+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
32+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
33+
# Please provide it if you are adding a fragment for a different PR.
34+
pr: https://github.com/elastic/fleet-server/pull/5372
35+
36+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
37+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
38+
#issue: https://github.com/owner/repo/1234

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ require (
3535
go.elastic.co/apm/v2 v2.7.1
3636
go.elastic.co/ecszerolog v0.2.0
3737
go.uber.org/zap v1.27.0
38+
golang.org/x/net v0.41.0
3839
golang.org/x/sync v0.15.0
3940
golang.org/x/time v0.11.0
4041
google.golang.org/grpc v1.75.0
@@ -91,7 +92,6 @@ require (
9192
go.uber.org/multierr v1.11.0 // indirect
9293
golang.org/x/crypto v0.39.0 // indirect
9394
golang.org/x/mod v0.25.0 // indirect
94-
golang.org/x/net v0.41.0 // indirect
9595
golang.org/x/sys v0.33.0 // indirect
9696
golang.org/x/text v0.26.0 // indirect
9797
golang.org/x/tools v0.33.0 // indirect

internal/pkg/api/server.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/elastic/elastic-agent-libs/logp"
1717
"github.com/elastic/elastic-agent-libs/transport/tlscommon"
1818
"github.com/elastic/fleet-server/v7/internal/pkg/config"
19+
"github.com/elastic/fleet-server/v7/internal/pkg/limit"
1920
"github.com/elastic/fleet-server/v7/internal/pkg/logger"
2021

2122
"github.com/rs/zerolog"
@@ -31,6 +32,7 @@ type server struct {
3132
// NewServer creates a new HTTP api for the passed addr.
3233
//
3334
// The server has an http request limit and endpoint specific rate-limits.
35+
// There is also a connection limit that will drop connections if too many connections are formed.
3436
// The underlying API structs (such as *CheckinT) may be shared between servers.
3537
func NewServer(addr string, cfg *config.Server, opts ...APIOpt) *server {
3638
a := &apiServer{}
@@ -78,6 +80,13 @@ func (s *server) Run(ctx context.Context) error {
7880
}
7981
}()
8082

83+
// Conn Limiter must be before the TLS handshake in the stack;
84+
// The server should not eat the cost of the handshake if there
85+
// is no capacity to service the connection.
86+
// Also, it appears the HTTP2 implementation depends on the tls.Listener
87+
// being at the top of the stack.
88+
ln = wrapConnLimitter(ctx, ln, s.cfg)
89+
8190
if s.cfg.TLS != nil && s.cfg.TLS.IsEnabled() {
8291
commonTLSCfg, err := tlscommon.LoadTLSServerConfig(s.cfg.TLS, s.logger)
8392
if err != nil {
@@ -160,3 +169,21 @@ func errLogger(ctx context.Context) *slog.Logger {
160169
stub := &stubLogger{*log}
161170
return slog.New(stub, "", 0)
162171
}
172+
173+
// wrapConnLimitter will drop connections once the connection count is max_connections*1.1
174+
// This means that once the limit is reached, the server will resturn 429 responses until the connection count reaches the threshold, then the server will drop connections before the TLS handshake.
175+
func wrapConnLimitter(ctx context.Context, ln net.Listener, cfg *config.Server) net.Listener {
176+
hardLimit := int(float64(cfg.Limits.MaxConnections) * 1.1)
177+
178+
if hardLimit != 0 {
179+
zerolog.Ctx(ctx).Info().
180+
Int("hardConnLimit", hardLimit).
181+
Msg("server hard connection limiter installed")
182+
183+
ln = limit.Listener(ln, hardLimit, zerolog.Ctx(ctx))
184+
} else {
185+
zerolog.Ctx(ctx).Info().Msg("server hard connection limiter disabled")
186+
}
187+
188+
return ln
189+
}

internal/pkg/limit/listener.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package limit
6+
7+
import (
8+
"net"
9+
"sync"
10+
11+
"github.com/rs/zerolog"
12+
"github.com/rs/zerolog/log"
13+
14+
"github.com/elastic/fleet-server/v7/internal/pkg/logger"
15+
)
16+
17+
// Derived from netutil.LimitListener but works slightly differently.
18+
// Instead of blocking on the semaphore before acception connection,
19+
// this implementation immediately accepts connections and if cannot
20+
// acquire the semaphore it forces the connection closed.
21+
// Ideally, this limiter is run *before* the TLS handshake occurs
22+
// to prevent DDOS attack that eats all the server's CPU.
23+
// The downside to this is that it will Close() valid connections
24+
// indiscriminately.
25+
26+
func Listener(l net.Listener, n int, log *zerolog.Logger) net.Listener {
27+
return &limitListener{
28+
Listener: l,
29+
sem: make(chan struct{}, n),
30+
done: make(chan struct{}),
31+
log: log,
32+
}
33+
}
34+
35+
type limitListener struct {
36+
net.Listener
37+
sem chan struct{}
38+
closeOnce sync.Once // ensures the done chan is only closed once
39+
done chan struct{} // no values sent; closed when Close is called
40+
log *zerolog.Logger
41+
}
42+
43+
func (l *limitListener) acquire() bool {
44+
select {
45+
case <-l.done:
46+
return false
47+
case l.sem <- struct{}{}:
48+
return true
49+
default:
50+
return false
51+
}
52+
}
53+
func (l *limitListener) release() { <-l.sem }
54+
55+
func (l *limitListener) Accept() (net.Conn, error) {
56+
57+
// Accept the connection irregardless
58+
c, err := l.Listener.Accept()
59+
if err != nil {
60+
return nil, err
61+
}
62+
63+
// If we cannot acquire the semaphore, close the connection
64+
if acquired := l.acquire(); !acquired {
65+
zlog := log.Warn()
66+
67+
var err error
68+
if c != nil {
69+
err = c.Close()
70+
zlog.Str(logger.ECSServerAddress, c.LocalAddr().String())
71+
zlog.Str(logger.ECSClientAddress, c.RemoteAddr().String())
72+
zlog.Err(err)
73+
}
74+
zlog.Int("max", cap(l.sem)).Msg("Connection closed due to max limit")
75+
76+
return c, nil
77+
}
78+
79+
return &limitListenerConn{Conn: c, release: l.release}, nil
80+
}
81+
82+
func (l *limitListener) Close() error {
83+
err := l.Listener.Close()
84+
l.closeOnce.Do(func() { close(l.done) })
85+
return err
86+
}
87+
88+
type limitListenerConn struct {
89+
net.Conn
90+
releaseOnce sync.Once
91+
release func()
92+
}
93+
94+
func (l *limitListenerConn) Close() error {
95+
err := l.Conn.Close()
96+
l.releaseOnce.Do(l.release)
97+
return err
98+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package limit
6+
7+
import (
8+
"net"
9+
"testing"
10+
"time"
11+
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
"golang.org/x/net/nettest"
15+
16+
testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log"
17+
)
18+
19+
func TestLimitListener(t *testing.T) {
20+
logger := testlog.SetLogger(t)
21+
ll, err := nettest.NewLocalListener("tcp")
22+
require.NoError(t, err)
23+
defer ll.Close()
24+
l := Listener(ll, 1, &logger)
25+
26+
ch := make(chan struct{})
27+
done := make(chan struct{})
28+
29+
t.Log("Form 1st connection")
30+
go func() {
31+
_, err := net.Dial("tcp", l.Addr().String())
32+
require.NoError(t, err)
33+
}()
34+
// should accept a connection
35+
conn, err := l.Accept()
36+
require.NoError(t, err, "expected to be able to form one connection")
37+
38+
t.Log("Form 2nd connection")
39+
go func() {
40+
conn, err := net.Dial("tcp", l.Addr().String())
41+
require.NoError(t, err)
42+
43+
select {
44+
case <-ch:
45+
case <-time.After(time.Second):
46+
require.Fail(t, "expected channel write before timeout")
47+
}
48+
49+
var p []byte
50+
n, err := conn.Read(p)
51+
assert.NoError(t, err)
52+
assert.Equal(t, 0, n)
53+
54+
err = conn.Close()
55+
require.NoError(t, err)
56+
done <- struct{}{}
57+
}()
58+
59+
conn2, err := l.Accept()
60+
require.NoError(t, err)
61+
n, err := conn2.Write([]byte(`hellow world`))
62+
ch <- struct{}{}
63+
assert.Error(t, err)
64+
assert.Equal(t, 0, n)
65+
66+
err = conn.Close()
67+
require.NoError(t, err)
68+
err = l.Close()
69+
require.NoError(t, err)
70+
<-done
71+
}

0 commit comments

Comments
 (0)