Skip to content

Commit 80b783e

Browse files
authored
feat(go): add dynamic rate-limiting (#823)
1 parent ae27226 commit 80b783e

File tree

14 files changed

+906
-247
lines changed

14 files changed

+906
-247
lines changed

cmd/server/cmd.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func NewCmd() *cli.Command {
1919
return &cli.Command{
2020
Name: "server",
2121
Usage: "Run the server part of the service.",
22-
Flags: slices.Concat(config.ServerFlags(), config.TracingFlags(), config.BrowserFlags()),
22+
Flags: slices.Concat(config.ServerFlags(), config.TracingFlags(), config.BrowserFlags(), config.RateLimitFlags()),
2323
Action: run,
2424
}
2525
}
@@ -37,6 +37,10 @@ func run(ctx context.Context, c *cli.Command) error {
3737
if err != nil {
3838
return fmt.Errorf("failed to parse tracing config: %w", err)
3939
}
40+
rateLimitConfig, err := config.RateLimitConfigFromCommand(c)
41+
if err != nil {
42+
return fmt.Errorf("failed to parse process tracker config: %w", err)
43+
}
4044
tracerProvider, err := traces.NewTracerProvider(ctx, tracingConfig)
4145
if err != nil {
4246
return fmt.Errorf("failed to set up tracer: %w", err)
@@ -47,10 +51,11 @@ func run(ctx context.Context, c *cli.Command) error {
4751
otel.SetTracerProvider(tracerProvider)
4852
otel.SetTextMapPropagator(propagation.TraceContext{})
4953
}
50-
browser := service.NewBrowserService(browserConfig)
54+
processStatService := service.NewProcessStatService(rateLimitConfig)
55+
browser := service.NewBrowserService(browserConfig, processStatService)
5156
versions := service.NewVersionService()
5257
metrics := metrics.NewRegistry()
53-
handler, err := api.NewHandler(metrics, serverConfig, browser, versions)
58+
handler, err := api.NewHandler(metrics, serverConfig, rateLimitConfig, processStatService, browser, versions)
5459
if err != nil {
5560
return fmt.Errorf("failed to create API handler: %w", err)
5661
}

devenv/docker/go-build/docker-compose.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ services:
4646
environment:
4747
TRACING_ENDPOINT: http://tempo:4318/v1/traces
4848
LOG_LEVEL: debug
49+
command:
50+
- server
51+
# 1 GiB
52+
- --rate-limit.max-available=1073741824
4953
ports:
5054
- 8081:8081
5155
depends_on:

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/docker/go-connections v0.6.0
1111
github.com/gen2brain/go-fitz v1.24.15
1212
github.com/go-jose/go-jose/v4 v4.1.2
13+
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
1314
github.com/prometheus/client_golang v1.23.0
1415
github.com/shirou/gopsutil/v4 v4.25.7
1516
github.com/stretchr/testify v1.11.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJw
123123
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
124124
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
125125
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
126+
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
127+
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
126128
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
127129
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
128130
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

pkg/api/middleware/auth.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
)
99

1010
var MetricAuthenticatedRequestAttempt = prometheus.NewCounterVec(prometheus.CounterOpts{
11-
Name: "http_authenticated_request_attempt",
11+
Name: "http_authenticated_request_attempts_total",
1212
Help: "Counts the attempts of authenticated requests",
1313
}, []string{"result"})
1414

pkg/api/middleware/ratelimiter.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package middleware
2+
3+
import (
4+
"context"
5+
"log/slog"
6+
"net/http"
7+
"sync/atomic"
8+
9+
"github.com/grafana/grafana-image-renderer/pkg/config"
10+
"github.com/grafana/grafana-image-renderer/pkg/service"
11+
"github.com/pbnjay/memory"
12+
"github.com/prometheus/client_golang/prometheus"
13+
"go.opentelemetry.io/otel/attribute"
14+
"go.opentelemetry.io/otel/codes"
15+
"go.opentelemetry.io/otel/trace"
16+
)
17+
18+
var (
19+
MetricRateLimiterRequests = prometheus.NewCounterVec(prometheus.CounterOpts{
20+
Name: "http_rate_limiter_requests_total",
21+
Help: "Number of HTTP requests that pass through the rate-limiter, and their outcomes.",
22+
}, []string{"result", "why"})
23+
MetricRateLimiterSlots = prometheus.NewGaugeVec(prometheus.GaugeOpts{
24+
Name: "http_rate_limiter_slots",
25+
Help: "The number of total available slots for handling requests, based on memory.",
26+
}, []string{"type"})
27+
)
28+
29+
// Limiter unifies the limiter types.
30+
type Limiter interface {
31+
Limit(http.Handler) http.Handler
32+
}
33+
34+
type noOpLimiter struct{}
35+
36+
func (noOpLimiter) Limit(next http.Handler) http.Handler {
37+
return next
38+
}
39+
40+
type processBasedLimiter struct {
41+
svc *service.ProcessStatService
42+
cfg config.RateLimitConfig
43+
running *atomic.Uint32
44+
logger *slog.Logger
45+
}
46+
47+
func (p processBasedLimiter) Limit(next http.Handler) http.Handler {
48+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
49+
tracer := tracer(r.Context())
50+
ctx, span := tracer.Start(r.Context(), "processBasedLimiter.Limit")
51+
defer span.End()
52+
53+
fits, why := p.canFitRequest(ctx)
54+
span.SetAttributes(attribute.Bool("accepted", fits), attribute.String("reason", why))
55+
56+
if !fits {
57+
span.SetStatus(codes.Error, "rate limit exceeded")
58+
span.SetAttributes(attribute.Bool("accepted", false), attribute.String("reason", why))
59+
MetricRateLimiterRequests.WithLabelValues("rejected", why).Inc()
60+
61+
w.Header().Set("Retry-After", "5")
62+
w.WriteHeader(http.StatusTooManyRequests)
63+
_, _ = w.Write([]byte("server is too busy, try again later"))
64+
return
65+
} else {
66+
p.running.Add(1)
67+
MetricRateLimiterRequests.WithLabelValues("accepted", why).Inc()
68+
// From sync.AddUint32:
69+
// > AddUint32 atomically adds delta to *addr and returns the new value.
70+
// > To subtract a signed positive constant value c from x, do AddUint32(&x, ^uint32(c-1)).
71+
// > In particular, to decrement x, do AddUint32(&x, ^uint32(0)).
72+
// > Consider using the more ergonomic and less error-prone [Uint32.Add] instead.
73+
defer p.running.Add(^uint32(0)) // decrement
74+
75+
next.ServeHTTP(w, r)
76+
}
77+
})
78+
}
79+
80+
func (p processBasedLimiter) canFitRequest(ctx context.Context) (bool, string) {
81+
tracer := tracer(ctx)
82+
_, span := tracer.Start(ctx, "processBasedLimiter.canFitRequest", trace.WithAttributes(
83+
attribute.Int64("headroom", int64(p.cfg.Headroom)),
84+
attribute.Int64("min_memory_per_browser", int64(p.cfg.MinMemoryPerBrowser)),
85+
attribute.Int64("min_limit", int64(p.cfg.MinLimit)),
86+
attribute.Int64("max_limit", int64(p.cfg.MaxLimit)),
87+
attribute.Int64("max_available", int64(p.cfg.MaxAvailable))))
88+
defer span.End()
89+
90+
currentlyRunning := p.running.Load()
91+
span.SetAttributes(attribute.Int64("currently_running", int64(currentlyRunning)))
92+
if currentlyRunning < p.cfg.MinLimit {
93+
return true, "below minimum limit"
94+
} else if p.cfg.MaxLimit > 0 && currentlyRunning >= p.cfg.MaxLimit {
95+
return false, "hit maximum limit"
96+
}
97+
98+
totalMemory := memory.TotalMemory()
99+
if p.cfg.MaxAvailable > 0 && totalMemory > p.cfg.MaxAvailable {
100+
span.AddEvent("capping total memory to configured maximum")
101+
totalMemory = p.cfg.MaxAvailable
102+
}
103+
freeMemory := memory.FreeMemory()
104+
span.SetAttributes(
105+
attribute.Int64("total_memory", int64(totalMemory)),
106+
attribute.Int64("free_memory", int64(freeMemory)))
107+
108+
if totalMemory != 0 {
109+
totalSlots := totalMemory / p.cfg.MinMemoryPerBrowser
110+
MetricRateLimiterSlots.WithLabelValues("total").Set(float64(totalSlots))
111+
MetricRateLimiterSlots.WithLabelValues("free").Set(float64(totalSlots - uint64(currentlyRunning)))
112+
span.SetAttributes(attribute.Int64("total_slots", int64(totalSlots)))
113+
if currentlyRunning >= uint32(totalSlots) {
114+
return false, "no memory slots exist based on total memory"
115+
}
116+
} else {
117+
span.AddEvent("unable to determine total memory, skipping total memory slot check")
118+
}
119+
120+
if freeMemory != 0 {
121+
// Calculate whether we have enough for another slot.
122+
minRequired := max(p.cfg.MinMemoryPerBrowser, uint64(p.svc.PeakMemory))
123+
span.SetAttributes(attribute.Int64("min_required_per_browser", int64(minRequired)))
124+
if freeMemory < p.cfg.Headroom {
125+
return false, "free memory smaller than required headroom"
126+
} else if freeMemory-p.cfg.Headroom < minRequired {
127+
return false, "not enough free memory without headroom for another browser"
128+
}
129+
// We have enough free memory.
130+
} else {
131+
span.AddEvent("unable to determine free memory, skipping free memory check")
132+
}
133+
134+
return true, "sufficient memory slots exist"
135+
}
136+
137+
func NewRateLimiter(svc *service.ProcessStatService, cfg config.RateLimitConfig) (Limiter, error) {
138+
if cfg.Disabled {
139+
return noOpLimiter{}, nil
140+
}
141+
142+
return processBasedLimiter{
143+
svc: svc,
144+
cfg: cfg,
145+
running: &atomic.Uint32{},
146+
logger: slog.With("middleware", "rate_limiter"),
147+
}, nil
148+
}

pkg/api/middleware/recovery.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99
)
1010

1111
var MetricRecoveredRequests = prometheus.NewCounter(prometheus.CounterOpts{
12-
Name: "http_recovered_requests",
12+
Name: "http_recovered_requests_total",
1313
Help: "How many HTTP requests have panicked but recovered to not crash the application?",
1414
})
1515

pkg/api/middleware/trustedurl.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
)
99

1010
var MetricTrustedURLRequests = prometheus.NewCounterVec(prometheus.CounterOpts{
11-
Name: "http_trusted_url_requests",
11+
Name: "http_trusted_url_requests_total",
1212
Help: "Counts the requests with URL queries",
1313
}, []string{"result"})
1414

pkg/api/mux.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package api
22

33
import (
4+
"fmt"
45
"net/http"
56

67
"github.com/grafana/grafana-image-renderer/pkg/api/middleware"
@@ -17,18 +18,25 @@ func NewHandler(
1718
prometheus.Registerer
1819
},
1920
serverConfig config.ServerConfig,
21+
rateLimitConfig config.RateLimitConfig,
22+
processStatService *service.ProcessStatService,
2023
browser *service.BrowserService,
2124
versions *service.VersionService,
2225
) (http.Handler, error) {
26+
limiter, err := middleware.NewRateLimiter(processStatService, rateLimitConfig)
27+
if err != nil {
28+
return nil, fmt.Errorf("failed to create rate limiter: %w", err)
29+
}
30+
2331
mux := http.NewServeMux()
2432
mux.Handle("GET /", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
2533
_, _ = w.Write([]byte("Grafana Image Renderer (Go)"))
2634
}))
2735
mux.Handle("GET /metrics", middleware.TracingFor("promhttp.HandlerFor", promhttp.HandlerFor(metrics, promhttp.HandlerOpts{Registry: metrics})))
2836
mux.Handle("GET /healthz", HandleGetHealthz())
2937
mux.Handle("GET /version", HandleGetVersion(versions, browser))
30-
mux.Handle("GET /render", middleware.RequireAuthToken(middleware.TrustedURL(HandleGetRender(browser)), serverConfig.AuthTokens...))
31-
mux.Handle("GET /render/csv", middleware.RequireAuthToken(middleware.TrustedURL(HandleGetRenderCSV(browser)), serverConfig.AuthTokens...))
38+
mux.Handle("GET /render", middleware.RequireAuthToken(middleware.TrustedURL(limiter.Limit(HandleGetRender(browser))), serverConfig.AuthTokens...))
39+
mux.Handle("GET /render/csv", middleware.RequireAuthToken(middleware.TrustedURL(limiter.Limit(HandleGetRenderCSV(browser))), serverConfig.AuthTokens...))
3240
mux.Handle("GET /render/version", HandleGetRenderVersion(versions))
3341

3442
handler := middleware.RequestMetrics(mux)

pkg/config/config.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,3 +588,114 @@ func BrowserConfigFromCommand(c *cli.Command) (BrowserConfig, error) {
588588
Landscape: !c.Bool("browser.portrait"),
589589
}, nil
590590
}
591+
592+
type RateLimitConfig struct {
593+
// Disabled indicates whether rate limiting is disabled.
594+
Disabled bool
595+
596+
// TrackerDecay is the number N in decaying averages, `avg = ((N-1)*avg + new) / N`.
597+
// This must be a minimum of 1, which will not use a slow-moving average at all.
598+
TrackerDecay int64
599+
// TrackerInterval is how often to sample process statistics on the browser processes.
600+
// This must be a minimum of 1ms.
601+
TrackerInterval time.Duration
602+
603+
// MinLimit is the minimum number of requests to permit.
604+
// Even if we don't have slots for it, we will permit at least this many requests.
605+
// Set to 0 to disable minimum; this is generally not recommended, especially in containerised environments like Kubernetes.
606+
MinLimit uint32
607+
// MaxLimit is the maximum number of requests to permit.
608+
// Even if we have memory slots for more, we won't exceed this.
609+
// Set to 0 to disable maximum; this is generally the way to go in horizontally scaled deployments.
610+
MaxLimit uint32
611+
612+
// MaxAvailable is the maximum amount of memory (in bytes) available to processes.
613+
// If there is more memory than this, we will only consider this amount.
614+
// Set to 0 to use all available memory.
615+
MaxAvailable uint64
616+
// MinMemoryPerBrowser is the minimum amount of memory (in bytes) each browser process is expected to use.
617+
// If the process tracker reports less, this is the value used. Otherwise, we use the process tracker's value.
618+
// Set to 0 to disable the minimum.
619+
MinMemoryPerBrowser uint64
620+
// Headroom is how much memory (in bytes) should be left after the request's browser takes its share.
621+
// If this cannot be accommodated, we will reject the request.
622+
// Set to 0 to disable headroom.
623+
Headroom uint64
624+
}
625+
626+
func RateLimitFlags() []cli.Flag {
627+
return []cli.Flag{
628+
&cli.BoolFlag{
629+
Name: "rate-limit.disabled",
630+
Usage: "Disable rate limiting entirely.",
631+
Sources: FromConfig("rate-limit.disabled", "RATE_LIMIT_DISABLED"),
632+
},
633+
&cli.Int64Flag{
634+
Name: "rate-limit.process-tracker.decay",
635+
Usage: "The decay factor N to use in slow-moving averages of process statistics, where `avg = ((N-1)*avg + new) / N`. Must be at least 1.",
636+
Value: 5,
637+
Sources: FromConfig("rate-limit.process-tracker.decay", "RATE_LIMIT_PROCESS_TRACKER_DECAY"),
638+
Validator: func(i int64) error {
639+
if i < 1 {
640+
return fmt.Errorf("rate-limit.process-tracker.decay must be at least 1")
641+
}
642+
return nil
643+
},
644+
},
645+
&cli.DurationFlag{
646+
Name: "rate-limit.process-tracker.interval",
647+
Usage: "How often to sample process statistics on the browser processes. Must be >= 1ms.",
648+
Value: 50 * time.Millisecond,
649+
Sources: FromConfig("rate-limit.process-tracker.interval", "RATE_LIMIT_PROCESS_TRACKER_INTERVAL"),
650+
Validator: func(d time.Duration) error {
651+
if d < time.Millisecond {
652+
return fmt.Errorf("rate-limit.process-tracker.interval must be at least 1ms")
653+
}
654+
return nil
655+
},
656+
},
657+
&cli.Uint32Flag{
658+
Name: "rate-limit.min-limit",
659+
Usage: "The minimum number of requests to permit. Ratelimiting will not reject requests if the number of currently running requests is below this value. Set to 0 to disable minimum (not recommended).",
660+
Value: 3,
661+
Sources: FromConfig("rate-limit.min-limit", "RATE_LIMIT_MIN_LIMIT"),
662+
},
663+
&cli.Uint32Flag{
664+
Name: "rate-limit.max-limit",
665+
Usage: "The maximum number of requests to permit. Ratelimiting will reject requests if the number of currently running requests is at or above this value. Set to 0 to disable maximum. The v4 service used 5 by default.",
666+
Value: 0,
667+
Sources: FromConfig("rate-limit.max-limit", "RATE_LIMIT_MAX_LIMIT"),
668+
},
669+
&cli.Uint64Flag{
670+
Name: "rate-limit.max-available",
671+
Usage: "The maximum amount of memory (in bytes) available to processes. If more memory exists, only this amount is used. 0 disables the maximum.",
672+
Value: 0,
673+
Sources: FromConfig("rate-limit.max-available", "RATE_LIMIT_MAX_AVAILABLE", "GOMEMLIMIT"),
674+
},
675+
&cli.Uint64Flag{
676+
Name: "rate-limit.min-memory-per-browser",
677+
Usage: "The minimum amount of memory (in bytes) each browser process is expected to use. Set to 0 to disable the minimum.",
678+
Value: 64 * 1024 * 1024, // 64 MiB
679+
Sources: FromConfig("rate-limit.min-memory-per-browser", "RATE_LIMIT_MIN_MEMORY_PER_BROWSER"),
680+
},
681+
&cli.Uint64Flag{
682+
Name: "rate-limit.headroom",
683+
Usage: "The amount of memory (in bytes) to leave as headroom after allocating memory for browser processes. Set to 0 to disable headroom.",
684+
Value: 32 * 1024 * 1024, // 32 MiB
685+
Sources: FromConfig("rate-limit.headroom", "RATE_LIMIT_HEADROOM"),
686+
},
687+
}
688+
}
689+
690+
func RateLimitConfigFromCommand(c *cli.Command) (RateLimitConfig, error) {
691+
return RateLimitConfig{
692+
Disabled: c.Bool("rate-limit.disabled"),
693+
TrackerDecay: c.Int64("rate-limit.process-tracker.decay"),
694+
TrackerInterval: c.Duration("rate-limit.process-tracker.interval"),
695+
MinLimit: c.Uint32("rate-limit.min-limit"),
696+
MaxLimit: c.Uint32("rate-limit.max-limit"),
697+
MaxAvailable: c.Uint64("rate-limit.max-available"),
698+
MinMemoryPerBrowser: c.Uint64("rate-limit.min-memory-per-browser"),
699+
Headroom: c.Uint64("rate-limit.headroom"),
700+
}, nil
701+
}

0 commit comments

Comments
 (0)