Skip to content

Commit fe2cf7d

Browse files
authored
ocsp: add load shedding for live signer (#6523)
In live.go we use a semaphore to limit how many inflight signing requests we can have, so a flood of OCSP traffic doesn't flood our CA instances. If traffic exceeds our capacity to sign responses for long enough, we want to eventually start fast-rejecting inbound requests that are unlikely to get serviced before their deadline is reached. To do that, add a MaxSigningWaiters config field to the OCSP responder. Note that the files in //semaphore are forked from x/sync/semaphore, with modifications to add the MaxWaiters field and functionality. Fixes #6392
1 parent f2bb0e4 commit fe2cf7d

File tree

12 files changed

+755
-9
lines changed

12 files changed

+755
-9
lines changed

cmd/ocsp-responder/main.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,21 @@ type Config struct {
8181
// should be set to somewhat less than
8282
// (HSM signing capacity) / (number of ocsp-responders).
8383
// Requests that would exceed this limit will block until capacity is
84-
// available and eventually 500.
84+
// available and eventually serve an HTTP 500 Internal Server Error.
8585
MaxInflightSignings int
8686

87+
// A limit on how many goroutines can be waiting for a signing slot at
88+
// a time. When this limit is exceeded, additional signing requests
89+
// will immediately serve an HTTP 500 Internal Server Error until
90+
// we are back below the limit. This provides load shedding for when
91+
// inbound requests arrive faster than our ability to sign them.
92+
// The default of 0 means "no limit." A good value for this is the
93+
// longest queue we can expect to process before a timeout. For
94+
// instance, if the timeout is 5 seconds, and a signing takes 20ms,
95+
// and we have MaxInflightSignings = 40, we can expect to process
96+
// 40 * 5 / 0.02 = 10,000 requests before the oldest request times out.
97+
MaxSigningWaiters int
98+
8799
ShutdownStopTimeout cmd.ConfigDuration
88100

89101
RequiredSerialPrefixes []string
@@ -189,7 +201,7 @@ as generated by Boulder's ceremony command.
189201
if maxInflight == 0 {
190202
maxInflight = 1000
191203
}
192-
liveSource := live.New(rac, int64(maxInflight))
204+
liveSource := live.New(rac, int64(maxInflight), c.OCSPResponder.MaxSigningWaiters)
193205

194206
rocspSource, err := redis_responder.NewRedisSource(rocspReader, liveSource, liveSigningPeriod, clk, scope, logger)
195207
cmd.FailOnError(err, "Could not create redis source")

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ require (
3030
github.com/zmap/zlint/v3 v3.4.0
3131
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d
3232
golang.org/x/net v0.0.0-20220926192436-02166a98028e
33-
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4
33+
golang.org/x/sync v0.1.0
3434
golang.org/x/term v0.0.0-20220722155259-a9ba230a4035
3535
golang.org/x/text v0.3.8
3636
google.golang.org/grpc v1.49.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,8 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJ
624624
golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
625625
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=
626626
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
627+
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
628+
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
627629
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
628630
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
629631
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=

ocsp/responder/live/live.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ import (
99
berrors "github.com/letsencrypt/boulder/errors"
1010
"github.com/letsencrypt/boulder/ocsp/responder"
1111
rapb "github.com/letsencrypt/boulder/ra/proto"
12+
"github.com/letsencrypt/boulder/semaphore"
1213
"golang.org/x/crypto/ocsp"
13-
"golang.org/x/sync/semaphore"
1414
"google.golang.org/grpc"
1515
)
1616

@@ -23,10 +23,10 @@ type Source struct {
2323
sem *semaphore.Weighted
2424
}
2525

26-
func New(ra ocspGenerator, maxInflight int64) *Source {
26+
func New(ra ocspGenerator, maxInflight int64, maxWaiters int) *Source {
2727
return &Source{
2828
ra: ra,
29-
sem: semaphore.NewWeighted(maxInflight),
29+
sem: semaphore.NewWeighted(maxInflight, maxWaiters),
3030
}
3131
}
3232

ocsp/responder/live/live_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func TestLiveResponse(t *testing.T) {
4545
fakeResp, _, _ := ocsp_test.FakeResponse(ocsp.Response{
4646
SerialNumber: eeSerial,
4747
})
48-
source := New(mockOCSPGenerator{fakeResp.Raw}, 1)
48+
source := New(mockOCSPGenerator{fakeResp.Raw}, 1, 0)
4949
resp, err := source.Response(context.Background(), &ocsp.Request{
5050
SerialNumber: eeSerial,
5151
})
@@ -59,7 +59,7 @@ func TestLiveResponse(t *testing.T) {
5959

6060
func TestNotFound(t *testing.T) {
6161
eeSerial := big.NewInt(1)
62-
source := New(notFoundOCSPGenerator{}, 1)
62+
source := New(notFoundOCSPGenerator{}, 1, 0)
6363
_, err := source.Response(context.Background(), &ocsp.Request{
6464
SerialNumber: eeSerial,
6565
})

semaphore/semaphore.go

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// Modified by Boulder to provide a load-shedding mechanism.
5+
6+
// Package semaphore provides a weighted semaphore implementation.
7+
package semaphore // import "golang.org/x/sync/semaphore"
8+
9+
import (
10+
"container/list"
11+
"context"
12+
"errors"
13+
"sync"
14+
)
15+
16+
// waiter represents one Acquire call that is blocked waiting for capacity.
type waiter struct {
	n     int64           // Weight requested by the blocked Acquire call.
	ready chan<- struct{} // Closed when semaphore acquired.
}

// ErrMaxWaiters is returned by Acquire when the call would have to wait for
// capacity, but maxWaiters (or more) callers are already waiting.
var ErrMaxWaiters = errors.New("too many waiters")

// NewWeighted creates a new weighted semaphore with the given
// maximum combined weight for concurrent access.
// maxWaiters provides a limit such that calls to Acquire
// will immediately error if the number of waiters is that high.
// A maxWaiters of zero (or less) means no limit.
func NewWeighted(n int64, maxWaiters int) *Weighted {
	return &Weighted{size: n, maxWaiters: maxWaiters}
}

// Weighted provides a way to bound concurrent access to a resource.
// The callers can request access with a given weight.
type Weighted struct {
	size       int64      // Maximum combined weight that may be held at once.
	cur        int64      // Combined weight currently held.
	mu         sync.Mutex // Held by every method while it reads or writes cur and waiters.
	waiters    list.List  // FIFO queue of waiter values for blocked Acquire calls.
	maxWaiters int        // Maximum length of waiters; values <= 0 disable the limit.
}

// Acquire acquires the semaphore with a weight of n, blocking until resources
// are available or ctx is done. On success, returns nil. On failure, returns
// ctx.Err() or ErrMaxWaiters and leaves the semaphore unchanged.
//
// If ctx is already done, Acquire may still succeed without blocking.
//
// If the call would have to wait and there are already maxWaiters waiters,
// Acquire returns ErrMaxWaiters immediately instead of queueing.
func (s *Weighted) Acquire(ctx context.Context, n int64) error {
	s.mu.Lock()
	if s.size-s.cur >= n && s.waiters.Len() == 0 {
		// Fast path: capacity is free and nobody is queued ahead of us.
		s.cur += n
		s.mu.Unlock()
		return nil
	}

	if n > s.size {
		// Don't make other Acquire calls block on one that's doomed to fail.
		s.mu.Unlock()
		<-ctx.Done()
		return ctx.Err()
	}

	if s.maxWaiters > 0 && s.waiters.Len() >= s.maxWaiters {
		// Load shedding: the queue is full, so reject right away. The mutex
		// must be released on this path as well; returning with it held would
		// block every later Acquire, TryAcquire, and Release call forever.
		s.mu.Unlock()
		return ErrMaxWaiters
	}

	ready := make(chan struct{})
	w := waiter{n: n, ready: ready}
	elem := s.waiters.PushBack(w)
	s.mu.Unlock()

	select {
	case <-ctx.Done():
		err := ctx.Err()
		s.mu.Lock()
		select {
		case <-ready:
			// Acquired the semaphore after we were canceled. Rather than trying to
			// fix up the queue, just pretend we didn't notice the cancellation.
			err = nil
		default:
			isFront := s.waiters.Front() == elem
			s.waiters.Remove(elem)
			// If we're at the front and there're extra tokens left, notify other waiters.
			if isFront && s.size > s.cur {
				s.notifyWaiters()
			}
		}
		s.mu.Unlock()
		return err

	case <-ready:
		return nil
	}
}

// TryAcquire acquires the semaphore with a weight of n without blocking.
// On success, returns true. On failure, returns false and leaves the semaphore unchanged.
func (s *Weighted) TryAcquire(n int64) bool {
	s.mu.Lock()
	// Only succeed when nobody is queued, so TryAcquire cannot jump ahead of
	// callers already blocked in Acquire.
	success := s.size-s.cur >= n && s.waiters.Len() == 0
	if success {
		s.cur += n
	}
	s.mu.Unlock()
	return success
}

// Release releases the semaphore with a weight of n.
// It panics if that would release more weight than is currently held.
func (s *Weighted) Release(n int64) {
	s.mu.Lock()
	s.cur -= n
	if s.cur < 0 {
		s.mu.Unlock()
		panic("semaphore: released more than held")
	}
	s.notifyWaiters()
	s.mu.Unlock()
}

// notifyWaiters hands free capacity to queued waiters in FIFO order, stopping
// at the first waiter whose request does not fit. Every caller in this file
// invokes it with s.mu held.
func (s *Weighted) notifyWaiters() {
	for {
		next := s.waiters.Front()
		if next == nil {
			break // No more waiters blocked.
		}

		w := next.Value.(waiter)
		if s.size-s.cur < w.n {
			// Not enough tokens for the next waiter. We could keep going (to try to
			// find a waiter with a smaller request), but under load that could cause
			// starvation for large requests; instead, we leave all remaining waiters
			// blocked.
			//
			// Consider a semaphore used as a read-write lock, with N tokens, N
			// readers, and one writer. Each reader can Acquire(1) to obtain a read
			// lock. The writer can Acquire(N) to obtain a write lock, excluding all
			// of the readers. If we allow the readers to jump ahead in the queue,
			// the writer will starve — there is always one token available for every
			// reader.
			break
		}

		s.cur += w.n
		s.waiters.Remove(next)
		close(w.ready)
	}
}

semaphore/semaphore_bench_test.go

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build go1.7
6+
// +build go1.7
7+
8+
package semaphore_test
9+
10+
import (
11+
"context"
12+
"fmt"
13+
"testing"
14+
15+
"github.com/letsencrypt/boulder/semaphore"
16+
)
17+
18+
// weighted lists the semaphore operations the benchmarks rely on, mirroring
// part of the *Weighted method set so that alternate implementations can be
// swapped in for testing and benchmarking.
type weighted interface {
	Acquire(ctx context.Context, n int64) error
	TryAcquire(n int64) bool
	Release(n int64)
}
25+
26+
// semChan is a buffered-channel semaphore offering the same operations as
// Weighted; the benchmarks use it as a baseline to compare against. Every
// element sitting in the channel stands for one unit of held weight.
type semChan chan struct{}

// newSemChan returns a semChan able to hold a combined weight of n.
func newSemChan(n int64) semChan {
	ch := make(chan struct{}, n)
	return semChan(ch)
}

// Acquire takes n units by sending n elements into the channel, blocking
// whenever the buffer is full. The context is ignored and the result is
// always nil.
func (s semChan) Acquire(_ context.Context, n int64) error {
	for remaining := n; remaining > 0; remaining-- {
		s <- struct{}{}
	}
	return nil
}

// TryAcquire takes n units only if the buffer currently has room for all of
// them; otherwise it reports false and sends nothing.
func (s semChan) TryAcquire(n int64) bool {
	if int64(cap(s)) < int64(len(s))+n {
		return false
	}

	for remaining := n; remaining > 0; remaining-- {
		s <- struct{}{}
	}
	return true
}

// Release gives back n units by receiving n elements from the channel.
func (s semChan) Release(n int64) {
	for remaining := n; remaining > 0; remaining-- {
		<-s
	}
}
57+
58+
// acquireN calls Acquire(size) on sem N times and then calls Release(size) N times.
59+
func acquireN(b *testing.B, sem weighted, size int64, N int) {
60+
b.ResetTimer()
61+
for i := 0; i < b.N; i++ {
62+
for j := 0; j < N; j++ {
63+
sem.Acquire(context.Background(), size)
64+
}
65+
for j := 0; j < N; j++ {
66+
sem.Release(size)
67+
}
68+
}
69+
}
70+
71+
// tryAcquireN calls TryAcquire(size) on sem N times and then calls Release(size) N times.
72+
func tryAcquireN(b *testing.B, sem weighted, size int64, N int) {
73+
b.ResetTimer()
74+
for i := 0; i < b.N; i++ {
75+
for j := 0; j < N; j++ {
76+
if !sem.TryAcquire(size) {
77+
b.Fatalf("TryAcquire(%v) = false, want true", size)
78+
}
79+
}
80+
for j := 0; j < N; j++ {
81+
sem.Release(size)
82+
}
83+
}
84+
}
85+
86+
func BenchmarkNewSeq(b *testing.B) {
87+
for _, cap := range []int64{1, 128} {
88+
b.Run(fmt.Sprintf("Weighted-%d", cap), func(b *testing.B) {
89+
for i := 0; i < b.N; i++ {
90+
_ = semaphore.NewWeighted(cap, 0)
91+
}
92+
})
93+
b.Run(fmt.Sprintf("semChan-%d", cap), func(b *testing.B) {
94+
for i := 0; i < b.N; i++ {
95+
_ = newSemChan(cap)
96+
}
97+
})
98+
}
99+
}
100+
101+
func BenchmarkAcquireSeq(b *testing.B) {
102+
for _, c := range []struct {
103+
cap, size int64
104+
N int
105+
}{
106+
{1, 1, 1},
107+
{2, 1, 1},
108+
{16, 1, 1},
109+
{128, 1, 1},
110+
{2, 2, 1},
111+
{16, 2, 8},
112+
{128, 2, 64},
113+
{2, 1, 2},
114+
{16, 8, 2},
115+
{128, 64, 2},
116+
} {
117+
for _, w := range []struct {
118+
name string
119+
w weighted
120+
}{
121+
{"Weighted", semaphore.NewWeighted(c.cap, 0)},
122+
{"semChan", newSemChan(c.cap)},
123+
} {
124+
b.Run(fmt.Sprintf("%s-acquire-%d-%d-%d", w.name, c.cap, c.size, c.N), func(b *testing.B) {
125+
acquireN(b, w.w, c.size, c.N)
126+
})
127+
b.Run(fmt.Sprintf("%s-tryAcquire-%d-%d-%d", w.name, c.cap, c.size, c.N), func(b *testing.B) {
128+
tryAcquireN(b, w.w, c.size, c.N)
129+
})
130+
}
131+
}
132+
}

0 commit comments

Comments
 (0)