Skip to content

Commit 04ae9eb

Browse files
authored
bad-key-revoker: Add delay to mitigate race condition (#8301)
Add a `MaxExpectedReplicationLag` parameter to `bad-key-revoker`. Wait that interval before searching for certificates to revoke. The interval is set to only 100ms in both `test/config` and `test/config-next` so that integration tests don't require long sleeps. The default value within BKR is, and the production value should be, higher. Part of #5686
1 parent a3c1e62 commit 04ae9eb

File tree

4 files changed

+109
-54
lines changed

4 files changed

+109
-54
lines changed

cmd/bad-key-revoker/main.go

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,17 @@ type revoker interface {
4646
}
4747

4848
type badKeyRevoker struct {
49-
dbMap *db.WrappedMap
50-
maxRevocations int
51-
serialBatchSize int
52-
raClient revoker
53-
logger blog.Logger
54-
clk clock.Clock
55-
backoffIntervalBase time.Duration
56-
backoffIntervalMax time.Duration
57-
backoffFactor float64
58-
backoffTicker int
49+
dbMap *db.WrappedMap
50+
maxRevocations int
51+
serialBatchSize int
52+
raClient revoker
53+
logger blog.Logger
54+
clk clock.Clock
55+
backoffIntervalBase time.Duration
56+
backoffIntervalMax time.Duration
57+
backoffFactor float64
58+
backoffTicker int
59+
maxExpectedReplicationLag time.Duration
5960
}
6061

6162
// uncheckedBlockedKey represents a row in the blockedKeys table
@@ -76,8 +77,10 @@ func (bkr *badKeyRevoker) countUncheckedKeys(ctx context.Context) (int, error) {
7677
&count,
7778
`SELECT COUNT(*)
7879
FROM (SELECT 1 FROM blockedKeys
79-
WHERE extantCertificatesChecked = false
80+
WHERE extantCertificatesChecked = false AND added < ? - INTERVAL ? SECOND
8081
LIMIT ?) AS a`,
82+
bkr.clk.Now(),
83+
bkr.maxExpectedReplicationLag.Seconds(),
8184
blockedKeysGaugeLimit,
8285
)
8386
return count, err
@@ -90,8 +93,10 @@ func (bkr *badKeyRevoker) selectUncheckedKey(ctx context.Context) (uncheckedBloc
9093
&row,
9194
`SELECT keyHash, revokedBy
9295
FROM blockedKeys
93-
WHERE extantCertificatesChecked = false
96+
WHERE extantCertificatesChecked = false AND added < ? - INTERVAL ? SECOND
9497
LIMIT 1`,
98+
bkr.clk.Now(),
99+
bkr.maxExpectedReplicationLag.Seconds(),
95100
)
96101
return row, err
97102
}
@@ -275,6 +280,7 @@ type Config struct {
275280
// is higher than MaximumRevocations bad-key-revoker will error out and refuse to
276281
// progress until this is addressed.
277282
MaximumRevocations int `validate:"gte=0"`
283+
278284
// FindCertificatesBatchSize specifies the maximum number of serials to select from the
279285
// keyHashToSerial table at once
280286
FindCertificatesBatchSize int `validate:"required"`
@@ -288,6 +294,13 @@ type Config struct {
288294
// algorithm will wait before retrying in the event of error
289295
// or no work to do.
290296
BackoffIntervalMax config.Duration `validate:"-"`
297+
298+
// MaxExpectedReplicationLag specifies the minimum duration
299+
// bad-key-revoker should wait before searching for certificates
300+
// matching a blockedKeys row. This should be just slightly greater than
301+
// the database's maximum replication lag, and always well under 24
302+
// hours.
303+
MaxExpectedReplicationLag config.Duration `validate:"-"`
291304
}
292305

293306
Syslog cmd.SyslogConfig
@@ -330,15 +343,16 @@ func main() {
330343
rac := rapb.NewRegistrationAuthorityClient(conn)
331344

332345
bkr := &badKeyRevoker{
333-
dbMap: dbMap,
334-
maxRevocations: config.BadKeyRevoker.MaximumRevocations,
335-
serialBatchSize: config.BadKeyRevoker.FindCertificatesBatchSize,
336-
raClient: rac,
337-
logger: logger,
338-
clk: clk,
339-
backoffIntervalMax: config.BadKeyRevoker.BackoffIntervalMax.Duration,
340-
backoffIntervalBase: config.BadKeyRevoker.Interval.Duration,
341-
backoffFactor: 1.3,
346+
dbMap: dbMap,
347+
maxRevocations: config.BadKeyRevoker.MaximumRevocations,
348+
serialBatchSize: config.BadKeyRevoker.FindCertificatesBatchSize,
349+
raClient: rac,
350+
logger: logger,
351+
clk: clk,
352+
backoffIntervalMax: config.BadKeyRevoker.BackoffIntervalMax.Duration,
353+
backoffIntervalBase: config.BadKeyRevoker.Interval.Duration,
354+
backoffFactor: 1.3,
355+
maxExpectedReplicationLag: config.BadKeyRevoker.MaxExpectedReplicationLag.Duration,
342356
}
343357

344358
// If `BackoffIntervalMax` was not set via the config, set it to 60
@@ -354,6 +368,14 @@ func main() {
354368
bkr.backoffIntervalBase = time.Second
355369
}
356370

371+
// If `MaxExpectedReplicationLag` was not set via the config, then set
372+
// `bkr.maxExpectedReplicationLag` to a default 22 seconds. This is based on
373+
// ProxySQL's max_replication_lag for bad-key-revoker (10s), times two, plus
374+
// two seconds.
375+
if bkr.maxExpectedReplicationLag == 0 {
376+
bkr.maxExpectedReplicationLag = time.Second * 22
377+
}
378+
357379
// Run bad-key-revoker in a loop. Backoff if no work or errors.
358380
for {
359381
noWork, err := bkr.invoke(context.Background())

cmd/bad-key-revoker/main_test.go

Lines changed: 62 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ func insertBlockedRow(t *testing.T, dbMap *db.WrappedMap, fc clock.Clock, hash [
4545
test.AssertNotError(t, err, "failed to add test row")
4646
}
4747

48+
func fcBeforeRepLag(clk clock.Clock, bkr *badKeyRevoker) clock.FakeClock {
49+
fc := clock.NewFake()
50+
fc.Set(clk.Now().Add(-bkr.maxExpectedReplicationLag - time.Second))
51+
return fc
52+
}
53+
4854
func TestSelectUncheckedRows(t *testing.T) {
4955
ctx := context.Background()
5056

@@ -55,24 +61,30 @@ func TestSelectUncheckedRows(t *testing.T) {
5561
fc := clock.NewFake()
5662

5763
bkr := &badKeyRevoker{
58-
dbMap: dbMap,
59-
logger: blog.NewMock(),
60-
clk: fc,
64+
dbMap: dbMap,
65+
logger: blog.NewMock(),
66+
clk: fc,
67+
maxExpectedReplicationLag: time.Second * 22,
6168
}
6269

6370
hashA, hashB, hashC := randHash(t), randHash(t), randHash(t)
71+
72+
// insert a blocked key that's marked as already checked
6473
insertBlockedRow(t, dbMap, fc, hashA, 1, true)
6574
count, err := bkr.countUncheckedKeys(ctx)
6675
test.AssertNotError(t, err, "countUncheckedKeys failed")
6776
test.AssertEquals(t, count, 0)
6877
_, err = bkr.selectUncheckedKey(ctx)
6978
test.AssertError(t, err, "selectUncheckedKey didn't fail with no rows to process")
7079
test.Assert(t, db.IsNoRows(err), "returned error is not sql.ErrNoRows")
71-
insertBlockedRow(t, dbMap, fc, hashB, 1, false)
80+
81+
// insert a blocked key that's due to be checked
82+
insertBlockedRow(t, dbMap, fcBeforeRepLag(fc, bkr), hashB, 1, false)
83+
// insert a freshly blocked key, so it's not yet due to be checked
7284
insertBlockedRow(t, dbMap, fc, hashC, 1, false)
7385
count, err = bkr.countUncheckedKeys(ctx)
7486
test.AssertNotError(t, err, "countUncheckedKeys failed")
75-
test.AssertEquals(t, count, 2)
87+
test.AssertEquals(t, count, 1)
7688
row, err := bkr.selectUncheckedKey(ctx)
7789
test.AssertNotError(t, err, "selectUncheckKey failed")
7890
test.AssertByteEquals(t, row.KeyHash, hashB)
@@ -191,7 +203,13 @@ func TestFindUnrevokedNoRows(t *testing.T) {
191203
)
192204
test.AssertNotError(t, err, "failed to insert test keyHashToSerial row")
193205

194-
bkr := &badKeyRevoker{dbMap: dbMap, serialBatchSize: 1, maxRevocations: 10, clk: fc}
206+
bkr := &badKeyRevoker{
207+
dbMap: dbMap,
208+
serialBatchSize: 1,
209+
maxRevocations: 10,
210+
clk: fc,
211+
maxExpectedReplicationLag: time.Second * 22,
212+
}
195213
_, err = bkr.findUnrevoked(ctx, uncheckedBlockedKey{KeyHash: hashA})
196214
test.Assert(t, db.IsNoRows(err), "expected NoRows error")
197215
}
@@ -207,7 +225,13 @@ func TestFindUnrevoked(t *testing.T) {
207225

208226
regID := insertRegistration(t, dbMap, fc)
209227

210-
bkr := &badKeyRevoker{dbMap: dbMap, serialBatchSize: 1, maxRevocations: 10, clk: fc}
228+
bkr := &badKeyRevoker{
229+
dbMap: dbMap,
230+
serialBatchSize: 1,
231+
maxRevocations: 10,
232+
clk: fc,
233+
maxExpectedReplicationLag: time.Second * 22,
234+
}
211235

212236
hashA := randHash(t)
213237
// insert valid, unexpired
@@ -251,7 +275,11 @@ func TestRevokeCerts(t *testing.T) {
251275

252276
fc := clock.NewFake()
253277
mr := &mockRevoker{}
254-
bkr := &badKeyRevoker{dbMap: dbMap, raClient: mr, clk: fc}
278+
bkr := &badKeyRevoker{
279+
dbMap: dbMap,
280+
raClient: mr,
281+
clk: fc,
282+
}
255283

256284
err = bkr.revokeCerts([]unrevokedCertificate{
257285
{ID: 0, Serial: "ff"},
@@ -269,11 +297,20 @@ func TestCertificateAbsent(t *testing.T) {
269297
defer test.ResetBoulderTestDatabase(t)()
270298

271299
fc := clock.NewFake()
300+
bkr := &badKeyRevoker{
301+
dbMap: dbMap,
302+
maxRevocations: 1,
303+
serialBatchSize: 1,
304+
raClient: &mockRevoker{},
305+
logger: blog.NewMock(),
306+
clk: fc,
307+
maxExpectedReplicationLag: time.Second * 22,
308+
}
272309

273310
// populate DB with all the test data
274311
regIDA := insertRegistration(t, dbMap, fc)
275312
hashA := randHash(t)
276-
insertBlockedRow(t, dbMap, fc, hashA, regIDA, false)
313+
insertBlockedRow(t, dbMap, fcBeforeRepLag(fc, bkr), hashA, regIDA, false)
277314

278315
// Add an entry to keyHashToSerial but not to certificateStatus or certificate
279316
// status, and expect an error.
@@ -286,14 +323,6 @@ func TestCertificateAbsent(t *testing.T) {
286323
)
287324
test.AssertNotError(t, err, "failed to insert test keyHashToSerial row")
288325

289-
bkr := &badKeyRevoker{
290-
dbMap: dbMap,
291-
maxRevocations: 1,
292-
serialBatchSize: 1,
293-
raClient: &mockRevoker{},
294-
logger: blog.NewMock(),
295-
clk: fc,
296-
}
297326
_, err = bkr.invoke(ctx)
298327
test.AssertError(t, err, "expected error when row in keyHashToSerial didn't have a matching cert")
299328
}
@@ -309,12 +338,13 @@ func TestInvoke(t *testing.T) {
309338

310339
mr := &mockRevoker{}
311340
bkr := &badKeyRevoker{
312-
dbMap: dbMap,
313-
maxRevocations: 10,
314-
serialBatchSize: 1,
315-
raClient: mr,
316-
logger: blog.NewMock(),
317-
clk: fc,
341+
dbMap: dbMap,
342+
maxRevocations: 10,
343+
serialBatchSize: 1,
344+
raClient: mr,
345+
logger: blog.NewMock(),
346+
clk: fc,
347+
maxExpectedReplicationLag: time.Second * 22,
318348
}
319349

320350
// populate DB with all the test data
@@ -323,7 +353,7 @@ func TestInvoke(t *testing.T) {
323353
regIDC := insertRegistration(t, dbMap, fc)
324354
regIDD := insertRegistration(t, dbMap, fc)
325355
hashA := randHash(t)
326-
insertBlockedRow(t, dbMap, fc, hashA, regIDC, false)
356+
insertBlockedRow(t, dbMap, fcBeforeRepLag(fc, bkr), hashA, regIDC, false)
327357
insertGoodCert(t, dbMap, fc, hashA, "ff", regIDA)
328358
insertGoodCert(t, dbMap, fc, hashA, "ee", regIDB)
329359
insertGoodCert(t, dbMap, fc, hashA, "dd", regIDC)
@@ -344,7 +374,7 @@ func TestInvoke(t *testing.T) {
344374

345375
// add a row with no associated valid certificates
346376
hashB := randHash(t)
347-
insertBlockedRow(t, dbMap, fc, hashB, regIDC, false)
377+
insertBlockedRow(t, dbMap, fcBeforeRepLag(fc, bkr), hashB, regIDC, false)
348378
insertCert(t, dbMap, fc, hashB, "bb", regIDA, Expired, Revoked)
349379

350380
noWork, err = bkr.invoke(ctx)
@@ -375,11 +405,12 @@ func TestInvokeRevokerHasNoExtantCerts(t *testing.T) {
375405

376406
mr := &mockRevoker{}
377407
bkr := &badKeyRevoker{dbMap: dbMap,
378-
maxRevocations: 10,
379-
serialBatchSize: 1,
380-
raClient: mr,
381-
logger: blog.NewMock(),
382-
clk: fc,
408+
maxRevocations: 10,
409+
serialBatchSize: 1,
410+
raClient: mr,
411+
logger: blog.NewMock(),
412+
clk: fc,
413+
maxExpectedReplicationLag: time.Second * 22,
383414
}
384415

385416
// populate DB with all the test data
@@ -389,7 +420,7 @@ func TestInvokeRevokerHasNoExtantCerts(t *testing.T) {
389420

390421
hashA := randHash(t)
391422

392-
insertBlockedRow(t, dbMap, fc, hashA, regIDA, false)
423+
insertBlockedRow(t, dbMap, fcBeforeRepLag(fc, bkr), hashA, regIDA, false)
393424

394425
insertGoodCert(t, dbMap, fc, hashA, "ee", regIDB)
395426
insertGoodCert(t, dbMap, fc, hashA, "dd", regIDB)

test/config-next/bad-key-revoker.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
"maximumRevocations": 15,
2323
"findCertificatesBatchSize": 10,
2424
"interval": "50ms",
25-
"backoffIntervalMax": "2s"
25+
"backoffIntervalMax": "2s",
26+
"maxExpectedReplicationLag": "100ms"
2627
},
2728
"syslog": {
2829
"stdoutlevel": 4,

test/config/bad-key-revoker.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
"maximumRevocations": 15,
2424
"findCertificatesBatchSize": 10,
2525
"interval": "50ms",
26-
"backoffIntervalMax": "2s"
26+
"backoffIntervalMax": "2s",
27+
"maxExpectedReplicationLag": "100ms"
2728
},
2829
"syslog": {
2930
"stdoutlevel": 4,

0 commit comments

Comments
 (0)