Skip to content

Commit 7bef357

Browse files
craig[bot]sumeerbholarafiss
committed
146124: admission: call LowerBound(0) instead of Reset on token bucket for el… r=jeffswenson a=sumeerbhola …astic CPU The goal of this periodic operation is to ensure the token bucket is not extremely negative, which can happen if elastic work consumes more CPU than initially granted. However, instead of merely ensuring a lower bound of 0 tokens, it was resetting the token bucket to be full, which would admit another burst. Observed in https://cockroachlabs.slack.com/archives/C01SRKWGHG8/p1745891638545989?thread_ts=1745881523.172559&cid=C01SRKWGHG8. Epic: none Release note: None 146135: sql: fix DROP REGION error messages r=rafiss a=rafiss The format arguments were not correct, and the error was misleading for the system database. Epic: None Release note: None Co-authored-by: sumeerbhola <[email protected]> Co-authored-by: Rafi Shamim <[email protected]>
3 parents 7d1a0ec + a694f6a + 1c73c6f commit 7bef357

File tree

2 files changed

+21
-12
lines changed

2 files changed

+21
-12
lines changed

pkg/sql/alter_database.go

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -442,16 +442,25 @@ func (p *planner) AlterDatabaseDropRegion(
442442
)
443443
}
444444

445+
// The system database, once it's been made multi-region, must not be
446+
// allowed to go back.
445447
isSystemDatabase := dbDesc.ID == keys.SystemDatabaseID
446-
if allowDrop := allowDropFinalRegion.Get(&p.execCfg.Settings.SV); !allowDrop ||
447-
// The system database, once it's been made multi-region, must not be
448-
// allowed to go back.
449-
isSystemDatabase {
448+
if isSystemDatabase {
450449
return nil, pgerror.Newf(
451450
pgcode.InvalidDatabaseDefinition,
452-
"databases in this cluster must have at least 1 region",
451+
"cannot drop %s; system database must have at least 1 region",
453452
n.Region,
454-
sqlclustersettings.DefaultPrimaryRegionClusterSettingName,
453+
)
454+
}
455+
if allowDrop := allowDropFinalRegion.Get(&p.execCfg.Settings.SV); !allowDrop {
456+
return nil, errors.WithHintf(
457+
pgerror.Newf(
458+
pgcode.InvalidDatabaseDefinition,
459+
"cannot drop %s; databases in this cluster must have at least 1 region",
460+
n.Region,
461+
),
462+
"Try enabling the %s cluster setting.",
463+
allowDropFinalRegion.Name(),
455464
)
456465
}
457466

pkg/util/admission/elastic_cpu_granter.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,12 +208,12 @@ func (e *elasticCPUGranter) setUtilizationLimit(utilizationLimit float64) {
208208
e.mu.utilizationLimit = utilizationLimit
209209
e.mu.tb.UpdateConfig(tokenbucket.TokensPerSecond(rate), tokenbucket.Tokens(rate))
210210
if now := timeutil.Now(); now.Sub(e.mu.tbLastReset) > 15*time.Second { // TODO(irfansharif): make this is a cluster setting?
211-
// Periodically reset the token bucket. This is just defense-in-depth
212-
// and at worst, over-admits. We've seen production clusters where the
213-
// token bucket was severely in debt and caused wait queue times of
214-
// minutes, which can be long enough to fail backups completely
215-
// (#102817).
216-
e.mu.tb.Reset()
211+
// Periodically ensure the tokens are at least 0. This is just
212+
// defense-in-depth and at worst, over-admits. We've seen production
213+
// clusters where the token bucket was severely in debt and caused wait
214+
// queue times of minutes, which can be long enough to fail backups
215+
// completely (#102817).
216+
e.mu.tb.EnsureLowerBound(0)
217217
e.mu.tbLastReset = now
218218
}
219219
e.metrics.NanosExhaustedDuration.Update(e.mu.tb.Exhausted().Microseconds())

0 commit comments

Comments
 (0)