Skip to content

Commit 6be7477

Browse files
craig[bot]joshimhoff
andcommitted
Merge #153794
153794: admission: introduce a granter for CPU time tokens r=sumeerbhola a=joshimhoff **admission: introduce a granter for CPU time tokens** This commit introduces a granter called cpuTimeTokenGranter. cpuTimeTokenGranter uses token buckets to limit CPU usage. This commit introduces the granter, but it does not implement any logic to periodically refill the token buckets, or compute the token rates, based on observed CPU usage. That will come in later PRs. Fixes: #154469 Release note: None. Co-authored-by: Josh Imhoff <[email protected]>
2 parents 29de882 + 41ddf78 commit 6be7477

File tree

6 files changed

+658
-9
lines changed

6 files changed

+658
-9
lines changed

pkg/util/admission/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go_library(
44
name = "admission",
55
srcs = [
66
"admission.go",
7+
"cpu_time_token_granter.go",
78
"disk_bandwidth.go",
89
"elastic_cpu_grant_coordinator.go",
910
"elastic_cpu_granter.go",
@@ -50,12 +51,14 @@ go_library(
5051
"@com_github_cockroachdb_pebble//:pebble",
5152
"@com_github_cockroachdb_redact//:redact",
5253
"@com_github_cockroachdb_tokenbucket//:tokenbucket",
54+
"@com_github_olekukonko_tablewriter//:tablewriter",
5355
],
5456
)
5557

5658
go_test(
5759
name = "admission_test",
5860
srcs = [
61+
"cpu_time_token_granter_test.go",
5962
"disk_bandwidth_test.go",
6063
"elastic_cpu_granter_test.go",
6164
"elastic_cpu_work_handle_test.go",

pkg/util/admission/admission.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
package admission
122122

123123
import (
124+
"fmt"
124125
"time"
125126

126127
"github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb"
@@ -143,6 +144,17 @@ const (
143144
numBurstQualifications
144145
)
145146

147+
func (bq burstQualification) String() string {
148+
switch bq {
149+
case canBurst:
150+
return "canBurst"
151+
case noBurst:
152+
return "noBurst"
153+
default:
154+
return fmt.Sprintf("burstQualification(%d)", bq)
155+
}
156+
}
157+
146158
// requester is an interface implemented by an object that orders admission
147159
// work for a particular WorkKind. See WorkQueue for the implementation of
148160
// requester.
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
// Copyright 2021 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package admission
7+
8+
import (
9+
"fmt"
10+
"strconv"
11+
"strings"
12+
13+
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
14+
"github.com/olekukonko/tablewriter"
15+
)
16+
17+
// resourceTier specifies the tier of a resource group, in descending levels of importance.
18+
// That is, tier 0 is the most important. The token bucket sizes must be such that the
19+
// non-burstable token bucket size of tier-i must be greater than the burstable token bucket
20+
// size of tier-(i+1); see cpuTimeTokenGranter for details on this.
21+
//
22+
// The tier determination for a request happens at a layer outside the admission package.
23+
//
24+
// TODO(josh): Add docs re: tentative usage after a discussion with Sumeer.
25+
//
26+
// NB: Inter-tenant fair sharing only works within a tier.
27+
//
28+
// TODO(josh): Move this definition to admission.go. Export publicly.
29+
type resourceTier uint8
30+
31+
const numResourceTiers resourceTier = 2
32+
33+
// cpuTimeTokenChildGranter implements granter. It stores resourceTier and proxies
34+
// to cpuTimeTokenGranter. See the declaration comment for cpuTimeTokenGranter
35+
// for more details.
36+
//
37+
// Each "child" granter is paired with a requester, since the requester (in practice, a
38+
// WorkQueue for a certain resourceTier) does not need to know about the others.
39+
// An alternative would be to make resourceTier an argument to the various granter methods,
40+
// but this approach seems cleaner.
41+
type cpuTimeTokenChildGranter struct {
42+
tier resourceTier
43+
parent *cpuTimeTokenGranter
44+
}
45+
46+
var _ granter = &cpuTimeTokenChildGranter{}
47+
48+
// tryGet implements granter.
49+
func (cg *cpuTimeTokenChildGranter) tryGet(qual burstQualification, count int64) bool {
50+
return cg.parent.tryGet(cg.tier, qual, count)
51+
}
52+
53+
// returnGrant implements granter.
54+
func (cg *cpuTimeTokenChildGranter) returnGrant(count int64) {
55+
cg.parent.returnGrant(count)
56+
}
57+
58+
// tookWithoutPermission implements granter.
59+
func (cg *cpuTimeTokenChildGranter) tookWithoutPermission(count int64) {
60+
cg.parent.tookWithoutPermission(count)
61+
}
62+
63+
// continueGrantChain implements granter.
64+
func (cg *cpuTimeTokenChildGranter) continueGrantChain(grantChainID grantChainID) {
65+
// Ignore since grant chains are not used.
66+
}
67+
68+
// cpuTimeTokenGranter uses token buckets to limit CPU usage. There is one
69+
// token bucket per type of request. Requests are only admitted (tryGet
70+
// only returns true), if the bucket for the type of request to be done has
71+
// positive tokens. Before a request is admitted, tokens are deducted from all
72+
// buckets, not just the bucket that was checked initially. This enables
73+
// setting up a hierarchy of types of requests, where some types can use more
74+
// CPU than others.
75+
//
76+
// For example, on an 8 vCPU machine, it might be set up like this:
77+
//
78+
// - Burstable tier-0 work -> 6 seconds of CPU time per second
79+
// - Non-burstable tier-0 work -> 5 seconds of CPU time per second
80+
// - Burstable tier-1 work -> 2 seconds of CPU time per second
81+
// - Non-burstable tier-1 work -> 1 second of CPU time per second
82+
//
83+
// A request for 5s of burstable tier-0 work would be admitted immediately,
84+
// since the burstable tier-0 bucket is positive. It would deduct from all
85+
// four buckets, resulting in a balance of (1,0,-3,-4). Non-burstable tier-0
86+
// work and all tier-1 work would now have to wait for their respective buckets
87+
// to refill, while burstable tier-0 work is still admissible.
88+
//
89+
// The immediate purpose of this is to achieve low goroutine scheduling latencies
90+
// even in the case of a Serverless tenant sending a large workload to a multi-host
91+
// cluster. The above rates will be set so as to limit CPU utilization to some
92+
// cluster-setting-configurable maximum. For example, if the target max is 80%, and
93+
// if the machine has 8 vCPUs, then the non-burstable tier-0 work will be allowed
94+
// 6.4 seconds of CPU time per second. In limiting CPU usage to some max, goroutine
95+
// scheduling latency can be kept low.
96+
//
97+
// TODO(josh): Add docs about resourceTier after a discussion with Sumeer.
98+
//
99+
// Note that cpuTimeTokenGranter does not handle replenishing the buckets.
100+
//
101+
// For more, see the initial design sketch:
102+
// https://docs.google.com/document/d/1-Kr2gRFTk0QV8kBs7AXRXUwFpK2ZxR1cqIwWCuOx22Q/edit?tab=t.0
103+
// TODO(josh): Turn into a proper design document.
104+
type cpuTimeTokenGranter struct {
105+
requester [numResourceTiers]requester
106+
mu struct {
107+
// TODO(josh): I suspect putting the mutex here is better than in
108+
// CPUTimeTokenGrantCoordinator, but for now the decision is tentative.
109+
// I think it is better to decide when I put up a PR that introduces
110+
// CPUTimeTokenGrantCoordinator & cpuTimeTokenAdjusterNew.
111+
syncutil.Mutex
112+
// Invariant #1: For any two buckets A & B, if A has a lower ordinal resourceTier,
113+
// then A must have more tokens than B.
114+
// Invariant #2: For any two buckets A & B, if A & B have the same resourceTier,
115+
// and if A has a lower ordinal burstQualification, then A must have more tokens than B.
116+
//
117+
// Since admission deducts from all buckets, these invariants are true, so long as token bucket
118+
// replenishing respects them also. Token bucket replenishing is not yet implemented. See
119+
// tryGrantLocked for a situation where invariant #1 is relied on.
120+
buckets [numResourceTiers][numBurstQualifications]tokenBucket
121+
}
122+
}
123+
124+
// TODO(josh): Make this observable. See here for one approach:
125+
// https://github.com/cockroachdb/cockroach/commit/06967f5fa72115348d57fc66fe895aec514261d5#diff-6212d039fab53dd464bd989bdbd537947b11d37a9c8fe77ca497870b49e28a9cR367
126+
type tokenBucket struct {
127+
tokens int64
128+
}
129+
130+
func (stg *cpuTimeTokenGranter) String() string {
131+
stg.mu.Lock()
132+
defer stg.mu.Unlock()
133+
var buf strings.Builder
134+
tw := tablewriter.NewWriter(&buf)
135+
hdrs := [numBurstQualifications + 1]string{}
136+
hdrs[0] = "cpuTTG"
137+
for gk := canBurst; gk < numBurstQualifications; gk++ {
138+
hdrs[1+gk] = gk.String()
139+
}
140+
tw.SetAlignment(tablewriter.ALIGN_LEFT)
141+
tw.SetAutoFormatHeaders(false)
142+
tw.SetBorder(false)
143+
tw.SetColumnSeparator("")
144+
tw.SetHeader(hdrs[:])
145+
tw.SetHeaderLine(false)
146+
tw.SetNoWhiteSpace(true)
147+
tw.SetTablePadding(" ")
148+
tw.SetTrimWhiteSpaceAtEOL(true)
149+
150+
for tier := 0; tier < int(numResourceTiers); tier++ {
151+
row := [1 + numBurstQualifications]string{}
152+
row[0] = "tier" + strconv.Itoa(tier)
153+
for gk := canBurst; gk < numBurstQualifications; gk++ {
154+
row[gk+1] = fmt.Sprint(stg.mu.buckets[tier][gk].tokens)
155+
}
156+
tw.Append(row[:])
157+
}
158+
tw.Render()
159+
return buf.String()
160+
}
161+
162+
// tryGet is the helper for implementing granter.tryGet.
163+
func (stg *cpuTimeTokenGranter) tryGet(
164+
tier resourceTier, qual burstQualification, count int64,
165+
) bool {
166+
stg.mu.Lock()
167+
defer stg.mu.Unlock()
168+
if stg.mu.buckets[tier][qual].tokens <= 0 {
169+
return false
170+
}
171+
stg.tookWithoutPermissionLocked(count)
172+
return true
173+
}
174+
175+
// returnGrant is the helper for implementing granter.returnGrant.
176+
func (stg *cpuTimeTokenGranter) returnGrant(count int64) {
177+
stg.mu.Lock()
178+
defer stg.mu.Unlock()
179+
stg.tookWithoutPermissionLocked(-count)
180+
// count must be positive. Thus the call above always adds tokens to the buckets.
181+
// Thus returnGrant should always attempt to grant admission to waiting requests.
182+
stg.grantUntilNoWaitingRequestsLocked()
183+
}
184+
185+
// tookWithoutPermission is the helper for implementing granter.tookWithoutPermission.
186+
func (stg *cpuTimeTokenGranter) tookWithoutPermission(count int64) {
187+
stg.mu.Lock()
188+
defer stg.mu.Unlock()
189+
stg.tookWithoutPermissionLocked(count)
190+
}
191+
192+
func (stg *cpuTimeTokenGranter) tookWithoutPermissionLocked(count int64) {
193+
for tier := range stg.mu.buckets {
194+
for qual := range stg.mu.buckets[tier] {
195+
stg.mu.buckets[tier][qual].tokens -= count
196+
}
197+
}
198+
}
199+
200+
// grantUntilNoWaitingRequestsLocked grants admission to all queued requests
201+
// that can be granted, given the current state of the token buckets, etc.
202+
// It prioritizes requesters from higher class work, in the sense of resourceTier.
203+
// That is, multiple waiting tier-0 requests will be granted before a single tier-1
204+
// request.
205+
func (stg *cpuTimeTokenGranter) grantUntilNoWaitingRequestsLocked() {
206+
// TODO(josh): If there are a lot of tokens, this could hold the mutex for a long
207+
// time. We may want to drop and reacquire the mutex after every 1000 requests or so.
208+
for stg.tryGrantLocked() {
209+
}
210+
}
211+
212+
// tryGrantLocked attempts to grant admission to a single queued request.
213+
// It prioritizes requesters from higher class work, in the sense of
214+
// resourceTier.
215+
func (stg *cpuTimeTokenGranter) tryGrantLocked() bool {
216+
for tier := range stg.requester {
217+
hasWaitingRequests, qual := stg.requester[tier].hasWaitingRequests()
218+
if !hasWaitingRequests {
219+
continue
220+
}
221+
if stg.mu.buckets[tier][qual].tokens <= 0 {
222+
// tryGrantLocked does not need to continue here, since there are
223+
// no more requests to grant. The detailed reason for this is:
224+
//
225+
// - stg.requester is ordered by resourceTier.
226+
// - Given two buckets A & B, if A is for a lower ordinal resourceTier,
227+
// more tokens will be in bucket A than bucket B (see cpuTimeTokenGranter
228+
// for more on this invariant).
229+
// - Thus, if no tokens in A, there are no tokens in B.
230+
//
231+
// Note that it is up to the requester which is the next request
232+
// to admit. So tryGrantLocked only needs to check the bucket that
233+
// corresponds to the burstQualification of that request, as is done
234+
// below.
235+
return false
236+
}
237+
tokens := stg.requester[tier].granted(noGrantChain)
238+
if tokens == 0 {
239+
// Did not accept grant.
240+
continue
241+
}
242+
stg.tookWithoutPermissionLocked(tokens)
243+
return true
244+
}
245+
return false
246+
}

0 commit comments

Comments
 (0)