Commit fdb417a
kvserver: improve tenant_id CPU observability
Motivated by cockroachlabs/support#3297. This commit

- adds a new `replicas.cpunanospersecond` metric, which aggregates the
  Replica ReqCPUNanosPerSecond at the tenant level, and
- adds a tenant_id tag to CPU profiles.

This should simplify investigations related to tenant-induced overload: the
new metric should often help pinpoint the set of hot tenants, and CPU
profiles can help dig into the specific code paths those tenants are
exercising. This can then be rounded out with the existing metrics for
request counts by tenant (both received by KV and sent by the tenant Pod)
for a comprehensive picture.

Release note: the `replicas.cpunanospersecond` metric was added. Notably,
when child labels are enabled, it exposes evaluation-related Replica CPU
usage by tenant.

Epic: none
1 parent c860bad commit fdb417a

7 files changed: +59 −13 lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 8 additions & 0 deletions
@@ -15115,6 +15115,14 @@ layers:
     unit: COUNT
     aggregation: AVG
     derivative: NONE
+  - name: replicas.cpunanospersecond
+    exported_name: replicas_cpunanospersecond
+    description: Nanoseconds of CPU time in Replica request processing including evaluation but not replication
+    y_axis_label: Nanoseconds
+    type: COUNTER
+    unit: NANOSECONDS
+    aggregation: AVG
+    derivative: NON_NEGATIVE_DERIVATIVE
   - name: replicas.leaders
     exported_name: replicas_leaders
     description: Number of raft leaders

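Since the new entry is typed COUNTER with derivative NON_NEGATIVE_DERIVATIVE, dashboards are expected to chart its rate of change rather than the raw cumulative nanoseconds. Assuming a Prometheus-style scrape of the exported name and the per-tenant child labels mentioned in the release note, a per-tenant CPU rate could be charted as sum by (tenant_id) (rate(replicas_cpunanospersecond[5m])) (an illustrative query, not one shipped with this commit).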
pkg/kv/kvserver/metrics.go

Lines changed: 15 additions & 0 deletions
@@ -249,6 +249,13 @@ var (
 		Unit:        metric.Unit_COUNT,
 	}
 
+	metaReqCPUNanos = metric.Metadata{
+		Name:        "replicas.cpunanospersecond",
+		Help:        "Nanoseconds of CPU time in Replica request processing including evaluation but not replication",
+		Measurement: "Nanoseconds",
+		Unit:        metric.Unit_NANOSECONDS,
+	}
+
 	// Storage metrics.
 	metaLiveBytes = metric.Metadata{
 		Name: "livebytes",
@@ -3215,6 +3222,7 @@ type StoreMetrics struct {
 type TenantsStorageMetrics struct {
 	// NB: If adding more metrics to this struct, be sure to
 	// also update tenantsStorageMetricsSet().
+	ReqCPUNanos *aggmetric.AggCounterFloat64
 	LiveBytes   *aggmetric.AggGauge
 	KeyBytes    *aggmetric.AggGauge
 	ValBytes    *aggmetric.AggGauge
@@ -3251,6 +3259,7 @@ type TenantsStorageMetrics struct {
 // see kvbase.TenantsStorageMetricsSet for public access. Assigned in init().
 func tenantsStorageMetricsSet() map[string]struct{} {
 	return map[string]struct{}{
+		metaReqCPUNanos.Name: {},
 		metaLiveBytes.Name:   {},
 		metaKeyBytes.Name:    {},
 		metaValBytes.Name:    {},
@@ -3313,6 +3322,7 @@ func (sm *TenantsStorageMetrics) acquireTenant(tenantID roachpb.TenantID) *tenan
 	// Successfully stored a new instance, initialize it and then unlock it.
 	tenantIDStr := tenantID.String()
 	m.mu.refCount++
+	m.ReqCPUNanos = sm.ReqCPUNanos.AddChild(tenantIDStr)
 	m.LiveBytes = sm.LiveBytes.AddChild(tenantIDStr)
 	m.KeyBytes = sm.KeyBytes.AddChild(tenantIDStr)
 	m.ValBytes = sm.ValBytes.AddChild(tenantIDStr)
@@ -3360,6 +3370,8 @@ func (sm *TenantsStorageMetrics) releaseTenant(ctx context.Context, m *tenantSto
 	// The refCount is zero, delete this instance after destroying its metrics.
 	// Note that concurrent attempts to create an instance will detect the zero
 	// refCount value and construct a new instance.
+	m.ReqCPUNanos.Unlink() // counter
+	m.ReqCPUNanos = nil
 	for _, gptr := range []**aggmetric.Gauge{
 		&m.LiveBytes,
 		&m.KeyBytes,
@@ -3409,6 +3421,8 @@ type tenantStorageMetrics struct {
 		stack debugutil.SafeStack
 	}
 
+	ReqCPUNanos *aggmetric.CounterFloat64
+
 	LiveBytes *aggmetric.Gauge
 	KeyBytes  *aggmetric.Gauge
 	ValBytes  *aggmetric.Gauge
@@ -3442,6 +3456,7 @@ func (tm *tenantStorageMetrics) assert(ctx context.Context) {
 func newTenantsStorageMetrics() *TenantsStorageMetrics {
 	b := aggmetric.MakeBuilder(multitenant.TenantIDLabel)
 	sm := &TenantsStorageMetrics{
+		ReqCPUNanos: b.CounterFloat64(metaReqCPUNanos),
 		LiveBytes:   b.Gauge(metaLiveBytes),
 		KeyBytes:    b.Gauge(metaKeyBytes),
 		ValBytes:    b.Gauge(metaValBytes),

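The changes above all follow one parent/child pattern: the store registers a single aggregate metric, and each tenant gets a labeled child that feeds into it. A minimal sketch, assuming only the aggmetric APIs that already appear in this diff (MakeBuilder, CounterFloat64, AddChild, Inc, Unlink):

	func example() {
		b := aggmetric.MakeBuilder(multitenant.TenantIDLabel)
		parent := b.CounterFloat64(metaReqCPUNanos) // registered once, exported as the aggregate

		child := parent.AddChild("5") // adds a time series labeled tenant_id="5"
		child.Inc(1500.0)             // counted by both the child and the parent
		child.Unlink()                // stops exporting the child series
	}

Here acquireTenant wires up AddChild per tenant, and releaseTenant unlinks the child once the last reference to the tenant's metrics goes away.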
pkg/kv/kvserver/mvcc_gc_queue.go

Lines changed: 1 addition & 1 deletion
@@ -664,7 +664,7 @@ func (mgcq *mvccGCQueue) process(
 ) (processed bool, err error) {
 	// Record the CPU time processing the request for this replica. This is
 	// recorded regardless of errors that are encountered.
-	defer repl.MeasureReqCPUNanos(grunning.Time())
+	defer repl.MeasureReqCPUNanos(ctx, grunning.Time())
 
 	// Lookup the descriptor and GC policy for the zone containing this key range.
 	desc, conf := repl.DescAndSpanConfig()

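A Go subtlety makes the one-line change above work: arguments to a deferred call are evaluated at the defer statement itself, so grunning.Time() is read once on entry, while MeasureReqCPUNanos only runs when process returns and computes the elapsed on-CPU time from that start reading. A self-contained sketch of the same pattern, with nanosRunning as a hypothetical stand-in for grunning.Time():

	package main

	import (
		"fmt"
		"time"
	)

	// nanosRunning stands in for grunning.Time(); here it just returns a
	// wall-clock reading so the sketch runs anywhere.
	func nanosRunning() time.Duration {
		return time.Duration(time.Now().UnixNano())
	}

	// report receives the start reading captured at the defer statement and
	// takes the second reading itself, at function exit.
	func report(start time.Duration) {
		fmt.Printf("elapsed: %s\n", nanosRunning()-start)
	}

	func main() {
		defer report(nanosRunning())      // nanosRunning() evaluated now; report() at exit
		time.Sleep(10 * time.Millisecond) // stand-in for request processing
	}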
pkg/kv/kvserver/replica.go

Lines changed: 15 additions & 1 deletion
@@ -2768,9 +2768,23 @@ func init() {
 
 // MeasureReqCPUNanos measures the cpu time spent on this replica processing
 // requests.
-func (r *Replica) MeasureReqCPUNanos(start time.Duration) {
+func (r *Replica) MeasureReqCPUNanos(ctx context.Context, start time.Duration) {
 	r.measureNanosRunning(start, func(dur float64) {
 		r.loadStats.RecordReqCPUNanos(dur)
+		// NB: the caller also has a tenant ID, but we use the replica's here for
+		// simplicity. There is no established pattern for short-lived references
+		// to a specific tenant's metrics.
+		if r.tenantMetricsRef != nil {
+			// We can *not* use the tenant metrics directly because nothing in
+			// this code path prevents the surrounding replica from getting
+			// destroyed, which could zero the refcount and release the metrics
+			// object. Instead, we go through acquireTenant, which gives us an
+			// object that is and remains valid. This is not an expensive
+			// operation in the common case (the replica still exists).
+			tm := r.store.metrics.acquireTenant(r.tenantMetricsRef.tenantID)
+			tm.ReqCPUNanos.Inc(dur)
+			r.store.metrics.releaseTenant(ctx, tm)
+		}
 	})
 }
 

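The long comment above is about lifetime, not performance: the replica's cached tenantMetricsRef can be released by a concurrent replica destruction, so the increment must go through its own acquire/release pair. A minimal sketch of that refcounting pattern (not CockroachDB's implementation, just the shape the comment describes):

	package tenantmetrics

	import "sync"

	type tenantMetrics struct {
		refCount int
		cpuNanos float64 // the per-tenant counter being protected
	}

	type registry struct {
		mu      sync.Mutex
		tenants map[uint64]*tenantMetrics
	}

	func newRegistry() *registry {
		return &registry{tenants: map[uint64]*tenantMetrics{}}
	}

	// acquire pins the tenant's metrics object; a concurrent release cannot
	// free it while the caller holds the reference.
	func (r *registry) acquire(id uint64) *tenantMetrics {
		r.mu.Lock()
		defer r.mu.Unlock()
		m := r.tenants[id]
		if m == nil {
			m = &tenantMetrics{}
			r.tenants[id] = m
		}
		m.refCount++
		return m
	}

	// release drops the pin and deletes the instance once the last
	// reference disappears.
	func (r *registry) release(id uint64, m *tenantMetrics) {
		r.mu.Lock()
		defer r.mu.Unlock()
		m.refCount--
		if m.refCount == 0 {
			delete(r.tenants, id)
		}
	}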
pkg/kv/kvserver/replica_rate_limit.go

Lines changed: 4 additions & 3 deletions
@@ -18,12 +18,13 @@ import (
 // maybeRateLimitBatch may block the batch waiting to be rate-limited. Note that
 // the replica must be initialized and thus there is no synchronization issue
 // on the tenantRateLimiter.
-func (r *Replica) maybeRateLimitBatch(ctx context.Context, ba *kvpb.BatchRequest) error {
+func (r *Replica) maybeRateLimitBatch(
+	ctx context.Context, ba *kvpb.BatchRequest, tenantIDOrZero roachpb.TenantID,
+) error {
 	if r.tenantLimiter == nil {
 		return nil
 	}
-	tenantID, ok := roachpb.ClientTenantFromContext(ctx)
-	if !ok || tenantID == roachpb.SystemTenantID {
+	if !tenantIDOrZero.IsSet() || tenantIDOrZero.IsSystem() {
 		return nil
 	}
 

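The signature change pushes tenant extraction up to the caller and leans on a zero-value sentinel: an unset TenantID means "no client tenant", and the system tenant is exempt from limiting. A sketch of that convention, mirroring how IsSet and IsSystem are used above (the real type is roachpb.TenantID):

	// Sketch only: assumes ID 0 means "unset" and ID 1 is the system tenant.
	type TenantID struct{ id uint64 }

	func (t TenantID) IsSet() bool    { return t.id != 0 }
	func (t TenantID) IsSystem() bool { return t.id == 1 }

One consequence of the plumbing: ClientTenantFromContext is now consulted once per batch in SendWithWriteBytes, and the same value feeds both the rate limiter and the pprof labels.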
pkg/kv/kvserver/replica_send.go

Lines changed: 15 additions & 7 deletions
@@ -123,10 +123,23 @@ func (r *Replica) Send(
 func (r *Replica) SendWithWriteBytes(
 	ctx context.Context, ba *kvpb.BatchRequest,
 ) (*kvpb.BatchResponse, *kvadmission.StoreWriteBytes, *kvpb.Error) {
+	tenantIDOrZero, _ := roachpb.ClientTenantFromContext(ctx)
+
+	// Record the CPU time processing the request for this replica. This is
+	// recorded regardless of errors that are encountered.
+	startCPU := grunning.Time()
+	defer r.MeasureReqCPUNanos(ctx, startCPU)
+
 	if r.store.cfg.Settings.CPUProfileType() == cluster.CPUProfileWithLabels {
 		defer pprof.SetGoroutineLabels(ctx)
 		// Note: the defer statement captured the previous context.
-		ctx = pprof.WithLabels(ctx, pprof.Labels("range_str", r.rangeStr.ID()))
+		var lbls pprof.LabelSet
+		if tenantIDOrZero.IsSet() {
+			lbls = pprof.Labels("range_str", r.rangeStr.ID(), "tenant_id", tenantIDOrZero.String())
+		} else {
+			lbls = pprof.Labels("range_str", r.rangeStr.ID())
+		}
+		ctx = pprof.WithLabels(ctx, lbls)
 		pprof.SetGoroutineLabels(ctx)
 	}
 	if trace.IsEnabled() {
@@ -135,11 +148,6 @@ func (r *Replica) SendWithWriteBytes(
 	// Add the range log tag.
 	ctx = r.AnnotateCtx(ctx)
 
-	// Record the CPU time processing the request for this replica. This is
-	// recorded regardless of errors that are encountered.
-	startCPU := grunning.Time()
-	defer r.MeasureReqCPUNanos(startCPU)
-
 	isReadOnly := ba.IsReadOnly()
 	if err := r.checkBatchRequest(ba, isReadOnly); err != nil {
 		return nil, nil, kvpb.NewError(err)
@@ -148,7 +156,7 @@ func (r *Replica) SendWithWriteBytes(
 	if err := r.maybeBackpressureBatch(ctx, ba); err != nil {
 		return nil, nil, kvpb.NewError(err)
 	}
-	if err := r.maybeRateLimitBatch(ctx, ba); err != nil {
+	if err := r.maybeRateLimitBatch(ctx, ba, tenantIDOrZero); err != nil {
 		return nil, nil, kvpb.NewError(err)
 	}
 	if err := r.maybeCommitWaitBeforeCommitTrigger(ctx, ba); err != nil {

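The profiler-label block above uses the standard runtime/pprof API: labels attached to a goroutine tag every CPU sample taken while they are set, and the deferred SetGoroutineLabels(ctx) restores the pre-label state because defer captures ctx before it is reassigned. A minimal self-contained sketch of the same mechanics:

	package profiling

	import (
		"context"
		"runtime/pprof"
	)

	func handle(ctx context.Context, tenantID string) {
		// The deferred call captures the original ctx, restoring the prior
		// labels on exit.
		defer pprof.SetGoroutineLabels(ctx)
		ctx = pprof.WithLabels(ctx, pprof.Labels("tenant_id", tenantID))
		pprof.SetGoroutineLabels(ctx)
		// ... CPU samples taken here carry tenant_id as a profile tag ...
	}

A profile collected while such labels are set can then be narrowed to one tenant, for example with go tool pprof's -tagfocus=tenant_id=5 filter.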
pkg/kv/kvserver/replica_test.go

Lines changed: 1 addition & 1 deletion
@@ -14319,7 +14319,7 @@ func TestReplicaRateLimit(t *testing.T) {
 		ba.Add(&req)
 		ctx, cancel := context.WithTimeout(tenCtx, timeout)
 		defer cancel()
-		return tenRepl.maybeRateLimitBatch(ctx, ba)
+		return tenRepl.maybeRateLimitBatch(ctx, ba, ten123)
 	}
 
 	// Verify that first few writes succeed fast, but eventually requests start
