@@ -249,6 +249,13 @@ var (
249
249
Unit : metric .Unit_COUNT ,
250
250
}
251
251
252
+ metaReqCPUNanos = metric.Metadata {
253
+ Name : "replicas.cpunanospersecond" ,
254
+ Help : "Nanoseconds of CPU time in Replica request processing including evaluation but not replication" ,
255
+ Measurement : "Nanoseconds" ,
256
+ Unit : metric .Unit_NANOSECONDS ,
257
+ }
258
+
252
259
// Storage metrics.
253
260
metaLiveBytes = metric.Metadata {
254
261
Name : "livebytes" ,
@@ -3207,29 +3214,6 @@ type StoreMetrics struct {
3207
3214
DiskWriteMaxBytesPerSecond * metric.Gauge
3208
3215
}
3209
3216
3210
- type tenantMetricsRef struct {
3211
- // All fields are internal. Don't access them.
3212
-
3213
- _tenantID roachpb.TenantID
3214
- _state int32 // atomic; 0=usable 1=poisoned
3215
-
3216
- // _stack helps diagnose use-after-release when it occurs.
3217
- // This field is populated in releaseTenant and printed
3218
- // in assertions on failure.
3219
- _stack struct {
3220
- syncutil.Mutex
3221
- debugutil.SafeStack
3222
- }
3223
- }
3224
-
3225
- func (ref * tenantMetricsRef ) assert (ctx context.Context ) {
3226
- if atomic .LoadInt32 (& ref ._state ) != 0 {
3227
- ref ._stack .Lock ()
3228
- defer ref ._stack .Unlock ()
3229
- log .FatalfDepth (ctx , 1 , "tenantMetricsRef already finalized in:\n %s" , ref ._stack .SafeStack )
3230
- }
3231
- }
3232
-
3233
3217
// TenantsStorageMetrics are metrics which are aggregated over all tenants
3234
3218
// present on the server. The struct maintains child metrics used by each
3235
3219
// tenant to track their individual values. The struct expects that children
@@ -3238,6 +3222,7 @@ func (ref *tenantMetricsRef) assert(ctx context.Context) {
3238
3222
type TenantsStorageMetrics struct {
3239
3223
// NB: If adding more metrics to this struct, be sure to
3240
3224
// also update tenantsStorageMetricsSet().
3225
+ ReqCPUNanos * aggmetric.AggCounterFloat64
3241
3226
LiveBytes * aggmetric.AggGauge
3242
3227
KeyBytes * aggmetric.AggGauge
3243
3228
ValBytes * aggmetric.AggGauge
@@ -3274,6 +3259,7 @@ type TenantsStorageMetrics struct {
3274
3259
// see kvbase.TenantsStorageMetricsSet for public access. Assigned in init().
3275
3260
func tenantsStorageMetricsSet () map [string ]struct {} {
3276
3261
return map [string ]struct {}{
3262
+ metaReqCPUNanos .Name : {},
3277
3263
metaLiveBytes .Name : {},
3278
3264
metaKeyBytes .Name : {},
3279
3265
metaValBytes .Name : {},
@@ -3304,7 +3290,7 @@ func (sm *TenantsStorageMetrics) MetricStruct() {}
3304
3290
// method are reference counted with decrements occurring in the corresponding
3305
3291
// releaseTenant call. This method must be called prior to adding or subtracting
3306
3292
// MVCC stats.
3307
- func (sm * TenantsStorageMetrics ) acquireTenant (tenantID roachpb.TenantID ) * tenantMetricsRef {
3293
+ func (sm * TenantsStorageMetrics ) acquireTenant (tenantID roachpb.TenantID ) * tenantStorageMetrics {
3308
3294
// incRef increments the reference count if it is not already zero indicating
3309
3295
// that the struct has already been destroyed.
3310
3296
incRef := func (m * tenantStorageMetrics ) (alreadyDestroyed bool ) {
@@ -3319,15 +3305,13 @@ func (sm *TenantsStorageMetrics) acquireTenant(tenantID roachpb.TenantID) *tenan
3319
3305
for {
3320
3306
if m , ok := sm .tenants .Load (tenantID ); ok {
3321
3307
if alreadyDestroyed := incRef (m ); ! alreadyDestroyed {
3322
- return & tenantMetricsRef {
3323
- _tenantID : tenantID ,
3324
- }
3308
+ return m
3325
3309
}
3326
3310
// Somebody else concurrently took the reference count to zero, go back
3327
3311
// around. Because of the locking in releaseTenant, we know that we'll
3328
3312
// find a different value or no value at all on the next iteration.
3329
3313
} else {
3330
- m := & tenantStorageMetrics {}
3314
+ m := & tenantStorageMetrics {tenantID : tenantID }
3331
3315
m .mu .Lock ()
3332
3316
_ , loaded := sm .tenants .LoadOrStore (tenantID , m )
3333
3317
if loaded {
@@ -3338,6 +3322,7 @@ func (sm *TenantsStorageMetrics) acquireTenant(tenantID roachpb.TenantID) *tenan
3338
3322
// Successfully stored a new instance, initialize it and then unlock it.
3339
3323
tenantIDStr := tenantID .String ()
3340
3324
m .mu .refCount ++
3325
+ m .ReqCPUNanos = sm .ReqCPUNanos .AddChild (tenantIDStr )
3341
3326
m .LiveBytes = sm .LiveBytes .AddChild (tenantIDStr )
3342
3327
m .KeyBytes = sm .KeyBytes .AddChild (tenantIDStr )
3343
3328
m .ValBytes = sm .ValBytes .AddChild (tenantIDStr )
@@ -3359,37 +3344,35 @@ func (sm *TenantsStorageMetrics) acquireTenant(tenantID roachpb.TenantID) *tenan
3359
3344
m .SysCount = sm .SysCount .AddChild (tenantIDStr )
3360
3345
m .AbortSpanBytes = sm .AbortSpanBytes .AddChild (tenantIDStr )
3361
3346
m .mu .Unlock ()
3362
- return & tenantMetricsRef {
3363
- _tenantID : tenantID ,
3364
- }
3347
+ return m
3365
3348
}
3366
3349
}
3367
3350
}
3368
3351
3369
3352
// releaseTenant releases the reference to the metrics for this tenant which was
// acquired with acquireTenant. It will fatally log if the metrics have already
// been released or if the reference count drops below zero.
3372
- func (sm * TenantsStorageMetrics ) releaseTenant (ctx context.Context , ref * tenantMetricsRef ) {
3373
- m := sm .getTenant (ctx , ref ) // NB: asserts against use-after-release
3374
- if atomic .SwapInt32 (& ref ._state , 1 ) != 0 {
3375
- ref .assert (ctx ) // this will fatal
3376
- return // unreachable
3377
- }
3378
- ref ._stack .Lock ()
3379
- ref ._stack .SafeStack = debugutil .Stack ()
3380
- ref ._stack .Unlock ()
3355
+ func (sm * TenantsStorageMetrics ) releaseTenant (ctx context.Context , m * tenantStorageMetrics ) {
3381
3356
m .mu .Lock ()
3382
3357
defer m .mu .Unlock ()
3358
+ if m .mu .released .Load () {
3359
+ log .FatalfDepth (ctx , 1 , "tenant metrics already released in:\n %s" , m .mu .stack )
3360
+ }
3383
3361
m .mu .refCount --
3384
- if m .mu .refCount < 0 {
3385
- log .Fatalf (ctx , "invalid refCount on metrics for tenant %v: %d" , ref . _tenantID , m . mu . refCount )
3386
- } else if m . mu . refCount > 0 {
3362
+ if n := m .mu .refCount ; n < 0 {
3363
+ log .Fatalf (ctx , "invalid refCount on metrics for tenant %v: %d" , m . tenantID , n )
3364
+ } else if n > 0 {
3387
3365
return
3388
3366
}
3389
3367
3368
+ m .mu .released .Store (true )
3369
+ m .mu .stack = debugutil .Stack ()
3370
+
3390
3371
// The refCount is zero, delete this instance after destroying its metrics.
3391
3372
// Note that concurrent attempts to create an instance will detect the zero
3392
3373
// refCount value and construct a new instance.
3374
+ m .ReqCPUNanos .Unlink () // counter
3375
+ m .ReqCPUNanos = nil
3393
3376
for _ , gptr := range []* * aggmetric.Gauge {
3394
3377
& m .LiveBytes ,
3395
3378
& m .KeyBytes ,
@@ -3417,28 +3400,30 @@ func (sm *TenantsStorageMetrics) releaseTenant(ctx context.Context, ref *tenantM
3417
3400
(* gptr ).Unlink ()
3418
3401
* gptr = nil
3419
3402
}
3420
- sm .tenants .Delete (ref ._tenantID )
3421
- }
3422
-
3423
- // getTenant is a helper method used to retrieve the metrics for a tenant. The
3424
- // call will log fatally if no such tenant has been previously acquired.
3425
- func (sm * TenantsStorageMetrics ) getTenant (
3426
- ctx context.Context , ref * tenantMetricsRef ,
3427
- ) * tenantStorageMetrics {
3428
- ref .assert (ctx )
3429
- m , ok := sm .tenants .Load (ref ._tenantID )
3430
- if ! ok {
3431
- log .Fatalf (ctx , "no metrics exist for tenant %v" , ref ._tenantID )
3432
- }
3433
- return m
3403
+ sm .tenants .Delete (m .tenantID )
3434
3404
}
3435
3405
3406
// tenantStorageMetrics is a struct that holds the metrics for all replicas
// (within a Store) of a given tenant.
//
// Whenever it is guaranteed that the replica is not destroyed, the metrics
// fields can be accessed directly (for example, when holding raftMu and having
// previously checked the replica's destroyStatus).
//
// Whenever this is *not* guaranteed, use `TenantsStorageMetrics.acquireTenant`
// (followed by releaseTenant after completion of usage) instead to avoid
// racing with a potential concurrent attempt to release the metrics.
3436
3416
type tenantStorageMetrics struct {
3437
- mu struct {
3417
+ tenantID roachpb.TenantID
3418
+ mu struct {
3438
3419
syncutil.Mutex
3439
3420
refCount int
3421
+ released atomic.Bool // allowed to read without holding mu
3422
+ stack debugutil.SafeStack
3440
3423
}
3441
3424
3425
+ ReqCPUNanos * aggmetric.CounterFloat64
3426
+
3442
3427
LiveBytes * aggmetric.Gauge
3443
3428
KeyBytes * aggmetric.Gauge
3444
3429
ValBytes * aggmetric.Gauge
@@ -3461,9 +3446,18 @@ type tenantStorageMetrics struct {
3461
3446
AbortSpanBytes * aggmetric.Gauge
3462
3447
}
3463
3448
3449
+ func (tm * tenantStorageMetrics ) assert (ctx context.Context ) {
3450
+ if tm .mu .released .Load () {
3451
+ tm .mu .Lock ()
3452
+ defer tm .mu .Unlock ()
3453
+ log .Fatalf (ctx , "tenant metrics already released in:\n %s" , tm .mu .stack )
3454
+ }
3455
+ }
3456
+
3464
3457
func newTenantsStorageMetrics () * TenantsStorageMetrics {
3465
3458
b := aggmetric .MakeBuilder (multitenant .TenantIDLabel )
3466
3459
sm := & TenantsStorageMetrics {
3460
+ ReqCPUNanos : b .CounterFloat64 (metaReqCPUNanos ),
3467
3461
LiveBytes : b .Gauge (metaLiveBytes ),
3468
3462
KeyBytes : b .Gauge (metaKeyBytes ),
3469
3463
ValBytes : b .Gauge (metaValBytes ),
@@ -4020,10 +4014,9 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
4020
4014
// single snapshot of these gauges in the registry might mix the values of two
4021
4015
// subsequent updates.
4022
4016
func (sm * TenantsStorageMetrics ) incMVCCGauges (
4023
- ctx context.Context , ref * tenantMetricsRef , delta enginepb.MVCCStats ,
4017
+ ctx context.Context , tm * tenantStorageMetrics , delta enginepb.MVCCStats ,
4024
4018
) {
4025
- ref .assert (ctx )
4026
- tm := sm .getTenant (ctx , ref )
4019
+ tm .assert (ctx )
4027
4020
tm .LiveBytes .Inc (delta .LiveBytes )
4028
4021
tm .KeyBytes .Inc (delta .KeyBytes )
4029
4022
tm .ValBytes .Inc (delta .ValBytes )
@@ -4047,17 +4040,17 @@ func (sm *TenantsStorageMetrics) incMVCCGauges(
4047
4040
}
4048
4041
4049
4042
func (sm * TenantsStorageMetrics ) addMVCCStats (
4050
- ctx context.Context , ref * tenantMetricsRef , delta enginepb.MVCCStats ,
4043
+ ctx context.Context , tm * tenantStorageMetrics , delta enginepb.MVCCStats ,
4051
4044
) {
4052
- sm .incMVCCGauges (ctx , ref , delta )
4045
+ sm .incMVCCGauges (ctx , tm , delta )
4053
4046
}
4054
4047
4055
4048
func (sm * TenantsStorageMetrics ) subtractMVCCStats (
4056
- ctx context.Context , ref * tenantMetricsRef , delta enginepb.MVCCStats ,
4049
+ ctx context.Context , tm * tenantStorageMetrics , delta enginepb.MVCCStats ,
4057
4050
) {
4058
4051
var neg enginepb.MVCCStats
4059
4052
neg .Subtract (delta )
4060
- sm .incMVCCGauges (ctx , ref , neg )
4053
+ sm .incMVCCGauges (ctx , tm , neg )
4061
4054
}
4062
4055
4063
4056
func (sm * StoreMetrics ) updateEngineMetrics (m storage.Metrics ) {
0 commit comments