Skip to content

Commit 48f644f

Browse files
split: add an access direction metric and log
Similar to how the split decider logs and increments a counter when a popular key is found, do the same for when the access direction is > 80% left or right (descending or ascending). Fixes: #142208 Epic: CRDB-43150 Release note (ops change): Adds a new metric `kv.loadsplitter.cleardirection` which increments when replica samples are found to be moving in a uniform direction.
1 parent 74f9ec4 commit 48f644f

File tree

7 files changed

+79
-41
lines changed

7 files changed

+79
-41
lines changed

docs/generated/metrics/metrics.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@
202202
<tr><td>STORAGE</td><td>kv.concurrency.max_lock_hold_duration_nanos</td><td>Maximum length of time any lock in a lock table is held. Does not include replicated locks (intents) that are not held in memory</td><td>Nanoseconds</td><td>GAUGE</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
203203
<tr><td>STORAGE</td><td>kv.concurrency.max_lock_wait_duration_nanos</td><td>Maximum lock wait duration across requests currently waiting in lock wait-queues</td><td>Nanoseconds</td><td>GAUGE</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
204204
<tr><td>STORAGE</td><td>kv.concurrency.max_lock_wait_queue_waiters_for_lock</td><td>Maximum number of requests actively waiting in any single lock wait-queue</td><td>Lock-Queue Waiters</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
205+
<tr><td>STORAGE</td><td>kv.loadsplitter.cleardirection</td><td>Load-based splitter observed an access direction greater than 80% left or right in the samples.</td><td>Occurrences</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
205206
<tr><td>STORAGE</td><td>kv.loadsplitter.nosplitkey</td><td>Load-based splitter could not find a split key.</td><td>Occurrences</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
206207
<tr><td>STORAGE</td><td>kv.loadsplitter.popularkey</td><td>Load-based splitter could not find a split key and the most popular sampled split key occurs in &gt;= 25% of the samples.</td><td>Occurrences</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
207208
<tr><td>STORAGE</td><td>kv.prober.planning_attempts</td><td>Number of attempts at planning out probes made; in order to probe KV we need to plan out which ranges to probe;</td><td>Runs</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>

pkg/kv/kvserver/asim/state/split_decider.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,9 @@ func NewSplitDecider(settings *config.SimulationSettings) *SplitDecider {
8080
func (s *SplitDecider) newDecider() *split.Decider {
8181
decider := &split.Decider{}
8282
split.Init(decider, s.splitConfig, &split.LoadSplitterMetrics{
83-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
84-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
83+
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
84+
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
85+
ClearDirectionCount: metric.NewCounter(metric.Metadata{}),
8586
}, split.SplitQPS)
8687
return decider
8788
}

pkg/kv/kvserver/metrics.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2471,6 +2471,13 @@ Note that the measurement does not include the duration for replicating the eval
24712471
Unit: metric.Unit_COUNT,
24722472
}
24732473

2474+
metaClearDirectionCount = metric.Metadata{
2475+
Name: "kv.loadsplitter.cleardirection",
2476+
Help: "Load-based splitter observed an access direction greater than 80% left or right in the samples.",
2477+
Measurement: "Occurrences",
2478+
Unit: metric.Unit_COUNT,
2479+
}
2480+
24742481
metaSplitEstimatedStats = metric.Metadata{
24752482
Name: "kv.split.estimated_stats",
24762483
Help: "Number of splits that computed estimated MVCC stats.",
@@ -3333,8 +3340,9 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
33333340
registry: storeRegistry,
33343341
TenantsStorageMetrics: newTenantsStorageMetrics(),
33353342
LoadSplitterMetrics: &split.LoadSplitterMetrics{
3336-
PopularKeyCount: metric.NewCounter(metaPopularKeyCount),
3337-
NoSplitKeyCount: metric.NewCounter(metaNoSplitKeyCount),
3343+
PopularKeyCount: metric.NewCounter(metaPopularKeyCount),
3344+
NoSplitKeyCount: metric.NewCounter(metaNoSplitKeyCount),
3345+
ClearDirectionCount: metric.NewCounter(metaClearDirectionCount),
33383346
},
33393347

33403348
// Replica metrics.

pkg/kv/kvserver/split/decider.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ package split
99

1010
import (
1111
"context"
12+
"math"
1213
"math/rand/v2"
1314
"time"
1415

@@ -140,8 +141,9 @@ func GlobalRandSource() RandSource {
140141

141142
// LoadSplitterMetrics consists of metrics for load-based splitter split key.
142143
type LoadSplitterMetrics struct {
143-
PopularKeyCount *metric.Counter
144-
NoSplitKeyCount *metric.Counter
144+
PopularKeyCount *metric.Counter
145+
NoSplitKeyCount *metric.Counter
146+
ClearDirectionCount *metric.Counter
145147
}
146148

147149
// Decider tracks the latest load and if certain conditions are met, records
@@ -283,6 +285,13 @@ func (d *Decider) recordLocked(
283285
if popularKeyFrequency >= splitKeyThreshold {
284286
d.loadSplitterMetrics.PopularKeyCount.Inc(1)
285287
}
288+
accessDirection := d.mu.splitFinder.AccessDirection()
289+
direction := directionStr(accessDirection)
290+
log.KvDistribution.Infof(ctx, "%s, access balance between left and right for sampled keys: %s-biased %d%%",
291+
causeMsg, direction, int(math.Abs(accessDirection)*100))
292+
if math.Abs(accessDirection) >= clearDirectionThreshold {
293+
d.loadSplitterMetrics.ClearDirectionCount.Inc(1)
294+
}
286295
d.loadSplitterMetrics.NoSplitKeyCount.Inc(1)
287296
}
288297
}
@@ -542,3 +551,16 @@ func (t *maxStatTracker) windowWidth() time.Duration {
542551
// NB: -1 because during a rotation, only len(t.windows)-1 windows survive.
543552
return t.minRetention / time.Duration(len(t.windows)-1)
544553
}
554+
555+
// Returns the absolute percentage and direction of accesses
556+
// as a string to be used in a log statement.
557+
func directionStr(direction float64) string {
558+
dstr := "right"
559+
if direction == 0 {
560+
dstr = "even"
561+
}
562+
if direction < 0 {
563+
dstr = "left"
564+
}
565+
return dstr
566+
}

pkg/kv/kvserver/split/decider_test.go

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ func ms(i int) time.Time {
6464
return ts.Add(time.Duration(i) * time.Millisecond)
6565
}
6666

67+
func newSplitterMetrics() *LoadSplitterMetrics {
68+
return &LoadSplitterMetrics{
69+
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
70+
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
71+
ClearDirectionCount: metric.NewCounter(metric.Metadata{}),
72+
}
73+
}
74+
6775
func TestDecider(t *testing.T) {
6876
defer leaktest.AfterTest(t)()
6977

@@ -76,10 +84,7 @@ func TestDecider(t *testing.T) {
7684
}
7785

7886
var d Decider
79-
Init(&d, &loadSplitConfig, &LoadSplitterMetrics{
80-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
81-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
82-
},
87+
Init(&d, &loadSplitConfig, newSplitterMetrics(),
8388
SplitQPS,
8489
)
8590

@@ -242,10 +247,7 @@ func TestDecider_MaxStat(t *testing.T) {
242247
}
243248

244249
var d Decider
245-
Init(&d, &loadSplitConfig, &LoadSplitterMetrics{
246-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
247-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
248-
}, SplitQPS)
250+
Init(&d, &loadSplitConfig, newSplitterMetrics(), SplitQPS)
249251

250252
assertMaxStat := func(i int, expMaxStat float64, expOK bool) {
251253
t.Helper()
@@ -408,10 +410,7 @@ func TestSplitStatisticsGeneral(t *testing.T) {
408410
statThreshold: 1,
409411
}
410412

411-
Init(&decider, &loadSplitConfig, &LoadSplitterMetrics{
412-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
413-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
414-
}, SplitCPU)
413+
Init(&decider, &loadSplitConfig, newSplitterMetrics(), SplitCPU)
415414

416415
for i := 1; i <= 1000; i++ {
417416
k := i
@@ -462,10 +461,7 @@ func TestSplitStatisticsPopularKey(t *testing.T) {
462461
statThreshold: 1,
463462
}
464463

465-
Init(&decider, &loadSplitConfig, &LoadSplitterMetrics{
466-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
467-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
468-
}, SplitCPU)
464+
Init(&decider, &loadSplitConfig, newSplitterMetrics(), SplitCPU)
469465

470466
for i := 1; i <= 1000; i++ {
471467
decider.Record(context.Background(), ms(timeStart+i*50), ld(1), func() roachpb.Span {
@@ -491,12 +487,9 @@ func TestDeciderMetrics(t *testing.T) {
491487
statThreshold: 1,
492488
}
493489

494-
Init(&dPopular, &loadSplitConfig, &LoadSplitterMetrics{
495-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
496-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
497-
}, SplitCPU)
490+
Init(&dPopular, &loadSplitConfig, newSplitterMetrics(), SplitCPU)
498491

499-
// No split key, popular key
492+
// No split key, popular key, clear direction
500493
for i := 0; i < 20; i++ {
501494
dPopular.Record(context.Background(), ms(timeStart), ld(1), func() roachpb.Span {
502495
return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
@@ -510,13 +503,11 @@ func TestDeciderMetrics(t *testing.T) {
510503

511504
assert.Equal(t, dPopular.loadSplitterMetrics.PopularKeyCount.Count(), int64(2))
512505
assert.Equal(t, dPopular.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(2))
506+
assert.Equal(t, dPopular.loadSplitterMetrics.ClearDirectionCount.Count(), int64(2))
513507

514-
// No split key, not popular key
508+
// No split key, not popular key, clear direction
515509
var dNotPopular Decider
516-
Init(&dNotPopular, &loadSplitConfig, &LoadSplitterMetrics{
517-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
518-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
519-
}, SplitCPU)
510+
Init(&dNotPopular, &loadSplitConfig, newSplitterMetrics(), SplitCPU)
520511

521512
for i := 0; i < 20; i++ {
522513
dNotPopular.Record(context.Background(), ms(timeStart), ld(1), func() roachpb.Span {
@@ -531,13 +522,29 @@ func TestDeciderMetrics(t *testing.T) {
531522

532523
assert.Equal(t, dNotPopular.loadSplitterMetrics.PopularKeyCount.Count(), int64(0))
533524
assert.Equal(t, dNotPopular.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(2))
525+
assert.Equal(t, dNotPopular.loadSplitterMetrics.ClearDirectionCount.Count(), int64(2))
526+
527+
// no split key, no popular key, no clear direction
528+
var dNoClearDirection Decider
529+
Init(&dNoClearDirection, &loadSplitConfig, newSplitterMetrics(), SplitCPU)
530+
for i := 0; i < 20; i++ {
531+
dNoClearDirection.Record(context.Background(), ms(timeStart), ld(1), func() roachpb.Span {
532+
return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(i))}
533+
})
534+
}
535+
for i := 1; i <= 2000; i++ {
536+
dNoClearDirection.Record(context.Background(), ms(timeStart+i*1000), ld(1), func() roachpb.Span {
537+
return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(i % 20))}
538+
})
539+
}
540+
541+
assert.Equal(t, dNoClearDirection.loadSplitterMetrics.PopularKeyCount.Count(), int64(0))
542+
assert.Equal(t, dNoClearDirection.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(0))
543+
assert.Equal(t, dNoClearDirection.loadSplitterMetrics.ClearDirectionCount.Count(), int64(0))
534544

535545
// No split key, all insufficient counters
536546
var dAllInsufficientCounters Decider
537-
Init(&dAllInsufficientCounters, &loadSplitConfig, &LoadSplitterMetrics{
538-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
539-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
540-
}, SplitCPU)
547+
Init(&dAllInsufficientCounters, &loadSplitConfig, newSplitterMetrics(), SplitCPU)
541548
for i := 0; i < 20; i++ {
542549
dAllInsufficientCounters.Record(context.Background(), ms(timeStart), ld(1), func() roachpb.Span {
543550
return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
@@ -551,4 +558,6 @@ func TestDeciderMetrics(t *testing.T) {
551558

552559
assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.PopularKeyCount.Count(), int64(0))
553560
assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.NoSplitKeyCount.Count(), int64(0))
561+
assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.ClearDirectionCount.Count(), int64(0))
562+
554563
}

pkg/kv/kvserver/split/load_based_splitter_test.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
2323
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
2424
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
25-
"github.com/cockroachdb/cockroach/pkg/util/metric"
2625
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
2726
"github.com/cockroachdb/cockroach/pkg/workload/ycsb"
2827
"github.com/cockroachdb/datadriven"
@@ -325,10 +324,7 @@ func (dc deciderConfig) makeDecider(randSource rand.Source) *Decider {
325324
statThreshold: dc.threshold,
326325
}
327326

328-
Init(d, &loadSplitConfig, &LoadSplitterMetrics{
329-
PopularKeyCount: metric.NewCounter(metric.Metadata{}),
330-
NoSplitKeyCount: metric.NewCounter(metric.Metadata{}),
331-
}, dc.objective)
327+
Init(d, &loadSplitConfig, newSplitterMetrics(), dc.objective)
332328
return d
333329
}
334330

pkg/kv/kvserver/split/unweighted_finder.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ const (
4747
splitKeyMinCounter = 100 // min aggregate counters before consideration
4848
splitKeyThreshold = 0.25 // 25% difference between left/right counters
4949
splitKeyContainedThreshold = 0.50 // too many spanning queries over split point
50+
clearDirectionThreshold = 0.80 // 80% accesses left or right
5051
)
5152

5253
type sample struct {

0 commit comments

Comments
 (0)