Merge pull request #147588 from cockroachdb/blathers/backport-release-25.2-146331

wenyihu6 · web-flow · commit 305c21be013e · 2025-09-08T15:56:07.000-04:00
release-25.2: kvserver: introduce setting to periodically reset split samples
diff --git a/pkg/kv/kvserver/asim/state/split_decider.go b/pkg/kv/kvserver/asim/state/split_decider.go
@@ -58,6 +58,12 @@ func (lsc loadSplitConfig) StatThreshold(_ split.SplitObjective) float64 {
 	return lsc.settings.SplitQPSThreshold
 }
 
+// SampleResetDuration returns the duration that any sampling structure should
+// retain data for before resetting.
+func (lsc loadSplitConfig) SampleResetDuration() time.Duration {
+	return 0 /* disabled */
+}
+
 // SplitDecider implements the LoadSplitter interface.
 type SplitDecider struct {
 	deciders    map[RangeID]*split.Decider
diff --git a/pkg/kv/kvserver/replica_split_load.go b/pkg/kv/kvserver/replica_split_load.go
@@ -64,6 +64,20 @@ var SplitByLoadCPUThreshold = settings.RegisterDurationSetting(
 	settings.WithPublic,
 )
 
+// SplitSampleResetDuration wraps "kv.range_split.load_sample_reset_duration".
+// This is the duration after which the load based split sampler will reset its
+// state, regardless of any split suggestions made. This is useful when the
+// load on a range is non-stationary.
+var SplitSampleResetDuration = settings.RegisterDurationSetting(
+	settings.SystemOnly,
+	"kv.range_split.load_sample_reset_duration",
+	"the duration after which the load based split sampler will reset its state, "+
+		"regardless of any split suggestions made, when zero, the sampler will "+
+		"never reset",
+	0, /* disabled */
+	settings.DurationWithMinimumOrZeroDisable(10*time.Second),
+)
+
 func (obj LBRebalancingObjective) ToSplitObjective() split.SplitObjective {
 	switch obj {
 	case LBRebalancingQueries:
@@ -121,6 +135,12 @@ func (c *replicaSplitConfig) StatThreshold(obj split.SplitObjective) float64 {
 	}
 }
 
+// SampleResetDuration returns the duration that any sampling structure should
+// retain data for before resetting.
+func (c *replicaSplitConfig) SampleResetDuration() time.Duration {
+	return SplitSampleResetDuration.Get(&c.st.SV)
+}
+
 // SplitByLoadEnabled returns whether load based splitting is enabled.
 // Although this is a method of *Replica, the configuration is really global,
 // shared across all stores.
diff --git a/pkg/kv/kvserver/split/decider.go b/pkg/kv/kvserver/split/decider.go
@@ -75,6 +75,9 @@ type LoadSplitConfig interface {
 	// StatThreshold returns the threshold for load above which the range
 	// should be considered split.
 	StatThreshold(SplitObjective) float64
+	// SampleResetDuration returns the duration that any sampling structure
+	// should retain data for before resetting. Zero disables resetting.
+	SampleResetDuration() time.Duration
 }
 
 type RandSource interface {
@@ -167,6 +170,7 @@ type Decider struct {
 
 		// Fields tracking split key suggestions.
 		splitFinder         LoadBasedSplitter // populated when engaged or decided
+		splitFinderInitAt   time.Time         // when the split finder was initialized
 		lastSplitSuggestion time.Time         // last stipulation to client to carry out split
 		suggestionsMade     int               // suggestions made since last reset
 
@@ -252,6 +256,7 @@ func (d *Decider) recordLocked(
 		if d.mu.lastStatVal >= d.config.StatThreshold(d.mu.objective) {
 			if d.mu.splitFinder == nil {
 				d.mu.splitFinder = d.config.NewLoadBasedSplitter(now, d.mu.objective)
+				d.mu.splitFinderInitAt = now
 			}
 		} else {
 			d.mu.splitFinder = nil
@@ -305,6 +310,15 @@ func (d *Decider) recordLocked(
 				}
 			}
 		}
+		// If the split finder has been initialized for longer than the sample
+		// reset duration, then we discard the split finder and start over. This is
+		// to prevent the split finder from being stuck in a state where it is not
+		// finding a split key based on earlier sampled keys, but could find one if
+		// it were to sample new keys with higher probability.
+		if sampleResetDuration := d.config.SampleResetDuration(); sampleResetDuration != 0 &&
+			now.Sub(d.mu.splitFinderInitAt) >= sampleResetDuration {
+			d.mu.splitFinder = nil
+		}
 	}
 	return false
 }
@@ -410,6 +424,7 @@ func (d *Decider) resetLocked(now time.Time) {
 	d.mu.lastStatVal = 0
 	d.mu.count = 0
 	d.mu.maxStat.reset(now, d.config.StatRetention())
+	d.mu.splitFinderInitAt = time.Time{}
 	d.mu.splitFinder = nil
 	d.mu.suggestionsMade = 0
 	d.mu.lastSplitSuggestion = time.Time{}
diff --git a/pkg/kv/kvserver/split/decider_test.go b/pkg/kv/kvserver/split/decider_test.go
@@ -22,10 +22,11 @@ import (
 // testLoadSplitConfig implements the LoadSplitConfig interface and may be used
 // in testing.
 type testLoadSplitConfig struct {
-	randSource    RandSource
-	useWeighted   bool
-	statRetention time.Duration
-	statThreshold float64
+	randSource          RandSource
+	useWeighted         bool
+	statRetention       time.Duration
+	statThreshold       float64
+	sampleResetDuration time.Duration
 }
 
 // NewLoadBasedSplitter returns a new LoadBasedSplitter that may be used to
@@ -50,6 +51,12 @@ func (t *testLoadSplitConfig) StatThreshold(_ SplitObjective) float64 {
 	return t.statThreshold
 }
 
+// SampleResetDuration returns the duration that any sampling structure should
+// retain data for before resetting.
+func (t *testLoadSplitConfig) SampleResetDuration() time.Duration {
+	return t.sampleResetDuration
+}
+
 func ld(n int) func(SplitObjective) int {
 	return func(_ SplitObjective) int {
 		return n
@@ -561,3 +568,66 @@ func TestDeciderMetrics(t *testing.T) {
 	assert.Equal(t, dAllInsufficientCounters.loadSplitterMetrics.ClearDirectionCount.Count(), int64(0))
 
 }
+
+// TestDeciderSampleReset tests the sample reset functionality of the decider.
+// When the sample reset duration is non-zero, the split finder should be reset
+// after the given duration. When the sample reset duration is zero, the split
+// finder should not be reset.
+func TestDeciderSampleReset(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	rng := rand.New(rand.NewPCG(12, 12))
+	loadSplitConfig := testLoadSplitConfig{
+		randSource:          rng,
+		useWeighted:         false,
+		statRetention:       2 * time.Second,
+		statThreshold:       1,
+		sampleResetDuration: 10 * time.Second,
+	}
+	ctx := context.Background()
+	tick := 0
+
+	var d Decider
+	Init(&d, &loadSplitConfig, newSplitterMetrics(), SplitQPS)
+
+	require.Nil(t, d.mu.splitFinder)
+	d.Record(ctx, ms(tick), ld(100), func() roachpb.Span {
+		return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
+	})
+	// The split finder should be created as the second sample is recorded and
+	// the stat remains above the threshold (1) each tick.
+	for i := 0; i < 10; i++ {
+		tick += 1000
+		d.Record(ctx, ms(tick), ld(100), func() roachpb.Span {
+			return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
+		})
+		require.NotNil(t, d.mu.splitFinder, (*lockedDecider)(&d))
+	}
+
+	// Tick one more time. Now the sample reset duration (10s) has passed and
+	// the split finder should be reset.
+	tick += 1000
+	d.Record(ctx, ms(tick), ld(100), func() roachpb.Span {
+		return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
+	})
+	require.Nil(t, d.mu.splitFinder, (*lockedDecider)(&d))
+
+	// Immediately following the last tick where the splitFinder was reset, it
+	// should be recreated as the stat is still above the threshold.
+	for i := 0; i < 10; i++ {
+		tick += 1000
+		d.Record(ctx, ms(tick), ld(100), func() roachpb.Span {
+			return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
+		})
+		require.NotNil(t, d.mu.splitFinder, (*lockedDecider)(&d))
+	}
+	// Set the sample reset duration to 0, which should cause the split finder to
+	// not be reset in the next tick, unlike before when the sample reset
+	// duration was 10s.
+	loadSplitConfig.sampleResetDuration = 0
+	tick += 1000
+	d.Record(ctx, ms(tick), ld(100), func() roachpb.Span {
+		return roachpb.Span{Key: keys.SystemSQLCodec.TablePrefix(uint32(0))}
+	})
+	require.NotNil(t, d.mu.splitFinder, (*lockedDecider)(&d))
+}