Skip to content

Commit 388a1ed

Browse files
committed
storage: remove Engine.PreIngestDelay
Previously the Engine exposed a method that would block a variable amount of time depending on the health of the LSM. This was a rudimentary backpressure mechanism that predated admission control (and quorum replication flow control). We no longer require this mechanism and can remove it and the corresponding cluster settings. Release note: none Epic: none
1 parent a652bfc commit 388a1ed

File tree

8 files changed

+16
-150
lines changed

8 files changed

+16
-150
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10598,14 +10598,6 @@ layers:
1059810598
unit: COUNT
1059910599
aggregation: AVG
1060010600
derivative: NON_NEGATIVE_DERIVATIVE
10601-
- name: addsstable.delay.enginebackpressure
10602-
exported_name: addsstable_delay_enginebackpressure
10603-
description: Amount by which evaluation of AddSSTable requests was delayed by storage-engine backpressure
10604-
y_axis_label: Nanoseconds
10605-
type: COUNTER
10606-
unit: NANOSECONDS
10607-
aggregation: AVG
10608-
derivative: NON_NEGATIVE_DERIVATIVE
1060910601
- name: addsstable.delay.total
1061010602
exported_name: addsstable_delay_total
1061110603
description: Amount by which evaluation of AddSSTable requests was delayed

pkg/kv/kvserver/metrics.go

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2621,12 +2621,6 @@ throttled they do count towards 'delay.total' and 'delay.enginebackpressure'.
26212621
Measurement: "Nanoseconds",
26222622
Unit: metric.Unit_NANOSECONDS,
26232623
}
2624-
metaAddSSTableEvalEngineDelay = metric.Metadata{
2625-
Name: "addsstable.delay.enginebackpressure",
2626-
Help: "Amount by which evaluation of AddSSTable requests was delayed by storage-engine backpressure",
2627-
Measurement: "Nanoseconds",
2628-
Unit: metric.Unit_NANOSECONDS,
2629-
}
26302624

26312625
// Export request counter.
26322626
metaExportEvalTotalDelay = metric.Metadata{
@@ -3402,12 +3396,11 @@ type StoreMetrics struct {
34023396

34033397
// AddSSTable stats: how many AddSSTable commands were proposed and how many
34043398
// were applied? How many applications required writing a copy?
3405-
AddSSTableProposals *metric.Counter
3406-
AddSSTableApplications *metric.Counter
3407-
AddSSTableApplicationCopies *metric.Counter
3408-
AddSSTableAsWrites *metric.Counter
3409-
AddSSTableProposalTotalDelay *metric.Counter
3410-
AddSSTableProposalEngineDelay *metric.Counter
3399+
AddSSTableProposals *metric.Counter
3400+
AddSSTableApplications *metric.Counter
3401+
AddSSTableApplicationCopies *metric.Counter
3402+
AddSSTableAsWrites *metric.Counter
3403+
AddSSTableProposalTotalDelay *metric.Counter
34113404

34123405
// Export request stats.
34133406
ExportRequestProposalTotalDelay *metric.Counter
@@ -4214,12 +4207,11 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
42144207
BackpressuredOnSplitRequests: metric.NewGauge(metaBackpressuredOnSplitRequests),
42154208

42164209
// AddSSTable proposal + applications counters.
4217-
AddSSTableProposals: metric.NewCounter(metaAddSSTableProposals),
4218-
AddSSTableApplications: metric.NewCounter(metaAddSSTableApplications),
4219-
AddSSTableAsWrites: metric.NewCounter(metaAddSSTableAsWrites),
4220-
AddSSTableApplicationCopies: metric.NewCounter(metaAddSSTableApplicationCopies),
4221-
AddSSTableProposalTotalDelay: metric.NewCounter(metaAddSSTableEvalTotalDelay),
4222-
AddSSTableProposalEngineDelay: metric.NewCounter(metaAddSSTableEvalEngineDelay),
4210+
AddSSTableProposals: metric.NewCounter(metaAddSSTableProposals),
4211+
AddSSTableApplications: metric.NewCounter(metaAddSSTableApplications),
4212+
AddSSTableAsWrites: metric.NewCounter(metaAddSSTableAsWrites),
4213+
AddSSTableApplicationCopies: metric.NewCounter(metaAddSSTableApplicationCopies),
4214+
AddSSTableProposalTotalDelay: metric.NewCounter(metaAddSSTableEvalTotalDelay),
42234215

42244216
// ExportRequest proposal.
42254217
ExportRequestProposalTotalDelay: metric.NewCounter(metaExportEvalTotalDelay),

pkg/kv/kvserver/store_send.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -330,18 +330,11 @@ func (s *Store) maybeThrottleBatch(
330330
if err != nil {
331331
return nil, err
332332
}
333+
waited := timeutil.Since(before)
333334

334-
beforeEngineDelay := timeutil.Now()
335-
// TODO(sep-raft-log): can we get rid of this?
336-
s.TODOEngine().PreIngestDelay(ctx)
337-
after := timeutil.Now()
338-
339-
waited, waitedEngine := after.Sub(before), after.Sub(beforeEngineDelay)
340335
s.metrics.AddSSTableProposalTotalDelay.Inc(waited.Nanoseconds())
341-
s.metrics.AddSSTableProposalEngineDelay.Inc(waitedEngine.Nanoseconds())
342336
if waited > time.Second {
343-
log.KvExec.Infof(ctx, "SST ingestion was delayed by %v (%v for storage engine back-pressure)",
344-
waited, waitedEngine)
337+
log.KvExec.Infof(ctx, "SST ingestion was delayed by %v", waited)
345338
}
346339
return res, nil
347340

pkg/roachprod/agents/opentelemetry/cockroachdb_metrics.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ var cockroachdbMetrics = map[string]string{
3434
"addsstable_applications": "addsstable.applications",
3535
"addsstable_aswrites": "addsstable.aswrites",
3636
"addsstable_copies": "addsstable.copies",
37-
"addsstable_delay_enginebackpressure": "addsstable.delay.enginebackpressure",
3837
"addsstable_delay_total": "addsstable.delay.total",
3938
"addsstable_proposals": "addsstable.proposals",
4039
"admission_admitted_elastic_cpu": "admission.admitted.elastic_cpu",

pkg/settings/registry.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,10 @@ var retiredSettings = map[InternalKey]struct{}{
276276
"storage.columnar_blocks.enabled": {},
277277

278278
// removed as of 26.1
279-
"sql.distsql_planning.use_gossip.enabled": {},
279+
"sql.distsql_planning.use_gossip.enabled": {},
280+
"rocksdb.ingest_backpressure.l0_file_count_threshold": {},
281+
"rocksdb.ingest_backpressure.max_delay": {},
282+
"pebble.pre_ingest_delay.enabled": {},
280283
}
281284

282285
// grandfatheredDefaultSettings is the list of "grandfathered" existing sql.defaults

pkg/storage/engine.go

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/keys"
1414
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
1515
"github.com/cockroachdb/cockroach/pkg/roachpb"
16-
"github.com/cockroachdb/cockroach/pkg/settings"
17-
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
1816
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
1917
"github.com/cockroachdb/cockroach/pkg/storage/fs"
2018
"github.com/cockroachdb/cockroach/pkg/storage/pebbleiter"
@@ -1053,10 +1051,6 @@ type Engine interface {
10531051
// modified or deleted by the Engine doing the ingestion.
10541052
IngestExternalFiles(ctx context.Context, external []pebble.ExternalFile) (pebble.IngestOperationStats, error)
10551053

1056-
// PreIngestDelay offers an engine the chance to backpressure ingestions.
1057-
// When called, it may choose to block if the engine determines that it is in
1058-
// or approaching a state where further ingestions may risk its health.
1059-
PreIngestDelay(ctx context.Context)
10601054
// ApproximateDiskBytes returns an approximation of the on-disk size and file
10611055
// counts for the given key span, along with how many of those bytes are on
10621056
// remote, as well as specifically external remote, storage.
@@ -1718,79 +1712,6 @@ func ClearRangeWithHeuristic(
17181712
return nil
17191713
}
17201714

1721-
var ingestDelayL0Threshold = settings.RegisterIntSetting(
1722-
settings.ApplicationLevel,
1723-
"rocksdb.ingest_backpressure.l0_file_count_threshold",
1724-
"number of L0 files after which to backpressure SST ingestions",
1725-
20,
1726-
)
1727-
1728-
var ingestDelayTime = settings.RegisterDurationSetting(
1729-
settings.ApplicationLevel,
1730-
"rocksdb.ingest_backpressure.max_delay",
1731-
"maximum amount of time to backpressure a single SST ingestion",
1732-
time.Second*5,
1733-
)
1734-
1735-
var preIngestDelayEnabled = settings.RegisterBoolSetting(
1736-
settings.SystemOnly,
1737-
"pebble.pre_ingest_delay.enabled",
1738-
"controls whether the pre-ingest delay mechanism is active",
1739-
false,
1740-
)
1741-
1742-
// PreIngestDelay may choose to block for some duration if L0 has an excessive
1743-
// number of files in it or if PendingCompactionBytesEstimate is elevated. This
1744-
// it is intended to be called before ingesting a new SST, since we'd rather
1745-
// backpressure the bulk operation adding SSTs than slow down the whole RocksDB
1746-
// instance and impact all foreground traffic by adding too many files to it.
1747-
// After the number of L0 files exceeds the configured limit, it gradually
1748-
// begins delaying more for each additional file in L0 over the limit until
1749-
// hitting its configured (via settings) maximum delay. If the pending
1750-
// compaction limit is exceeded, it waits for the maximum delay.
1751-
func preIngestDelay(ctx context.Context, eng Engine, settings *cluster.Settings) {
1752-
if settings == nil {
1753-
return
1754-
}
1755-
if !preIngestDelayEnabled.Get(&settings.SV) {
1756-
return
1757-
}
1758-
metrics := eng.GetMetrics()
1759-
targetDelay := calculatePreIngestDelay(settings, metrics.Metrics)
1760-
1761-
if targetDelay == 0 {
1762-
return
1763-
}
1764-
log.VEventf(ctx, 2, "delaying SST ingestion %s. %d L0 files, %d L0 Sublevels",
1765-
targetDelay, metrics.Levels[0].Tables.Count, metrics.Levels[0].Sublevels)
1766-
1767-
select {
1768-
case <-time.After(targetDelay):
1769-
case <-ctx.Done():
1770-
}
1771-
}
1772-
1773-
func calculatePreIngestDelay(settings *cluster.Settings, metrics *pebble.Metrics) time.Duration {
1774-
maxDelay := ingestDelayTime.Get(&settings.SV)
1775-
l0ReadAmpLimit := ingestDelayL0Threshold.Get(&settings.SV)
1776-
1777-
const ramp = 10
1778-
l0ReadAmp := int64(metrics.Levels[0].Tables.Count)
1779-
if metrics.Levels[0].Sublevels >= 0 {
1780-
l0ReadAmp = int64(metrics.Levels[0].Sublevels)
1781-
}
1782-
1783-
if l0ReadAmp > l0ReadAmpLimit {
1784-
delayPerFile := maxDelay / time.Duration(ramp)
1785-
targetDelay := time.Duration(l0ReadAmp-l0ReadAmpLimit) * delayPerFile
1786-
if targetDelay > maxDelay {
1787-
return maxDelay
1788-
}
1789-
return targetDelay
1790-
}
1791-
return 0
1792-
}
1793-
17941715
// Helper function to implement Reader.MVCCIterate().
17951716
func iterateOnReader(
17961717
ctx context.Context,

pkg/storage/engine_test.go

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"strconv"
2121
"strings"
2222
"testing"
23-
"time"
2423

2524
"github.com/cockroachdb/cockroach/pkg/keys"
2625
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/isolation"
@@ -1186,34 +1185,6 @@ func TestCreateCheckpoint_SpanConstrained(t *testing.T) {
11861185
}
11871186
}
11881187

1189-
func TestIngestDelayLimit(t *testing.T) {
1190-
defer leaktest.AfterTest(t)()
1191-
defer log.Scope(t).Close(t)
1192-
s := cluster.MakeTestingClusterSettings()
1193-
1194-
max, ramp := time.Second*5, time.Second*5/10
1195-
1196-
for _, tc := range []struct {
1197-
exp time.Duration
1198-
fileCount uint64
1199-
sublevelCount int32
1200-
}{
1201-
{0, 0, 0},
1202-
{0, 19, -1},
1203-
{0, 20, -1},
1204-
{ramp, 21, -1},
1205-
{ramp * 2, 22, -1},
1206-
{ramp * 2, 22, 22},
1207-
{ramp * 2, 55, 22},
1208-
{max, 55, -1},
1209-
} {
1210-
var m pebble.Metrics
1211-
m.Levels[0].Tables.Count = tc.fileCount
1212-
m.Levels[0].Sublevels = tc.sublevelCount
1213-
require.Equal(t, tc.exp, calculatePreIngestDelay(s, &m))
1214-
}
1215-
}
1216-
12171188
func TestEngineFS(t *testing.T) {
12181189
defer leaktest.AfterTest(t)()
12191190
defer log.Scope(t).Close(t)

pkg/storage/pebble.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2255,11 +2255,6 @@ func (p *Pebble) IngestExternalFiles(
22552255
return p.db.IngestExternalFiles(ctx, external)
22562256
}
22572257

2258-
// PreIngestDelay implements the Engine interface.
2259-
func (p *Pebble) PreIngestDelay(ctx context.Context) {
2260-
preIngestDelay(ctx, p, p.cfg.settings)
2261-
}
2262-
22632258
// GetTableMetrics implements the Engine interface.
22642259
func (p *Pebble) GetTableMetrics(start, end roachpb.Key) ([]enginepb.SSTableMetricsInfo, error) {
22652260
filterOpt := pebble.WithKeyRangeFilter(

0 commit comments

Comments
 (0)