Skip to content

Commit 165e40b

Browse files
craig[bot]RaduBerinde
andcommitted
Merge #155021
155021: storage, kv: report long-lived iterators r=RaduBerinde a=RaduBerinde This change enables the new iterator tracking feature in Pebble. Every 5 minutes, we log the iterators that have been open for more than 1 minute. Because iterator tracking has measurable overhead, we exempt iterators in hot paths (based on the read category). This required some read category plumbing fixes and the addition of a new category for the abort span. The test utilizes go1.25's `synctest` and will start running when we upgrade Go. I did run it locally with go 1.25. Epic: none Release note: None Co-authored-by: Radu Berinde <[email protected]>
2 parents 59c110c + 45382bf commit 165e40b

File tree

12 files changed

+174
-19
lines changed

12 files changed

+174
-19
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16487,6 +16487,30 @@ layers:
1648716487
unit: NANOSECONDS
1648816488
aggregation: AVG
1648916489
derivative: NON_NEGATIVE_DERIVATIVE
16490+
- name: storage.iterator.category-abort-span.block-load.bytes
16491+
exported_name: storage_iterator_category_abort_span_block_load_bytes
16492+
description: Bytes loaded by storage sstable iterators (possibly cached).
16493+
y_axis_label: Bytes
16494+
type: COUNTER
16495+
unit: BYTES
16496+
aggregation: AVG
16497+
derivative: NON_NEGATIVE_DERIVATIVE
16498+
- name: storage.iterator.category-abort-span.block-load.cached-bytes
16499+
exported_name: storage_iterator_category_abort_span_block_load_cached_bytes
16500+
description: Bytes loaded by storage sstable iterators from the block cache
16501+
y_axis_label: Bytes
16502+
type: COUNTER
16503+
unit: BYTES
16504+
aggregation: AVG
16505+
derivative: NON_NEGATIVE_DERIVATIVE
16506+
- name: storage.iterator.category-abort-span.block-load.latency-sum
16507+
exported_name: storage_iterator_category_abort_span_block_load_latency_sum
16508+
description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators
16509+
y_axis_label: Latency
16510+
type: COUNTER
16511+
unit: NANOSECONDS
16512+
aggregation: AVG
16513+
derivative: NON_NEGATIVE_DERIVATIVE
1649016514
- name: storage.iterator.category-backup.block-load.bytes
1649116515
exported_name: storage_iterator_category_backup_block_load_bytes
1649216516
description: Bytes loaded by storage sstable iterators (possibly cached).

pkg/kv/kvserver/abortspan/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ go_library(
1010
"//pkg/roachpb",
1111
"//pkg/storage",
1212
"//pkg/storage/enginepb",
13+
"//pkg/storage/fs",
1314
"//pkg/util/hlc",
1415
"//pkg/util/log",
1516
"//pkg/util/uuid",

pkg/kv/kvserver/abortspan/abortspan.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/roachpb"
1414
"github.com/cockroachdb/cockroach/pkg/storage"
1515
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
16+
"github.com/cockroachdb/cockroach/pkg/storage/fs"
1617
"github.com/cockroachdb/cockroach/pkg/util/hlc"
1718
"github.com/cockroachdb/cockroach/pkg/util/log"
1819
"github.com/cockroachdb/cockroach/pkg/util/uuid"
@@ -87,7 +88,9 @@ func (sc *AbortSpan) Get(
8788
) (bool, error) {
8889
// Pull response from disk and read into reply if available.
8990
key := keys.AbortSpanKey(sc.rangeID, txnID)
90-
ok, err := storage.MVCCGetProto(ctx, reader, key, hlc.Timestamp{}, entry, storage.MVCCGetOptions{})
91+
ok, err := storage.MVCCGetProto(ctx, reader, key, hlc.Timestamp{}, entry, storage.MVCCGetOptions{
92+
ReadCategory: fs.AbortSpanReadCategory,
93+
})
9194
return ok, err
9295
}
9396

pkg/storage/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ go_test(
142142
"mvcc_value_test.go",
143143
"open_test.go",
144144
"pebble_iterator_test.go",
145+
"pebble_iterator_tracking_test.go",
145146
"pebble_merge_test.go",
146147
"pebble_mvcc_scanner_test.go",
147148
"pebble_test.go",
@@ -204,6 +205,7 @@ go_test(
204205
"//pkg/util/protoutil",
205206
"//pkg/util/randutil",
206207
"//pkg/util/shuffle",
208+
"//pkg/util/syncutil",
207209
"//pkg/util/sysutil",
208210
"//pkg/util/timeutil",
209211
"//pkg/util/uint128",

pkg/storage/engine.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,10 @@ func GetIntent(ctx context.Context, reader Reader, key roachpb.Key) (*roachpb.In
14431443
Prefix: true,
14441444
// Ignore Exclusive and Shared locks. We only care about intents.
14451445
MatchMinStr: lock.Intent,
1446+
// This is eventually called from the QueryIntent request, so this isn't
1447+
// quite "intent resolution", but we don't want too many categories, and
1448+
// this does relate to intents, so we use this existing category.
1449+
ReadCategory: fs.IntentResolutionReadCategory,
14461450
}
14471451
iter, err := NewLockTableIterator(ctx, reader, opts)
14481452
if err != nil {

pkg/storage/fs/category.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ const (
4444
IntentResolutionReadCategory
4545
// BackupReadCategory are reads for backups.
4646
BackupReadCategory
47+
// AbortSpanReadCategory are reads used to check if a txn was aborted.
48+
AbortSpanReadCategory
4749
)
4850

4951
var readCategoryMap = [...]block.Category{
@@ -59,6 +61,7 @@ var readCategoryMap = [...]block.Category{
5961
ReplicationReadCategory: block.RegisterCategory("replication", block.LatencySensitiveQoSLevel),
6062
IntentResolutionReadCategory: block.RegisterCategory("intent-resolution", block.LatencySensitiveQoSLevel),
6163
BackupReadCategory: block.RegisterCategory("backup", block.NonLatencySensitiveQoSLevel),
64+
AbortSpanReadCategory: block.RegisterCategory("abort-span", block.LatencySensitiveQoSLevel),
6265
}
6366

6467
// PebbleCategory returns the block.Category associated with the given ReadCategory.

pkg/storage/lock_table_iterator.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,10 @@ func (opts LockTableIteratorOptions) validate() error {
110110
// toIterOptions converts the LockTableIteratorOptions to IterOptions.
111111
func (opts LockTableIteratorOptions) toIterOptions() IterOptions {
112112
return IterOptions{
113-
Prefix: opts.Prefix,
114-
LowerBound: opts.LowerBound,
115-
UpperBound: opts.UpperBound,
113+
Prefix: opts.Prefix,
114+
LowerBound: opts.LowerBound,
115+
UpperBound: opts.UpperBound,
116+
ReadCategory: opts.ReadCategory,
116117
}
117118
}
118119

pkg/storage/open.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ func Attributes(attrs roachpb.Attributes) ConfigOption {
7878
}
7979
}
8080

81+
// TestingLogger overrides the logger.
82+
func TestingLogger(logger pebble.LoggerAndTracer) ConfigOption {
83+
return func(cfg *engineConfig) error {
84+
cfg.opts.LoggerAndTracer = logger
85+
return nil
86+
}
87+
}
88+
8189
// storeSize configures the maximum allowable size for a store.
8290
// Can be specified either as a percentage of total capacity or
8391
// an absolute byte size; if both are specified, the percentage takes

pkg/storage/pebble.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,10 @@ func DefaultPebbleOptions() *pebble.Options {
582582
opts.Experimental.SpanPolicyFunc = spanPolicyFunc
583583
opts.Experimental.UserKeyCategories = userKeyCategories
584584

585+
// Every 5 minutes, log iterators that have been open for more than 1 minute.
586+
opts.Experimental.IteratorTracking.PollInterval = 5 * time.Minute
587+
opts.Experimental.IteratorTracking.MaxAge = time.Minute
588+
585589
opts.Levels[0] = pebble.LevelOptions{
586590
BlockSize: 32 << 10, // 32 KB
587591
IndexBlockSize: 256 << 10, // 256 KB
@@ -2782,11 +2786,8 @@ func (p *pebbleReadOnly) ConsistentIterators() bool {
27822786
// PinEngineStateForIterators implements the Engine interface.
27832787
func (p *pebbleReadOnly) PinEngineStateForIterators(readCategory fs.ReadCategory) error {
27842788
if p.iter == nil {
2785-
o := &pebble.IterOptions{Category: readCategory.PebbleCategory()}
2786-
if p.durability == GuaranteedDurability {
2787-
o.OnlyReadGuaranteedDurable = true
2788-
}
2789-
iter, err := p.parent.db.NewIter(o)
2789+
o := makeIterOptions(readCategory, p.durability)
2790+
iter, err := p.parent.db.NewIter(&o)
27902791
if err != nil {
27912792
return err
27922793
}

pkg/storage/pebble_batch.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -713,11 +713,11 @@ func (p *pebbleBatch) PinEngineStateForIterators(readCategory fs.ReadCategory) e
713713
var err error
714714
if p.iter == nil {
715715
var iter *pebble.Iterator
716-
o := &pebble.IterOptions{Category: readCategory.PebbleCategory()}
716+
o := makeIterOptions(readCategory, StandardDurability)
717717
if p.batch.Indexed() {
718-
iter, err = p.batch.NewIter(o)
718+
iter, err = p.batch.NewIter(&o)
719719
} else {
720-
iter, err = p.db.NewIter(o)
720+
iter, err = p.db.NewIter(&o)
721721
}
722722
if err != nil {
723723
return err

0 commit comments

Comments
 (0)