Skip to content

Commit 1cb683a

Browse files
committed
db: make iterator tracking configurable
We disable iterator tracking by default (since the overhead is measurable). In addition, we allow disabling tracking for specific iterators via `IterOptions`. In CRDB we will enable iterator tracking but exempt those in the hottest batch eval paths.
1 parent c34a282 commit 1cb683a

File tree

9 files changed

+87
-21
lines changed

9 files changed

+87
-21
lines changed

db.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,8 +1114,8 @@ func (d *DB) newIter(
11141114
dbi.batch.batch = batch
11151115
dbi.batch.batchSeqNum = batch.nextSeqNum()
11161116
}
1117-
if !dbi.batchOnlyIter {
1118-
dbi.tracker = d.iterTracker
1117+
dbi.tracker = d.iterTracker
1118+
if !dbi.batchOnlyIter && d.iterTracker != nil && !dbi.opts.ExemptFromTracking {
11191119
dbi.trackerHandle = d.iterTracker.Start()
11201120
}
11211121
return finishInitializingIter(ctx, buf)
@@ -1624,8 +1624,10 @@ func (d *DB) Close() error {
16241624
err = firstError(err, errors.Errorf("leaked snapshots: %d open snapshots on DB %p", v, d))
16251625
}
16261626

1627-
d.iterTracker.Close()
1628-
d.iterTracker = nil
1627+
if d.iterTracker != nil {
1628+
d.iterTracker.Close()
1629+
d.iterTracker = nil
1630+
}
16291631

16301632
return err
16311633
}

internal/inflight/in_flight.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ type Tracker struct {
7272
// A zero Handle is not valid.
7373
type Handle uint64
7474

75+
func (h Handle) IsValid() bool {
76+
return h != 0
77+
}
78+
7579
// ReportFn is called (typically periodically) with a formatted report that
7680
// describes entries older than some threshold. An empty string report is never
7781
// delivered by the polling tracker.

iterator.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ type Iterator struct {
210210
// short-lived (since they pin memtables and sstables), (b) plumbing a
211211
// context into every method is very painful, (c) they do not (yet) respect
212212
// context cancellation and are only used for tracing.
213-
ctx context.Context
213+
ctx context.Context
214+
// TODO(radu): group fields that are always set from `db` fields (tracker,
215+
// merge, comparer, fc) into a single struct we can point to.
214216
tracker *inflight.Tracker
215217
trackerHandle inflight.Handle
216218
opts IterOptions
@@ -2365,8 +2367,9 @@ const maxKeyBufCacheSize = 4 << 10 // 4 KB
23652367
// It is not valid to call any method, including Close, after the iterator
23662368
// has been closed.
23672369
func (i *Iterator) Close() error {
2368-
if i.tracker != nil {
2370+
if i.trackerHandle.IsValid() {
23692371
i.tracker.Stop(i.trackerHandle)
2372+
i.trackerHandle = 0
23702373
}
23712374
// Close the child iterator before releasing the readState because when the
23722375
// readState is released sstables referenced by the readState may be deleted
@@ -2900,9 +2903,9 @@ func (i *Iterator) CloneWithContext(ctx context.Context, opts CloneOptions) (*It
29002903
newIters: i.newIters,
29012904
newIterRangeKey: i.newIterRangeKey,
29022905
seqNum: i.seqNum,
2906+
tracker: i.tracker,
29032907
}
2904-
if i.tracker != nil {
2905-
dbi.tracker = i.tracker
2908+
if i.tracker != nil && !dbi.opts.ExemptFromTracking {
29062909
dbi.trackerHandle = i.tracker.Start()
29072910
}
29082911
if i.batch != nil {

metamorphic/ops.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,9 +1181,6 @@ const debugIterators = false
11811181
func (o *newIterOp) run(t *Test, h historyRecorder) {
11821182
r := t.getReader(o.readerID)
11831183
opts := iterOptions(t.testOpts.KeyFormat, o.iterOpts)
1184-
if debugIterators {
1185-
opts.DebugRangeKeyStack = true
1186-
}
11871184

11881185
var i *pebble.Iterator
11891186
for {
@@ -1261,9 +1258,14 @@ type newIterUsingCloneOp struct {
12611258
func (o *newIterUsingCloneOp) run(t *Test, h historyRecorder) {
12621259
iter := t.getIter(o.existingIterID)
12631260
cloneOpts := pebble.CloneOptions{
1264-
IterOptions: iterOptions(t.testOpts.KeyFormat, o.iterOpts),
12651261
RefreshBatchView: o.refreshBatch,
12661262
}
1263+
// We treat the zero options as an indication that we want to keep the options
1264+
// of the original iterator. This way, we don't depend on iterOptions() and
1265+
// whether it returns nil or not.
1266+
if !o.iterOpts.IsZero() {
1267+
cloneOpts.IterOptions = iterOptions(t.testOpts.KeyFormat, o.iterOpts)
1268+
}
12671269
i, err := iter.iter.Clone(cloneOpts)
12681270
if err != nil {
12691271
panic(err)
@@ -1377,7 +1379,10 @@ func (o *iterSetOptionsOp) formattedString(kf KeyFormat) string {
13771379
}
13781380

13791381
func iterOptions(kf KeyFormat, o iterOpts) *pebble.IterOptions {
1380-
if o.IsZero() && !debugIterators {
1382+
// Sometimes exempt from tracking.
1383+
exemptFromTracking := rand.IntN(4) == 0
1384+
if o.IsZero() && !debugIterators && !exemptFromTracking && rand.IntN(2) == 0 {
1385+
// Sometimes return nil if we are using the default options.
13811386
return nil
13821387
}
13831388
var lower, upper []byte
@@ -1395,6 +1400,7 @@ func iterOptions(kf KeyFormat, o iterOpts) *pebble.IterOptions {
13951400
Suffix: o.maskSuffix,
13961401
},
13971402
UseL6Filters: o.useL6Filters,
1403+
ExemptFromTracking: exemptFromTracking,
13981404
DebugRangeKeyStack: debugIterators,
13991405
}
14001406
if opts.RangeKeyMasking.Suffix != nil {

metamorphic/options.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,11 @@ func RandomOptions(rng *rand.Rand, kf KeyFormat, cfg RandomOptionsCfg) *TestOpti
934934
}
935935
}
936936

937+
if rand.IntN(2) == 0 {
938+
opts.Experimental.IteratorTracking.PollInterval = 100 * time.Millisecond
939+
opts.Experimental.IteratorTracking.MaxAge = 10 * time.Second
940+
}
941+
937942
testOpts.seedEFOS = rng.Uint64()
938943
testOpts.ingestSplit = rng.IntN(2) == 0
939944
opts.Experimental.IngestSplit = func() bool { return testOpts.ingestSplit }

open.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,11 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
205205
} else {
206206
d.compactionScheduler = newConcurrencyLimitScheduler(defaultTimeSource{})
207207
}
208-
const iterTrackerPollInterval = 5 * time.Minute
209-
const iterTrackerMaxAge = 1 * time.Minute
210-
d.iterTracker = inflight.NewPollingTracker(iterTrackerPollInterval, iterTrackerMaxAge, func(report string) {
211-
d.opts.Logger.Infof("Long-lived iterators detected:\n%s", report)
212-
})
208+
if iterTrackOpts := opts.Experimental.IteratorTracking; iterTrackOpts.PollInterval > 0 && iterTrackOpts.MaxAge > 0 {
209+
d.iterTracker = inflight.NewPollingTracker(iterTrackOpts.PollInterval, iterTrackOpts.MaxAge, func(report string) {
210+
d.opts.Logger.Infof("Long-lived iterators detected:\n%s", report)
211+
})
212+
}
213213

214214
defer func() {
215215
// If an error or panic occurs during open, attempt to release the manually
@@ -242,7 +242,10 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
242242
if d.mu.versions.manifestFile != nil {
243243
_ = d.mu.versions.manifestFile.Close()
244244
}
245-
d.iterTracker.Close()
245+
if d.iterTracker != nil {
246+
d.iterTracker.Close()
247+
d.iterTracker = nil
248+
}
246249
if r != nil {
247250
panic(r)
248251
}

options.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,13 @@ type IterOptions struct {
207207
// changed by calling SetOptions.
208208
Category block.Category
209209

210+
// ExemptFromTracking indicates that we should not track the lifetime of the
211+
// iterator (used to log information about long-lived iterators). Useful for
212+
// hot paths where we know the iterator will be short-lived.
213+
ExemptFromTracking bool
214+
215+
// DebugRangeKeyStack enables additional logging of the range key stack
216+
// iterator, via keyspan.InjectLogging. Only used for debugging.
210217
DebugRangeKeyStack bool
211218

212219
// Internal options.
@@ -792,6 +799,22 @@ type Options struct {
792799
// virtual table rewrite compactions entirely. The default value is 0.30
793800
// (rewrite when >= 30% of backing data is unreferenced).
794801
VirtualTableRewriteUnreferencedFraction func() float64
802+
803+
// IteratorTracking configures periodic logging of iterators held open for
804+
// too long.
805+
IteratorTracking struct {
806+
// PollInterval is the interval at which to log a report of long-lived
807+
// iterators. If zero, disables iterator tracking.
808+
//
809+
// The default value is 0 (disabled).
810+
PollInterval time.Duration
811+
812+
// MaxAge is the age above which iterators are considered long-lived. If
813+
// zero, disables iterator tracking.
814+
//
815+
// The default value is 0 (disabled).
816+
MaxAge time.Duration
817+
}
795818
}
796819

797820
// Filters is a map from filter policy name to filter policy. It is used for
@@ -1790,6 +1813,13 @@ func (o *Options) String() string {
17901813
fmt.Fprintf(&buf, " secondary_cache_size_bytes=%d\n", o.Experimental.SecondaryCacheSizeBytes)
17911814
fmt.Fprintf(&buf, " create_on_shared=%d\n", o.Experimental.CreateOnShared)
17921815

1816+
if o.Experimental.IteratorTracking.PollInterval != 0 {
1817+
fmt.Fprintf(&buf, " iterator_tracking_poll_interval=%s\n", o.Experimental.IteratorTracking.PollInterval)
1818+
}
1819+
if o.Experimental.IteratorTracking.MaxAge != 0 {
1820+
fmt.Fprintf(&buf, " iterator_tracking_max_age=%s\n", o.Experimental.IteratorTracking.MaxAge)
1821+
}
1822+
17931823
// Private options.
17941824
//
17951825
// These options are only encoded if true, because we do not want them to
@@ -2232,6 +2262,10 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
22322262
var createOnSharedInt int64
22332263
createOnSharedInt, err = strconv.ParseInt(value, 10, 64)
22342264
o.Experimental.CreateOnShared = remote.CreateOnSharedStrategy(createOnSharedInt)
2265+
case "iterator_tracking_poll_interval":
2266+
o.Experimental.IteratorTracking.PollInterval, err = time.ParseDuration(value)
2267+
case "iterator_tracking_max_age":
2268+
o.Experimental.IteratorTracking.MaxAge, err = time.ParseDuration(value)
22352269
default:
22362270
if hooks != nil && hooks.SkipUnknown != nil && hooks.SkipUnknown(section+"."+key, value) {
22372271
return nil

options_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ func (o *Options) randomizeForTesting(t testing.TB) {
5252
}
5353
o.Experimental.ValueSeparationPolicy = func() ValueSeparationPolicy { return policy }
5454
}
55+
if rand.IntN(2) == 0 {
56+
o.Experimental.IteratorTracking.PollInterval = 100 * time.Millisecond
57+
o.Experimental.IteratorTracking.MaxAge = 10 * time.Second
58+
}
5559
o.EnsureDefaults()
5660
}
5761

scan_internal.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,6 @@ func (d *DB) newInternalIter(
188188
*dbi = scanInternalIterator{
189189
ctx: ctx,
190190
db: d,
191-
trackerHandle: d.iterTracker.Start(),
192191
comparer: d.opts.Comparer,
193192
merge: d.opts.Merger.Merge,
194193
readState: readState,
@@ -199,6 +198,9 @@ func (d *DB) newInternalIter(
199198
seqNum: seqNum,
200199
mergingIter: &buf.iterAlloc.merging,
201200
}
201+
if d.iterTracker != nil {
202+
dbi.trackerHandle = d.iterTracker.Start()
203+
}
202204
dbi.blobValueFetcher.Init(&vers.BlobFiles, d.fileCache, block.ReadEnv{},
203205
blob.SuggestedCachedReaders(vers.MaxReadAmp()))
204206

@@ -1286,7 +1288,10 @@ func (i *scanInternalIterator) error() error {
12861288

12871289
// Close closes this iterator, and releases any pooled objects.
12881290
func (i *scanInternalIterator) Close() error {
1289-
i.db.iterTracker.Stop(i.trackerHandle)
1291+
if i.trackerHandle != 0 {
1292+
i.db.iterTracker.Stop(i.trackerHandle)
1293+
i.trackerHandle = 0
1294+
}
12901295
err := i.iter.Close()
12911296
err = errors.CombineErrors(err, i.blobValueFetcher.Close())
12921297
if i.readState != nil {

0 commit comments

Comments
 (0)