From 48f78420a1172c50f340aadb4cd3be87753e2fd2 Mon Sep 17 00:00:00 2001 From: galaio Date: Thu, 28 Aug 2025 14:50:34 +0800 Subject: [PATCH 1/2] cache: add filter cache option; --- cmd/pebble/db.go | 3 +++ db.go | 2 +- metamorphic/options.go | 5 +++-- .../objiotracing/obj_io_tracing_off.go | 5 ++++- open.go | 6 ++++++ options.go | 5 ++++- sstable/options.go | 6 ++++-- sstable/reader.go | 15 +++++++++++++-- table_cache.go | 1 + tool/find.go | 7 ++++--- tool/sstable.go | 8 +++++--- 11 files changed, 48 insertions(+), 15 deletions(-) diff --git a/cmd/pebble/db.go b/cmd/pebble/db.go index 41c6e594d3..fe8d95aa08 100644 --- a/cmd/pebble/db.go +++ b/cmd/pebble/db.go @@ -55,8 +55,11 @@ type pebbleDB struct { func newPebbleDB(dir string) DB { cache := pebble.NewCache(cacheSize) defer cache.Unref() + filterCache := pebble.NewCache(cacheSize) + defer filterCache.Unref() opts := &pebble.Options{ Cache: cache, + FilterCache: filterCache, Comparer: mvccComparer, DisableWAL: disableWAL, FormatMajorVersion: pebble.FormatNewest, diff --git a/db.go b/db.go index 67a2065898..87b22dbd04 100644 --- a/db.go +++ b/db.go @@ -1584,7 +1584,7 @@ func (d *DB) Close() error { close(d.closedCh) defer d.opts.Cache.Unref() - + defer d.opts.FilterCache.Unref() for d.mu.compact.compactingCount > 0 || d.mu.compact.flushing { d.mu.compact.cond.Wait() } diff --git a/metamorphic/options.go b/metamorphic/options.go index 780e66bd4f..40384c80a0 100644 --- a/metamorphic/options.go +++ b/metamorphic/options.go @@ -458,8 +458,9 @@ func randomOptions( } } - opts.BytesPerSync = 1 << uint(rng.Intn(28)) // 1B - 256MB - opts.Cache = cache.New(1 << uint(rng.Intn(30))) // 1B - 1GB + opts.BytesPerSync = 1 << uint(rng.Intn(28)) // 1B - 256MB + opts.Cache = cache.New(1 << uint(rng.Intn(30))) // 1B - 1GB + opts.FilterCache = cache.New(1 << uint(rng.Intn(30))) // 1B - 1GB opts.DisableWAL = rng.Intn(2) == 0 opts.FlushDelayDeleteRange = time.Millisecond * time.Duration(5*rng.Intn(245)) // 5-250ms opts.FlushDelayRangeKey = 
time.Millisecond * time.Duration(5*rng.Intn(245)) // 5-250ms diff --git a/objstorage/objstorageprovider/objiotracing/obj_io_tracing_off.go b/objstorage/objstorageprovider/objiotracing/obj_io_tracing_off.go index a4923ab60e..997cbb7b63 100644 --- a/objstorage/objstorageprovider/objiotracing/obj_io_tracing_off.go +++ b/objstorage/objstorageprovider/objiotracing/obj_io_tracing_off.go @@ -52,7 +52,18 @@ func WithReason(ctx context.Context, reason Reason) context.Context { return ctx // WithBlockType creates a context that has an associated BlockType (which ends up in // traces created under that context). -func WithBlockType(ctx context.Context, blockType BlockType) context.Context { return ctx } +// +// Although this is the tracing-off variant, the block type is still stored in +// the context: sstable.Reader.readBlock reads it back under the "blockType" key +// to choose which block cache serves the read. +// +// NOTE(review): the key is a plain string that readBlock must match verbatim; +// replacing it with a dedicated key type means changing both sides together. +// Also confirm the tracing-enabled WithBlockType stores the same key, otherwise +// those builds always fall back to the main block cache. +func WithBlockType(ctx context.Context, blockType BlockType) context.Context { + return context.WithValue(ctx, "blockType", blockType) +} // WithLevel creates a context that has an associated level (which ends up in // traces created under that context). diff --git a/open.go b/open.go index 3963d9cce1..81d672ca2f 100644 --- a/open.go +++ b/open.go @@ -162,6 +162,11 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { } else { opts.Cache.Ref() } + if opts.FilterCache == nil { + opts.FilterCache = cache.New(cacheDefaultSize) + } else { + opts.FilterCache.Ref() + } d := &DB{ cacheID: opts.Cache.NewID(), @@ -195,6 +200,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { // the tableCache, then the tableCache will also release its // reference to the cache. opts.Cache.Unref() + opts.FilterCache.Unref() if d.tableCache != nil { _ = d.tableCache.close() diff --git a/options.go b/options.go index b6f240d04a..a88a12dc83 100644 --- a/options.go +++ b/options.go @@ -481,7 +481,8 @@ type Options struct { // Cache is used to cache uncompressed blocks from sstables. // // The default cache size is 8 MB. 
- Cache *cache.Cache + Cache *cache.Cache + FilterCache *cache.Cache // LoadBlockSema, if set, is used to limit the number of blocks that can be // loaded (i.e. read from the filesystem) in parallel. Each load acquires one @@ -1732,6 +1733,7 @@ func (o *Options) MakeReaderOptions() sstable.ReaderOptions { var readerOpts sstable.ReaderOptions if o != nil { readerOpts.Cache = o.Cache + readerOpts.FilterCache = o.FilterCache readerOpts.LoadBlockSema = o.LoadBlockSema readerOpts.Comparer = o.Comparer readerOpts.Filters = o.Filters @@ -1751,6 +1753,7 @@ func (o *Options) MakeWriterOptions(level int, format sstable.TableFormat) sstab writerOpts.TableFormat = format if o != nil { writerOpts.Cache = o.Cache + writerOpts.FilterCache = o.FilterCache writerOpts.Comparer = o.Comparer if o.Merger != nil { writerOpts.MergerName = o.Merger.Name diff --git a/sstable/options.go b/sstable/options.go index 2654f70bff..2ccfcdff11 100644 --- a/sstable/options.go +++ b/sstable/options.go @@ -110,7 +110,8 @@ type ReaderOptions struct { // Cache is used to cache uncompressed blocks from sstables. // // The default cache size is a zero-size cache. - Cache *cache.Cache + Cache *cache.Cache + FilterCache *cache.Cache // LoadBlockSema, if set, is used to limit the number of blocks that can be // loaded (i.e. read from the filesystem) in parallel. Each load acquires one @@ -187,7 +188,8 @@ type WriterOptions struct { // Cache is used to cache uncompressed blocks from sstables. // // The default is a nil cache. - Cache *cache.Cache + Cache *cache.Cache + FilterCache *cache.Cache // Comparer defines a total ordering over the space of []byte keys: a 'less // than' relationship. The same comparison algorithm must be used for reads diff --git a/sstable/reader.go b/sstable/reader.go index 39c7a6966a..15a90877d1 100644 --- a/sstable/reader.go +++ b/sstable/reader.go @@ -236,6 +236,7 @@ type Reader struct { // Close implements DB.Close, as documented in the pebble package. 
func (r *Reader) Close() error { r.opts.Cache.Unref() + r.opts.FilterCache.Unref() if r.readable != nil { r.err = firstError(r.err, r.readable.Close()) @@ -524,7 +525,12 @@ func (r *Reader) readBlock( stats *base.InternalIteratorStats, bufferPool *BufferPool, ) (handle bufferHandle, _ error) { - if h := r.opts.Cache.Get(r.cacheID, r.fileNum, bh.Offset); h.Get() != nil { + dbCache := r.opts.Cache + if ctx.Value("blockType") == objiotracing.FilterBlock { + dbCache = r.opts.FilterCache + } + + if h := dbCache.Get(r.cacheID, r.fileNum, bh.Offset); h.Get() != nil { // Cache hit. if readHandle != nil { readHandle.RecordCacheHit(ctx, int64(bh.Offset), int64(bh.Length+blockTrailerLen)) @@ -641,7 +647,7 @@ func (r *Reader) readBlock( if decompressed.buf.Valid() { return bufferHandle{b: decompressed.buf}, nil } - h := r.opts.Cache.Set(r.cacheID, r.fileNum, bh.Offset, decompressed.v) + h := dbCache.Set(r.cacheID, r.fileNum, bh.Offset, decompressed.v) return bufferHandle{h: h}, nil } @@ -1114,6 +1120,11 @@ func NewReader(f objstorage.Readable, o ReaderOptions, extraOpts ...ReaderOption } else { r.opts.Cache.Ref() } + if r.opts.FilterCache == nil { + r.opts.FilterCache = cache.New(0) + } else { + r.opts.FilterCache.Ref() + } if f == nil { r.err = errors.New("pebble/table: nil file") diff --git a/table_cache.go b/table_cache.go index 516e5e8344..5258f4106d 100644 --- a/table_cache.go +++ b/table_cache.go @@ -985,6 +985,7 @@ func (c *tableCacheShard) evict(fileNum base.DiskFileNum, dbOpts *tableCacheOpts } dbOpts.opts.Cache.EvictFile(dbOpts.cacheID, fileNum) + dbOpts.opts.FilterCache.EvictFile(dbOpts.cacheID, fileNum) } // removeDB evicts any nodes which have a reference to the DB diff --git a/tool/find.go b/tool/find.go index b70e1c4141..99819cbca3 100644 --- a/tool/find.go +++ b/tool/find.go @@ -428,9 +428,10 @@ func (f *findT) searchTables(stdout io.Writer, searchKey []byte, refs []findRef) }() opts := sstable.ReaderOptions{ - Cache: cache, - Comparer: f.opts.Comparer, - 
Filters: f.opts.Filters, + Cache: cache, + FilterCache: cache, + Comparer: f.opts.Comparer, + Filters: f.opts.Filters, } readable, err := sstable.NewSimpleReadable(tf) if err != nil { diff --git a/tool/sstable.go b/tool/sstable.go index deb0a0cf65..8ae1428101 100644 --- a/tool/sstable.go +++ b/tool/sstable.go @@ -145,10 +145,12 @@ func (s *sstableT) newReader(f vfs.File) (*sstable.Reader, error) { if err != nil { return nil, err } + cache := pebble.NewCache(128 << 20 /* 128 MB */) o := sstable.ReaderOptions{ - Cache: pebble.NewCache(128 << 20 /* 128 MB */), - Comparer: s.opts.Comparer, - Filters: s.opts.Filters, + Cache: cache, + FilterCache: cache, + Comparer: s.opts.Comparer, + Filters: s.opts.Filters, } defer o.Cache.Unref() return sstable.NewReader(readable, o, s.comparers, s.mergers, From 9df126cc5df5920e3696c7287502183514ad330d Mon Sep 17 00:00:00 2001 From: galaio Date: Thu, 28 Aug 2025 17:27:57 +0800 Subject: [PATCH 2/2] cache: add index cache option; --- cmd/pebble/db.go | 3 +++ db.go | 1 + metamorphic/options.go | 1 + open.go | 6 ++++++ options.go | 3 +++ sstable/options.go | 2 ++ sstable/reader.go | 8 ++++++++ table_cache.go | 1 + tool/find.go | 1 + tool/sstable.go | 1 + 10 files changed, 27 insertions(+) diff --git a/cmd/pebble/db.go b/cmd/pebble/db.go index fe8d95aa08..c61f28f678 100644 --- a/cmd/pebble/db.go +++ b/cmd/pebble/db.go @@ -57,9 +57,12 @@ func newPebbleDB(dir string) DB { defer cache.Unref() filterCache := pebble.NewCache(cacheSize) defer filterCache.Unref() + indexCache := pebble.NewCache(cacheSize) + defer indexCache.Unref() opts := &pebble.Options{ Cache: cache, FilterCache: filterCache, + IndexCache: indexCache, Comparer: mvccComparer, DisableWAL: disableWAL, FormatMajorVersion: pebble.FormatNewest, diff --git a/db.go b/db.go index 87b22dbd04..da3713c2ee 100644 --- a/db.go +++ b/db.go @@ -1585,6 +1585,7 @@ func (d *DB) Close() error { defer d.opts.Cache.Unref() defer d.opts.FilterCache.Unref() + defer d.opts.IndexCache.Unref() for 
d.mu.compact.compactingCount > 0 || d.mu.compact.flushing { d.mu.compact.cond.Wait() } diff --git a/metamorphic/options.go b/metamorphic/options.go index 40384c80a0..c2d835e792 100644 --- a/metamorphic/options.go +++ b/metamorphic/options.go @@ -461,6 +461,7 @@ func randomOptions( opts.BytesPerSync = 1 << uint(rng.Intn(28)) // 1B - 256MB opts.Cache = cache.New(1 << uint(rng.Intn(30))) // 1B - 1GB opts.FilterCache = cache.New(1 << uint(rng.Intn(30))) // 1B - 1GB + opts.IndexCache = cache.New(1 << uint(rng.Intn(30))) // 1B - 1GB opts.DisableWAL = rng.Intn(2) == 0 opts.FlushDelayDeleteRange = time.Millisecond * time.Duration(5*rng.Intn(245)) // 5-250ms opts.FlushDelayRangeKey = time.Millisecond * time.Duration(5*rng.Intn(245)) // 5-250ms diff --git a/open.go b/open.go index 81d672ca2f..53e9b80e53 100644 --- a/open.go +++ b/open.go @@ -167,6 +167,11 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { } else { opts.FilterCache.Ref() } + if opts.IndexCache == nil { + opts.IndexCache = cache.New(cacheDefaultSize) + } else { + opts.IndexCache.Ref() + } d := &DB{ cacheID: opts.Cache.NewID(), @@ -201,6 +206,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) { // reference to the cache. opts.Cache.Unref() opts.FilterCache.Unref() + opts.IndexCache.Unref() if d.tableCache != nil { _ = d.tableCache.close() diff --git a/options.go b/options.go index a88a12dc83..65f785941f 100644 --- a/options.go +++ b/options.go @@ -483,6 +483,7 @@ type Options struct { // The default cache size is 8 MB. Cache *cache.Cache FilterCache *cache.Cache + IndexCache *cache.Cache // LoadBlockSema, if set, is used to limit the number of blocks that can be // loaded (i.e. read from the filesystem) in parallel. 
Each load acquires one @@ -1734,6 +1735,7 @@ func (o *Options) MakeReaderOptions() sstable.ReaderOptions { if o != nil { readerOpts.Cache = o.Cache readerOpts.FilterCache = o.FilterCache + readerOpts.IndexCache = o.IndexCache readerOpts.LoadBlockSema = o.LoadBlockSema readerOpts.Comparer = o.Comparer readerOpts.Filters = o.Filters @@ -1754,6 +1756,7 @@ func (o *Options) MakeWriterOptions(level int, format sstable.TableFormat) sstab if o != nil { writerOpts.Cache = o.Cache writerOpts.FilterCache = o.FilterCache + writerOpts.IndexCache = o.IndexCache writerOpts.Comparer = o.Comparer if o.Merger != nil { writerOpts.MergerName = o.Merger.Name diff --git a/sstable/options.go b/sstable/options.go index 2ccfcdff11..8b7ad1959f 100644 --- a/sstable/options.go +++ b/sstable/options.go @@ -112,6 +112,7 @@ type ReaderOptions struct { // The default cache size is a zero-size cache. Cache *cache.Cache FilterCache *cache.Cache + IndexCache *cache.Cache // LoadBlockSema, if set, is used to limit the number of blocks that can be // loaded (i.e. read from the filesystem) in parallel. Each load acquires one @@ -190,6 +191,7 @@ type WriterOptions struct { // The default is a nil cache. Cache *cache.Cache FilterCache *cache.Cache + IndexCache *cache.Cache // Comparer defines a total ordering over the space of []byte keys: a 'less // than' relationship. 
The same comparison algorithm must be used for reads diff --git a/sstable/reader.go b/sstable/reader.go index 15a90877d1..ef6aef7aab 100644 --- a/sstable/reader.go +++ b/sstable/reader.go @@ -237,6 +237,7 @@ type Reader struct { func (r *Reader) Close() error { r.opts.Cache.Unref() r.opts.FilterCache.Unref() + r.opts.IndexCache.Unref() if r.readable != nil { r.err = firstError(r.err, r.readable.Close()) @@ -528,6 +529,8 @@ func (r *Reader) readBlock( dbCache := r.opts.Cache if ctx.Value("blockType") == objiotracing.FilterBlock { dbCache = r.opts.FilterCache + } else if ctx.Value("blockType") == objiotracing.MetadataBlock { + dbCache = r.opts.IndexCache } if h := dbCache.Get(r.cacheID, r.fileNum, bh.Offset); h.Get() != nil { @@ -1125,6 +1128,11 @@ func NewReader(f objstorage.Readable, o ReaderOptions, extraOpts ...ReaderOption } else { r.opts.FilterCache.Ref() } + if r.opts.IndexCache == nil { + r.opts.IndexCache = cache.New(0) + } else { + r.opts.IndexCache.Ref() + } if f == nil { r.err = errors.New("pebble/table: nil file") diff --git a/table_cache.go b/table_cache.go index 5258f4106d..6324828649 100644 --- a/table_cache.go +++ b/table_cache.go @@ -986,6 +986,7 @@ func (c *tableCacheShard) evict(fileNum base.DiskFileNum, dbOpts *tableCacheOpts dbOpts.opts.Cache.EvictFile(dbOpts.cacheID, fileNum) dbOpts.opts.FilterCache.EvictFile(dbOpts.cacheID, fileNum) + dbOpts.opts.IndexCache.EvictFile(dbOpts.cacheID, fileNum) } // removeDB evicts any nodes which have a reference to the DB diff --git a/tool/find.go b/tool/find.go index 99819cbca3..8fc9736108 100644 --- a/tool/find.go +++ b/tool/find.go @@ -430,6 +430,7 @@ func (f *findT) searchTables(stdout io.Writer, searchKey []byte, refs []findRef) opts := sstable.ReaderOptions{ Cache: cache, FilterCache: cache, + IndexCache: cache, Comparer: f.opts.Comparer, Filters: f.opts.Filters, } diff --git a/tool/sstable.go b/tool/sstable.go index 8ae1428101..f153967d6f 100644 --- a/tool/sstable.go +++ b/tool/sstable.go @@ -149,6 
+149,7 @@ func (s *sstableT) newReader(f vfs.File) (*sstable.Reader, error) { o := sstable.ReaderOptions{ Cache: cache, FilterCache: cache, + IndexCache: cache, Comparer: s.opts.Comparer, Filters: s.opts.Filters, }