Skip to content

Commit 28fcae5

Browse files
committed
db: allow loose excised table bounds when using DB.Excise()
In the future we can easily change the policy for IngestAndExcise as well if we decide to. Fixes #4417
1 parent d343bc4 commit 28fcae5

File tree

7 files changed

+294
-53
lines changed

7 files changed

+294
-53
lines changed

compaction.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,10 +1345,11 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) {
13451345
levelMetrics.TablesIngested++
13461346
}
13471347
if ingestFlushable.exciseSpan.Valid() {
1348+
exciseBounds := ingestFlushable.exciseSpan.UserKeyBounds()
13481349
// Iterate through all levels and find files that intersect with exciseSpan.
13491350
for layer, ls := range c.version.AllLevelsAndSublevels() {
13501351
for m := range ls.Overlaps(d.cmp, ingestFlushable.exciseSpan.UserKeyBounds()).All() {
1351-
leftTable, rightTable, err := d.exciseTable(context.TODO(), ingestFlushable.exciseSpan.UserKeyBounds(), m, layer.Level())
1352+
leftTable, rightTable, err := d.exciseTable(context.TODO(), exciseBounds, m, layer.Level(), tightExciseBounds)
13521353
if err != nil {
13531354
return nil, err
13541355
}
@@ -2750,7 +2751,8 @@ func (d *DB) applyHintOnFile(
27502751
}
27512752

27522753
levelMetrics.TablesExcised++
2753-
leftTable, rightTable, err := d.exciseTable(context.TODO(), base.UserKeyBoundsEndExclusive(h.start, h.end), f, level)
2754+
exciseBounds := base.UserKeyBoundsEndExclusive(h.start, h.end)
2755+
leftTable, rightTable, err := d.exciseTable(context.TODO(), exciseBounds, f, level, tightExciseBounds)
27542756
if err != nil {
27552757
return nil, errors.Wrap(err, "error when running excise for delete-only compaction")
27562758
}

excise.go

Lines changed: 117 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/cockroachdb/pebble/internal/base"
1313
"github.com/cockroachdb/pebble/internal/invariants"
1414
"github.com/cockroachdb/pebble/internal/manifest"
15+
"github.com/cockroachdb/pebble/objstorage"
1516
)
1617

1718
// Excise atomically deletes all data overlapping with the provided span. All
@@ -40,43 +41,79 @@ func (d *DB) Excise(ctx context.Context, span KeyRange) error {
4041
v, FormatVirtualSSTables,
4142
)
4243
}
43-
_, err := d.ingest(ctx, nil, nil, span, nil)
44+
_, err := d.ingest(ctx, ingestArgs{ExciseSpan: span, ExciseBoundsPolicy: tightExciseBoundsIfLocal})
4445
return err
4546
}
4647

48+
// exciseBoundsPolicy controls whether we open excised files to obtain tight
49+
// bounds for the remaining file(s).
50+
type exciseBoundsPolicy uint8
51+
52+
const (
53+
// tightExciseBounds means that we will always open the file to find the exact
54+
// bounds of the remaining file(s).
55+
tightExciseBounds exciseBoundsPolicy = iota
56+
// looseExciseBounds means that we will not open the file and will assign bounds
57+
// pessimistically.
58+
looseExciseBounds
59+
// tightExciseBoundsLocalOnly means that we will only open the file if it is
60+
// local; otherwise we will assign loose bounds to the remaining file(s).
61+
tightExciseBoundsIfLocal
62+
)
63+
4764
// exciseTable initializes up to two virtual tables for what is left over after
4865
// excising the given span from the table.
4966
//
50-
// Returns the left and/or right tables, if they exist.
67+
// Returns the left and/or right tables, if they exist. The boundsPolicy controls
68+
// whether we create iterators for m to determine tight bounds. Note that if the
69+
// exciseBounds are end-inclusive, tight bounds will be used regardless of the
70+
// policy.
5171
//
5272
// The file bounds must overlap with the excise span.
5373
//
5474
// This method is agnostic to whether d.mu is held or not. Some cases call it with
5575
// the db mutex held (eg. ingest-time excises), while in the case of compactions
5676
// the mutex is not held.
5777
func (d *DB) exciseTable(
58-
ctx context.Context, exciseSpan base.UserKeyBounds, m *tableMetadata, level int,
78+
ctx context.Context,
79+
exciseBounds base.UserKeyBounds,
80+
m *tableMetadata,
81+
level int,
82+
boundsPolicy exciseBoundsPolicy,
5983
) (leftTable, rightTable *tableMetadata, _ error) {
6084
// Check if there's actually an overlap between m and exciseSpan.
6185
mBounds := m.UserKeyBounds()
62-
if !exciseSpan.Overlaps(d.cmp, &mBounds) {
86+
if !exciseBounds.Overlaps(d.cmp, &mBounds) {
6387
return nil, nil, base.AssertionFailedf("excise span does not overlap table")
6488
}
6589
// Fast path: m sits entirely within the exciseSpan, so just delete it.
66-
if exciseSpan.ContainsInternalKey(d.cmp, m.Smallest) && exciseSpan.ContainsInternalKey(d.cmp, m.Largest) {
90+
if exciseBounds.ContainsInternalKey(d.cmp, m.Smallest) && exciseBounds.ContainsInternalKey(d.cmp, m.Largest) {
6791
return nil, nil, nil
6892
}
6993

70-
// The file partially overlaps the excise span; we will need to open it to
71-
// determine tight bounds for the left-over table(s).
72-
iters, err := d.newIters(ctx, m, &IterOptions{
73-
Category: categoryIngest,
74-
layer: manifest.Level(level),
75-
}, internalIterOpts{}, iterPointKeys|iterRangeDeletions|iterRangeKeys)
76-
if err != nil {
77-
return nil, nil, err
94+
looseBounds := boundsPolicy == looseExciseBounds ||
95+
(boundsPolicy == tightExciseBoundsIfLocal && !objstorage.IsLocalTable(d.objProvider, m.FileBacking.DiskFileNum))
96+
97+
if exciseBounds.End.Kind == base.Inclusive {
98+
// Loose bounds are not allowed with end-inclusive bounds. This can only
99+
// happen for ingest splits.
100+
looseBounds = false
101+
}
102+
103+
// The file partially overlaps the excise span; unless looseBounds is true, we
104+
// will need to open it to determine tight bounds for the left-over table(s).
105+
var iters iterSet
106+
if !looseBounds {
107+
var err error
108+
iters, err = d.newIters(ctx, m, &IterOptions{
109+
Category: categoryIngest,
110+
layer: manifest.Level(level),
111+
}, internalIterOpts{}, iterPointKeys|iterRangeDeletions|iterRangeKeys)
112+
if err != nil {
113+
return nil, nil, err
114+
}
115+
defer func() { _ = iters.CloseAll() }()
78116
}
79-
defer func() { _ = iters.CloseAll() }()
80117

81118
// Create a file to the left of the excise span, if necessary.
82119
// The bounds of this file will be [m.Smallest, lastKeyBefore(exciseSpan.Start)].
@@ -100,7 +137,7 @@ func (d *DB) exciseTable(
100137
// files, then grab the lock again and recalculate for just the files that
101138
// have changed since our previous calculation. Do this optimiaztino as part of
102139
// https://github.com/cockroachdb/pebble/issues/2112 .
103-
if d.cmp(m.Smallest.UserKey, exciseSpan.Start) < 0 {
140+
if d.cmp(m.Smallest.UserKey, exciseBounds.Start) < 0 {
104141
leftTable = &tableMetadata{
105142
Virtual: true,
106143
FileNum: d.mu.versions.getNextFileNum(),
@@ -111,7 +148,9 @@ func (d *DB) exciseTable(
111148
LargestSeqNumAbsolute: m.LargestSeqNumAbsolute,
112149
SyntheticPrefixAndSuffix: m.SyntheticPrefixAndSuffix,
113150
}
114-
if err := determineLeftTableBounds(d.cmp, m, leftTable, exciseSpan.Start, iters); err != nil {
151+
if looseBounds {
152+
looseLeftTableBounds(d.cmp, m, leftTable, exciseBounds.Start)
153+
} else if err := determineLeftTableBounds(d.cmp, m, leftTable, exciseBounds.Start, iters); err != nil {
115154
return nil, nil, err
116155
}
117156

@@ -129,7 +168,7 @@ func (d *DB) exciseTable(
129168
}
130169
}
131170
// Create a file to the right, if necessary.
132-
if !exciseSpan.End.IsUpperBoundForInternalKey(d.cmp, m.Largest) {
171+
if !exciseBounds.End.IsUpperBoundForInternalKey(d.cmp, m.Largest) {
133172
// Create a new file, rightFile, between [firstKeyAfter(exciseSpan.End), m.Largest].
134173
//
135174
// See comment before the definition of leftFile for the motivation behind
@@ -144,7 +183,10 @@ func (d *DB) exciseTable(
144183
LargestSeqNumAbsolute: m.LargestSeqNumAbsolute,
145184
SyntheticPrefixAndSuffix: m.SyntheticPrefixAndSuffix,
146185
}
147-
if err := determineRightTableBounds(d.cmp, m, rightTable, exciseSpan.End, iters); err != nil {
186+
if looseBounds {
187+
// We already checked that the end bound is exclusive.
188+
looseRightTableBounds(d.cmp, m, rightTable, exciseBounds.End.Key)
189+
} else if err := determineRightTableBounds(d.cmp, m, rightTable, exciseBounds.End, iters); err != nil {
148190
return nil, nil, err
149191
}
150192
if rightTable.HasRangeKeys || rightTable.HasPointKeys {
@@ -207,8 +249,62 @@ func exciseOverlapBounds(
207249
return extended
208250
}
209251

252+
// looseLeftTableBounds initializes the bounds for the table that remains to the
253+
// left of the excise span after excising originalTable, without consulting the
254+
// contents of originalTable. The resulting bounds are loose.
255+
//
256+
// Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
257+
// the leftFile.
258+
func looseLeftTableBounds(
259+
cmp Compare, originalTable, leftTable *tableMetadata, exciseSpanStart []byte,
260+
) {
261+
if originalTable.HasPointKeys {
262+
largestPointKey := originalTable.LargestPointKey
263+
if largestPointKey.IsUpperBoundFor(cmp, exciseSpanStart) {
264+
largestPointKey = base.MakeRangeDeleteSentinelKey(exciseSpanStart)
265+
}
266+
leftTable.ExtendPointKeyBounds(cmp, originalTable.SmallestPointKey, largestPointKey)
267+
}
268+
if originalTable.HasRangeKeys {
269+
largestRangeKey := originalTable.LargestRangeKey
270+
if largestRangeKey.IsUpperBoundFor(cmp, exciseSpanStart) {
271+
largestRangeKey = base.MakeExclusiveSentinelKey(InternalKeyKindRangeKeyMin, exciseSpanStart)
272+
}
273+
leftTable.ExtendRangeKeyBounds(cmp, originalTable.SmallestRangeKey, largestRangeKey)
274+
}
275+
}
276+
277+
// looseRightTableBounds initializes the bounds for the table that remains to the
278+
// right of the excise span after excising originalTable, without consulting the
279+
// contents of originalTable. The resulting bounds are loose.
280+
//
281+
// Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
282+
// the rightFile.
283+
//
284+
// The excise span end bound is assumed to be exclusive; this function cannot be
285+
// used with an inclusive end bound.
286+
func looseRightTableBounds(
287+
cmp Compare, originalTable, rightTable *tableMetadata, exciseSpanEnd []byte,
288+
) {
289+
if originalTable.HasPointKeys {
290+
smallestPointKey := originalTable.SmallestPointKey
291+
if !smallestPointKey.IsUpperBoundFor(cmp, exciseSpanEnd) {
292+
smallestPointKey = base.MakeInternalKey(exciseSpanEnd, 0, base.InternalKeyKindMaxForSSTable)
293+
}
294+
rightTable.ExtendPointKeyBounds(cmp, smallestPointKey, originalTable.LargestPointKey)
295+
}
296+
if originalTable.HasRangeKeys {
297+
smallestRangeKey := originalTable.SmallestRangeKey
298+
if !smallestRangeKey.IsUpperBoundFor(cmp, exciseSpanEnd) {
299+
smallestRangeKey = base.MakeInternalKey(exciseSpanEnd, 0, base.InternalKeyKindRangeKeyMax)
300+
}
301+
rightTable.ExtendRangeKeyBounds(cmp, smallestRangeKey, originalTable.LargestRangeKey)
302+
}
303+
}
304+
210305
// determineLeftTableBounds calculates the bounds for the table that remains to
211-
// the left of the excise span after excising originalFile.
306+
// the left of the excise span after excising originalTable. The bounds around
307+
// the excise span are determined precisely by looking inside the file.
212308
//
213309
// Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
214310
// the leftFile.
@@ -256,7 +352,8 @@ func determineLeftTableBounds(
256352
}
257353

258354
// determineRightTableBounds calculates the bounds for the table that remains to
259-
// the right of the excise span after excising originalFile.
355+
// the right of the excise span after excising originalTable. The bounds around
356+
// the excise span are determined precisely by looking inside the file.
260357
//
261358
// Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
262359
// the right.

excise_test.go

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ func TestExcise(t *testing.T) {
5050
flushed = false
5151
}
5252

53+
var remoteStorage remote.Storage
5354
var opts *Options
5455
reset := func(blockSize int) {
5556
for _, e := range efos {
@@ -62,6 +63,7 @@ func TestExcise(t *testing.T) {
6263

6364
mem = vfs.NewMem()
6465
require.NoError(t, mem.MkdirAll("ext", 0755))
66+
remoteStorage = remote.NewInMem()
6567
opts = &Options{
6668
BlockPropertyCollectors: []func() BlockPropertyCollector{
6769
sstable.NewTestKeysBlockPropertyCollector,
@@ -80,6 +82,10 @@ func TestExcise(t *testing.T) {
8082
if blockSize != 0 {
8183
opts.Levels = append(opts.Levels, LevelOptions{BlockSize: blockSize, IndexBlockSize: 32 << 10})
8284
}
85+
opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
86+
"external-locator": remoteStorage,
87+
})
88+
opts.Experimental.CreateOnShared = remote.CreateOnSharedNone
8389
// Disable automatic compactions because otherwise we'll race with
8490
// delete-only compactions triggered by ingesting range tombstones.
8591
opts.DisableAutomaticCompactions = true
@@ -128,6 +134,11 @@ func TestExcise(t *testing.T) {
128134
return err.Error()
129135
}
130136
return ""
137+
case "build-remote":
138+
if err := runBuildRemoteCmd(td, d, remoteStorage); err != nil {
139+
return err.Error()
140+
}
141+
return ""
131142
case "flush":
132143
if err := d.Flush(); err != nil {
133144
return err.Error()
@@ -193,7 +204,12 @@ func TestExcise(t *testing.T) {
193204
}
194205
return "memtable flushed"
195206
}
196-
return ""
207+
208+
case "ingest-external":
209+
if err := runIngestExternalCmd(t, td, d, remoteStorage, "external-locator"); err != nil {
210+
return err.Error()
211+
}
212+
197213
case "file-only-snapshot":
198214
if len(td.CmdArgs) != 1 {
199215
panic("insufficient args for file-only-snapshot command")
@@ -281,10 +297,11 @@ func TestExcise(t *testing.T) {
281297
d.mu.Unlock()
282298
current := d.mu.versions.currentVersion()
283299

300+
exciseBounds := exciseSpan.UserKeyBounds()
284301
for l, ls := range current.AllLevelsAndSublevels() {
285302
iter := ls.Iter()
286303
for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() {
287-
leftTable, rightTable, err := d.exciseTable(context.Background(), exciseSpan.UserKeyBounds(), m, l.Level())
304+
leftTable, rightTable, err := d.exciseTable(context.Background(), exciseBounds, m, l.Level(), tightExciseBounds)
288305
if err != nil {
289306
td.Fatalf(t, "error when excising %s: %s", m.FileNum, err.Error())
290307
}
@@ -335,10 +352,11 @@ func TestExcise(t *testing.T) {
335352
if err != nil {
336353
return err.Error()
337354
}
338-
return ""
355+
339356
default:
340-
return fmt.Sprintf("unknown command: %s", td.Cmd)
357+
td.Fatalf(t, "unknown command: %s", td.Cmd)
341358
}
359+
return ""
342360
})
343361
}
344362

@@ -661,10 +679,11 @@ func TestConcurrentExcise(t *testing.T) {
661679
d.mu.versions.logLock()
662680
d.mu.Unlock()
663681
current := d.mu.versions.currentVersion()
682+
exciseBounds := exciseSpan.UserKeyBounds()
664683
for level := range current.Levels {
665684
iter := current.Levels[level].Iter()
666685
for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() {
667-
leftTable, rightTable, err := d.exciseTable(context.Background(), exciseSpan.UserKeyBounds(), m, level)
686+
leftTable, rightTable, err := d.exciseTable(context.Background(), exciseBounds, m, level, tightExciseBounds)
668687
if err != nil {
669688
d.mu.Lock()
670689
d.mu.versions.logUnlock()
@@ -835,6 +854,9 @@ func TestExciseBounds(t *testing.T) {
835854
var t manifest.TableMetadata
836855
checkErr(determineLeftTableBounds(cmp, m, &t, exciseSpan.Start, iters))
837856
printBounds("Left table bounds", &t)
857+
t = manifest.TableMetadata{}
858+
looseLeftTableBounds(cmp, m, &t, exciseSpan.Start)
859+
printBounds("Left table bounds (loose)", &t)
838860
}
839861

840862
if !exciseSpan.End.IsUpperBoundForInternalKey(cmp, m.Largest) {
@@ -858,6 +880,11 @@ func TestExciseBounds(t *testing.T) {
858880
} else {
859881
printBounds("Right table bounds", &t)
860882
}
883+
if exciseSpan.End.Kind == base.Exclusive {
884+
t = manifest.TableMetadata{}
885+
looseRightTableBounds(cmp, m, &t, exciseSpan.End.Key)
886+
printBounds("Right table bounds (loose)", &t)
887+
}
861888
}
862889
checkErr(iters.CloseAll())
863890

0 commit comments

Comments
 (0)