@@ -12,6 +12,7 @@ import (
1212 "github.com/cockroachdb/pebble/internal/base"
1313 "github.com/cockroachdb/pebble/internal/invariants"
1414 "github.com/cockroachdb/pebble/internal/manifest"
15+ "github.com/cockroachdb/pebble/objstorage"
1516)
1617
1718// Excise atomically deletes all data overlapping with the provided span. All
@@ -40,43 +41,79 @@ func (d *DB) Excise(ctx context.Context, span KeyRange) error {
4041 v , FormatVirtualSSTables ,
4142 )
4243 }
43- _ , err := d .ingest (ctx , nil , nil , span , nil )
44+ _ , err := d .ingest (ctx , ingestArgs { ExciseSpan : span , ExciseBoundsPolicy : tightExciseBoundsIfLocal } )
4445 return err
4546}
4647
48+ // exciseBoundsPolicy controls whether we open excised files to obtain tight
49+ // bounds for the remaining file(s).
50+ type exciseBoundsPolicy uint8
51+
52+ const (
53+ // tightExciseBounds means that we will always open the file to find the exact
54+ // bounds of the remaining file(s).
55+ tightExciseBounds exciseBoundsPolicy = iota
56+ // looseExciseBounds means that we will not open the file and will assign bounds
57+ // pessimistically.
58+ looseExciseBounds
59+ // tightExciseBoundsLocalOnly means that we will only open the file if it is
60+ // local; otherwise we will assign loose bounds to the remaining file(s).
61+ tightExciseBoundsIfLocal
62+ )
63+
4764// exciseTable initializes up to two virtual tables for what is left over after
4865// excising the given span from the table.
4966//
50- // Returns the left and/or right tables, if they exist.
67+ // Returns the left and/or right tables, if they exist. The boundsPolicy controls
68+ // whether we create iterators for m to determine tight bounds. Note that if the
69+ // exciseBounds are end-inclusive, tight bounds will be used regardless of the
70+ // policy.
5171//
5272// The file bounds must overlap with the excise span.
5373//
5474// This method is agnostic to whether d.mu is held or not. Some cases call it with
5575// the db mutex held (eg. ingest-time excises), while in the case of compactions
5676// the mutex is not held.
5777func (d * DB ) exciseTable (
58- ctx context.Context , exciseSpan base.UserKeyBounds , m * tableMetadata , level int ,
78+ ctx context.Context ,
79+ exciseBounds base.UserKeyBounds ,
80+ m * tableMetadata ,
81+ level int ,
82+ boundsPolicy exciseBoundsPolicy ,
5983) (leftTable , rightTable * tableMetadata , _ error ) {
6084 // Check if there's actually an overlap between m and exciseSpan.
6185 mBounds := m .UserKeyBounds ()
62- if ! exciseSpan .Overlaps (d .cmp , & mBounds ) {
86+ if ! exciseBounds .Overlaps (d .cmp , & mBounds ) {
6387 return nil , nil , base .AssertionFailedf ("excise span does not overlap table" )
6488 }
6589 // Fast path: m sits entirely within the exciseSpan, so just delete it.
66- if exciseSpan .ContainsInternalKey (d .cmp , m .Smallest ) && exciseSpan .ContainsInternalKey (d .cmp , m .Largest ) {
90+ if exciseBounds .ContainsInternalKey (d .cmp , m .Smallest ) && exciseBounds .ContainsInternalKey (d .cmp , m .Largest ) {
6791 return nil , nil , nil
6892 }
6993
70- // The file partially overlaps the excise span; we will need to open it to
71- // determine tight bounds for the left-over table(s).
72- iters , err := d .newIters (ctx , m , & IterOptions {
73- Category : categoryIngest ,
74- layer : manifest .Level (level ),
75- }, internalIterOpts {}, iterPointKeys | iterRangeDeletions | iterRangeKeys )
76- if err != nil {
77- return nil , nil , err
94+ looseBounds := boundsPolicy == looseExciseBounds ||
95+ (boundsPolicy == tightExciseBoundsIfLocal && ! objstorage .IsLocalTable (d .objProvider , m .FileBacking .DiskFileNum ))
96+
97+ if exciseBounds .End .Kind == base .Inclusive {
98+ // Loose bounds are not allowed with end-inclusive bounds. This can only
99+ // happen for ingest splits.
100+ looseBounds = false
101+ }
102+
103+ // The file partially overlaps the excise span; unless looseBounds is true, we
104+ // will need to open it to determine tight bounds for the left-over table(s).
105+ var iters iterSet
106+ if ! looseBounds {
107+ var err error
108+ iters , err = d .newIters (ctx , m , & IterOptions {
109+ Category : categoryIngest ,
110+ layer : manifest .Level (level ),
111+ }, internalIterOpts {}, iterPointKeys | iterRangeDeletions | iterRangeKeys )
112+ if err != nil {
113+ return nil , nil , err
114+ }
115+ defer func () { _ = iters .CloseAll () }()
78116 }
79- defer func () { _ = iters .CloseAll () }()
80117
81118 // Create a file to the left of the excise span, if necessary.
82119 // The bounds of this file will be [m.Smallest, lastKeyBefore(exciseSpan.Start)].
@@ -100,7 +137,7 @@ func (d *DB) exciseTable(
100137 // files, then grab the lock again and recalculate for just the files that
101138 // have changed since our previous calculation. Do this optimiaztino as part of
102139 // https://github.com/cockroachdb/pebble/issues/2112 .
103- if d .cmp (m .Smallest .UserKey , exciseSpan .Start ) < 0 {
140+ if d .cmp (m .Smallest .UserKey , exciseBounds .Start ) < 0 {
104141 leftTable = & tableMetadata {
105142 Virtual : true ,
106143 FileNum : d .mu .versions .getNextFileNum (),
@@ -111,7 +148,9 @@ func (d *DB) exciseTable(
111148 LargestSeqNumAbsolute : m .LargestSeqNumAbsolute ,
112149 SyntheticPrefixAndSuffix : m .SyntheticPrefixAndSuffix ,
113150 }
114- if err := determineLeftTableBounds (d .cmp , m , leftTable , exciseSpan .Start , iters ); err != nil {
151+ if looseBounds {
152+ looseLeftTableBounds (d .cmp , m , leftTable , exciseBounds .Start )
153+ } else if err := determineLeftTableBounds (d .cmp , m , leftTable , exciseBounds .Start , iters ); err != nil {
115154 return nil , nil , err
116155 }
117156
@@ -129,7 +168,7 @@ func (d *DB) exciseTable(
129168 }
130169 }
131170 // Create a file to the right, if necessary.
132- if ! exciseSpan .End .IsUpperBoundForInternalKey (d .cmp , m .Largest ) {
171+ if ! exciseBounds .End .IsUpperBoundForInternalKey (d .cmp , m .Largest ) {
133172 // Create a new file, rightFile, between [firstKeyAfter(exciseSpan.End), m.Largest].
134173 //
135174 // See comment before the definition of leftFile for the motivation behind
@@ -144,7 +183,10 @@ func (d *DB) exciseTable(
144183 LargestSeqNumAbsolute : m .LargestSeqNumAbsolute ,
145184 SyntheticPrefixAndSuffix : m .SyntheticPrefixAndSuffix ,
146185 }
147- if err := determineRightTableBounds (d .cmp , m , rightTable , exciseSpan .End , iters ); err != nil {
186+ if looseBounds {
187+ // We already checked that the end bound is exclusive.
188+ looseRightTableBounds (d .cmp , m , rightTable , exciseBounds .End .Key )
189+ } else if err := determineRightTableBounds (d .cmp , m , rightTable , exciseBounds .End , iters ); err != nil {
148190 return nil , nil , err
149191 }
150192 if rightTable .HasRangeKeys || rightTable .HasPointKeys {
@@ -207,8 +249,62 @@ func exciseOverlapBounds(
207249 return extended
208250}
209251
252+ // looseLeftTableBounds initializes the bounds for the table that remains to the
253+ // left of the excise span after excising originalTable, without consulting the
254+ // contents of originalTable. The resulting bounds are loose.
255+ //
256+ // Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
257+ // the leftFile.
258+ func looseLeftTableBounds (
259+ cmp Compare , originalTable , leftTable * tableMetadata , exciseSpanStart []byte ,
260+ ) {
261+ if originalTable .HasPointKeys {
262+ largestPointKey := originalTable .LargestPointKey
263+ if largestPointKey .IsUpperBoundFor (cmp , exciseSpanStart ) {
264+ largestPointKey = base .MakeRangeDeleteSentinelKey (exciseSpanStart )
265+ }
266+ leftTable .ExtendPointKeyBounds (cmp , originalTable .SmallestPointKey , largestPointKey )
267+ }
268+ if originalTable .HasRangeKeys {
269+ largestRangeKey := originalTable .LargestRangeKey
270+ if largestRangeKey .IsUpperBoundFor (cmp , exciseSpanStart ) {
271+ largestRangeKey = base .MakeExclusiveSentinelKey (InternalKeyKindRangeKeyMin , exciseSpanStart )
272+ }
273+ leftTable .ExtendRangeKeyBounds (cmp , originalTable .SmallestRangeKey , largestRangeKey )
274+ }
275+ }
276+
277+ // looseRightTableBounds initializes the bounds for the table that remains to the
278+ // right of the excise span after excising originalTable, without consulting the
279+ // contents of originalTable. The resulting bounds are loose.
280+ //
281+ // Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
282+ // the rightFile.
283+ //
284+ // The excise span end bound is assumed to be exclusive; this function cannot be
285+ // used with an inclusive end bound.
286+ func looseRightTableBounds (
287+ cmp Compare , originalTable , rightTable * tableMetadata , exciseSpanEnd []byte ,
288+ ) {
289+ if originalTable .HasPointKeys {
290+ smallestPointKey := originalTable .SmallestPointKey
291+ if ! smallestPointKey .IsUpperBoundFor (cmp , exciseSpanEnd ) {
292+ smallestPointKey = base .MakeInternalKey (exciseSpanEnd , 0 , base .InternalKeyKindMaxForSSTable )
293+ }
294+ rightTable .ExtendPointKeyBounds (cmp , smallestPointKey , originalTable .LargestPointKey )
295+ }
296+ if originalTable .HasRangeKeys {
297+ smallestRangeKey := originalTable .SmallestRangeKey
298+ if ! smallestRangeKey .IsUpperBoundFor (cmp , exciseSpanEnd ) {
299+ smallestRangeKey = base .MakeInternalKey (exciseSpanEnd , 0 , base .InternalKeyKindRangeKeyMax )
300+ }
301+ rightTable .ExtendRangeKeyBounds (cmp , smallestRangeKey , originalTable .LargestRangeKey )
302+ }
303+ }
304+
210305// determineLeftTableBounds calculates the bounds for the table that remains to
211- // the left of the excise span after excising originalFile.
306+ // the left of the excise span after excising originalTable. The bounds around
307+ // the excise span are determined precisely by looking inside the file.
212308//
213309// Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
214310// the leftFile.
@@ -256,7 +352,8 @@ func determineLeftTableBounds(
256352}
257353
258354// determineRightTableBounds calculates the bounds for the table that remains to
259- // the right of the excise span after excising originalFile.
355+ // the right of the excise span after excising originalTable. The bounds around
356+ // the excise span are determined precisely by looking inside the file.
260357//
261358// Sets the smallest and largest keys, as well as HasPointKeys/HasRangeKeys in
262359// the right.
0 commit comments