Skip to content

Commit 8e2dc01

Browse files
committed
overlap: add overlapcache subpackage
This commit implements a new overlap cache data structure which will be embedded into `FileMetadata`. The cache works by remembering a handful of data regions and whether the spaces in-between are known to be empty. The goal is to make repeated overlap checks in the same area of a file much cheaper. This will make it feasible to have an optimistic overlap check that can be repeated on a slightly changed version without redoing most of the work.
1 parent 907d865 commit 8e2dc01

File tree

3 files changed

+723
-0
lines changed

3 files changed

+723
-0
lines changed
Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
2+
// of this source code is governed by a BSD-style license that can be found in
3+
// the LICENSE file.
4+
5+
package overlapcache
6+
7+
import (
8+
"fmt"
9+
"sort"
10+
"sync"
11+
12+
"github.com/cockroachdb/errors"
13+
"github.com/cockroachdb/pebble/internal/base"
14+
"github.com/cockroachdb/pebble/internal/invariants"
15+
)
16+
17+
// C is a data structure that caches information about data regions in a file.
18+
// It is used to speed up related overlap checks during ingestion.
19+
//
20+
// -- Implementation --
21+
//
22+
// The cache maintains information about a small number of regions. A region
23+
// corresponds to a user key interval (UserKeyBounds). We define three types of
24+
// regions:
25+
// - empty region: it is known that no keys or spans in the file overlap this
26+
// region.
27+
// - data region: corresponds to a key or span (or union of keys and spans) in
28+
// the file. Any single key that falls inside this region has data overlap.
29+
// - unknown region.
30+
//
31+
// We maintain a list of disjoint and sorted data regions, along with flags
32+
// which indicate if the regions in-between are empty or unknown. The region
33+
// before data region 0 refers to the entire start of the file up to data region
34+
// 0. The region after data region n-1 refers to the entire end of the file
35+
// starting from the end of data region n-1.
36+
//
37+
// See testdata/cache for some examples represented visually.
38+
type C struct {
39+
// mu bundles the mutex with the state it protects. The exported methods
// (CheckDataOverlap, ReportDataRegion, ReportEmptyRegion) lock it before
// reading or writing any of the fields below.
mu struct {
40+
sync.Mutex
41+
// n is the number of data regions currently stored; only dataRegions[:n]
// is meaningful.
n int
42+
// dataRegions holds the known data regions. check (run when invariants
// are enabled) panics unless each one is valid and consecutive regions
// neither overlap nor touch.
dataRegions [cacheMaxEntries]base.UserKeyBounds
43+
// emptyBeforeRegion[i] is true when the space between data regions i-1
// and i is known to contain no data. Index 0 covers the space before the
// first region and index n covers the space after the last one, which is
// why the array has one more slot than dataRegions. False means unknown.
emptyBeforeRegion [cacheMaxEntries + 1]bool
44+
}
45+
}
46+
47+
// cacheMaxEntries is the capacity of the fixed-size dataRegions array, i.e. the
// largest number of data regions the cache tracks at any time; makeSpace evicts
// regions when an insertion would exceed it. It must be at least 4.
48+
const cacheMaxEntries = 6
49+
50+
// maxKeySize prevents the cache from holding on to very large keys. It is a
51+
// safety precaution: ReportDataRegion and ReportEmptyRegion ignore any report
// in which a start or end key is longer than this many bytes.
52+
const maxKeySize = 4096
53+
54+
// CheckDataOverlap tries to determine if the target region overlaps any data
55+
// regions.
56+
func (c *C) CheckDataOverlap(cmp base.Compare, target base.UserKeyBounds) (overlaps, ok bool) {
57+
c.mu.Lock()
58+
defer c.mu.Unlock()
59+
n := c.mu.n
60+
61+
// Find first region which ends after the start of the target region.
62+
idx := sort.Search(n, func(i int) bool {
63+
return c.mu.dataRegions[i].End.IsUpperBoundFor(cmp, target.Start)
64+
})
65+
if idx < n && target.End.IsUpperBoundFor(cmp, c.mu.dataRegions[idx].Start) {
66+
// target overlaps with a known data region.
67+
return true, true
68+
}
69+
// The target region falls completely outside regions idx-1 and idx.
70+
if c.mu.emptyBeforeRegion[idx] {
71+
// The entire space between data regions idx-1 and idx is known to contain
72+
// no data.
73+
return false, true
74+
}
75+
// We don't know if there is data in the space between regions idx-1 and idx.
76+
return false, false
77+
}
78+
79+
// ReportDataRegion informs the cache that the target region contains data.
80+
//
81+
// There is no assumption about the region being maximal (i.e. it could be part
82+
// of a larger data region).
83+
//
84+
// Note that the cache will hold on to the region's key slices indefinitely.
85+
// They should not be modified ever again by the caller.
86+
func (c *C) ReportDataRegion(cmp base.Compare, region base.UserKeyBounds) {
87+
if len(region.Start) > maxKeySize || len(region.End.Key) > maxKeySize {
88+
return
89+
}
90+
91+
c.mu.Lock()
92+
defer c.mu.Unlock()
93+
if invariants.Enabled {
94+
defer c.check(cmp)
95+
}
96+
c.insertRegion(cmp, region, allowLeftExtension|allowRightExtension)
97+
}
98+
99+
// ReportEmptyRegion informs the cache of an empty region, in-between two data
100+
// regions r1 and r2.
101+
//
102+
// Unset regions are accepted and serve as "sentinels" representing the start or
103+
// end of the file. Specifically:
104+
// - if r1 is unset, the empty region is from the start of the file to the
105+
// start of r2;
106+
// - if r2 is unset, the empty region is from the end of r2 to the end of the
107+
// file;
108+
// - if both r1 and r2 are unset, the entire file is empty.
109+
//
110+
// There is no assumption about the regions being maximal (i.e. r1 could be part
111+
// of a larger data region extending to the left, and r2 could be part of a
112+
// larger data region extending to the right).
113+
//
114+
// Note that the cache will hold on to the regions' key slices indefinitely.
115+
// They should not be modified ever again by the caller.
116+
func (c *C) ReportEmptyRegion(cmp base.Compare, r1, r2 base.UserKeyBounds) {
117+
if len(r1.Start) > maxKeySize || len(r1.End.Key) > maxKeySize ||
118+
len(r2.Start) > maxKeySize || len(r2.End.Key) > maxKeySize {
119+
return
120+
}
121+
122+
c.mu.Lock()
123+
defer c.mu.Unlock()
124+
if invariants.Enabled {
125+
defer c.check(cmp)
126+
}
127+
128+
switch {
129+
case r1.Start == nil && r2.Start == nil:
130+
// The entire file is empty,
131+
c.assert(c.mu.n == 0)
132+
c.mu.emptyBeforeRegion[0] = true
133+
return
134+
135+
case r1.Start == nil:
136+
// We know there is only empty space before r2.
137+
idx := c.insertRegion(cmp, r2, allowRightExtension)
138+
c.assert(idx == 0)
139+
c.mu.emptyBeforeRegion[0] = true
140+
return
141+
142+
case r2.Start == nil:
143+
// We know there is only empty space after r1.
144+
idx := c.insertRegion(cmp, r1, allowLeftExtension)
145+
c.assert(idx == c.mu.n-1)
146+
c.mu.emptyBeforeRegion[c.mu.n] = true
147+
return
148+
}
149+
150+
// Find the first region that contains or ends right at r1.Start.
151+
r1Idx := c.insertionPoint(cmp, r1)
152+
r1Overlapping, r1, r1EmptyBefore, _ := c.checkOverlap(cmp, r1Idx, r1, allowLeftExtension)
153+
r2Idx := r1Idx + r1Overlapping
154+
155+
r2Overlapping, r2, _, r2EmptyAfter := c.checkOverlap(cmp, r2Idx, r2, allowRightExtension)
156+
157+
newIdx := c.makeSpace(r1Idx, 2, r2Idx+r2Overlapping)
158+
c.mu.dataRegions[newIdx] = r1
159+
c.mu.dataRegions[newIdx+1] = r2
160+
c.mu.emptyBeforeRegion[newIdx] = r1EmptyBefore
161+
c.mu.emptyBeforeRegion[newIdx+1] = true
162+
c.mu.emptyBeforeRegion[newIdx+2] = r2EmptyAfter
163+
}
164+
165+
// insertionPoint returns the first region that contains or ends right at Start.
166+
// We allow an exclusive end bound "touching" the new region, because we can
167+
// coalesce with it.
168+
func (c *C) insertionPoint(cmp base.Compare, region base.UserKeyBounds) int {
169+
return sort.Search(c.mu.n, func(i int) bool {
170+
return cmp(c.mu.dataRegions[i].End.Key, region.Start) >= 0
171+
})
172+
}
173+
174+
// insertRegion inserts a data region, evicting a region if necessary. Returns
175+
// the index where it was inserted.
176+
func (c *C) insertRegion(
177+
cmp base.Compare, region base.UserKeyBounds, extension allowedExtension,
178+
) (idx int) {
179+
idx = c.insertionPoint(cmp, region)
180+
overlapping, extendedRegion, emptyBefore, emptyAfter := c.checkOverlap(cmp, idx, region, extension)
181+
idx = c.makeSpace(idx, 1, idx+overlapping)
182+
c.mu.dataRegions[idx] = extendedRegion
183+
c.mu.emptyBeforeRegion[idx] = emptyBefore
184+
c.mu.emptyBeforeRegion[idx+1] = emptyAfter
185+
return idx
186+
}
187+
188+
// allowedExtension represents in which direction it is legal for checkOverlap
189+
// to extend a region; used for sanity checking. It is a bit set: the two
// values below can be OR-ed together to allow extension in both directions.
190+
type allowedExtension uint8
191+
192+
const (
193+
// allowLeftExtension lets checkOverlap move a region's Start back to the
// Start of a cached region that begins earlier; without it checkOverlap
// asserts instead.
allowLeftExtension allowedExtension = 1 << iota
194+
// allowRightExtension lets checkOverlap move a region's End forward to the
// End of a cached region that ends later; without it checkOverlap asserts
// instead.
allowRightExtension
195+
)
196+
197+
// numOverlappingRegions is called with idx pointing to the first region that
198+
// ends after region.Start and returns the number of regions that overlap with
199+
// (or touch) the target region.
200+
func (c *C) checkOverlap(
201+
cmp base.Compare, idx int, region base.UserKeyBounds, extension allowedExtension,
202+
) (numOverlapping int, extendedRegion base.UserKeyBounds, emptyBefore, emptyAfter bool) {
203+
for ; ; numOverlapping++ {
204+
if idx+numOverlapping >= c.mu.n || cmp(region.End.Key, c.mu.dataRegions[idx+numOverlapping].Start) < 0 {
205+
break
206+
}
207+
}
208+
209+
// Extend the region if necessary.
210+
extendedRegion = region
211+
if numOverlapping > 0 {
212+
switch cmp(c.mu.dataRegions[idx].Start, region.Start) {
213+
case -1:
214+
c.assert(extension&allowLeftExtension != 0)
215+
extendedRegion.Start = c.mu.dataRegions[idx].Start
216+
fallthrough
217+
case 0:
218+
emptyBefore = c.mu.emptyBeforeRegion[idx]
219+
}
220+
221+
switch c.mu.dataRegions[idx+numOverlapping-1].End.CompareUpperBounds(cmp, region.End) {
222+
case 1:
223+
c.assert(extension&allowRightExtension != 0)
224+
extendedRegion.End = c.mu.dataRegions[idx+numOverlapping-1].End
225+
case 0:
226+
emptyAfter = c.mu.emptyBeforeRegion[idx+numOverlapping]
227+
}
228+
}
229+
return numOverlapping, extendedRegion, emptyBefore, emptyAfter
230+
}
231+
232+
// makeSpace is used to retain regions [0, keepLeftIdx) and [keepRightIdx, n)
233+
// and leave space for <newRegions> regions in-between.
234+
//
235+
// When necessary, makeSpace evicts regions to make room for the new regions.
236+
//
237+
// Returns the index for the first new region (this equals keepLeftIdx when
238+
// there is no eviction).
239+
func (c *C) makeSpace(keepLeftIdx, newRegions, keepRightIdx int) (firstSpaceIdx int) {
240+
start := 0
241+
end := c.mu.n
242+
newLen := keepLeftIdx + newRegions + (c.mu.n - keepRightIdx)
243+
for ; newLen > cacheMaxEntries; newLen-- {
244+
// The result doesn't fit, so we have to evict a region. We choose to evict
245+
// either the first or the last region, whichever keeps the new region(s)
246+
// closer to the center. The reasoning is that we want to optimize for the
247+
// case where we get repeated queries around the same region of interest.
248+
if (keepLeftIdx - start) > (end - keepRightIdx) {
249+
start++
250+
c.mu.emptyBeforeRegion[start] = false
251+
} else {
252+
end--
253+
c.mu.emptyBeforeRegion[end] = false
254+
}
255+
}
256+
c.moveRegions(start, keepLeftIdx, 0)
257+
c.moveRegions(keepRightIdx, end, keepLeftIdx-start+newRegions)
258+
if newLen < c.mu.n {
259+
// Clear the now unused regions so we don't hold on to key slices.
260+
clear(c.mu.dataRegions[newLen:c.mu.n])
261+
}
262+
c.mu.n = newLen
263+
return keepLeftIdx - start
264+
}
265+
266+
// moveRegions copies the regions [startIdx, endIdx) to
267+
// [newStartIdx, newStartIdx+endIdx-startIdx). The emptyBeforeRegion flags for
268+
// [startIdx, endIdx] are also copied.
269+
func (c *C) moveRegions(startIdx, endIdx int, newStartIdx int) {
270+
if startIdx >= endIdx || startIdx == newStartIdx {
271+
return
272+
}
273+
copy(c.mu.dataRegions[newStartIdx:], c.mu.dataRegions[startIdx:endIdx])
274+
copy(c.mu.emptyBeforeRegion[newStartIdx:], c.mu.emptyBeforeRegion[startIdx:endIdx+1])
275+
}
276+
277+
func (c *C) assert(cond bool) {
278+
if !cond {
279+
panic(errors.AssertionFailedf("overlapcache: conflicting information"))
280+
}
281+
}
282+
283+
func (c *C) check(cmp base.Compare) {
284+
for i := 0; i < c.mu.n; i++ {
285+
r := &c.mu.dataRegions[i]
286+
if !r.Valid(cmp) {
287+
panic(fmt.Sprintf("invalid region %s", r))
288+
}
289+
// Regions must not overlap or touch.
290+
if i > 0 && cmp(c.mu.dataRegions[i-1].End.Key, r.Start) >= 0 {
291+
panic(fmt.Sprintf("overlapping regions %s %s", c.mu.dataRegions[i-1], r))
292+
}
293+
}
294+
}

0 commit comments

Comments
 (0)