|
| 1 | +// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use |
| 2 | +// of this source code is governed by a BSD-style license that can be found in |
| 3 | +// the LICENSE file. |
| 4 | + |
| 5 | +package overlapcache |
| 6 | + |
| 7 | +import ( |
| 8 | + "fmt" |
| 9 | + "sort" |
| 10 | + "sync" |
| 11 | + |
| 12 | + "github.com/cockroachdb/errors" |
| 13 | + "github.com/cockroachdb/pebble/internal/base" |
| 14 | + "github.com/cockroachdb/pebble/internal/invariants" |
| 15 | +) |
| 16 | + |
// C is a data structure that caches information about data regions in a file.
// It is used to speed up related overlap checks during ingestion.
//
// -- Implementation --
//
// The cache maintains information about a small number of regions. A region
// corresponds to a user key interval (UserKeyBounds). We define three types of
// regions:
//   - empty region: it is known that no keys or spans in the file overlap this
//     region.
//   - data region: corresponds to a key or span (or union of keys and spans) in
//     the file. Any single key that falls inside this region has data overlap.
//   - unknown region.
//
// We maintain a list of disjoint and sorted data regions, along with flags
// which indicate if the regions in-between are empty or unknown. The region
// before data region 0 refers to the entire start of the file up to data region
// 0. The region after data region n-1 refers to the entire end of the file
// starting from the end of data region n-1.
//
// See testdata/cache for some examples represented visually.
type C struct {
	// mu groups the mutex with the state it protects; the exported methods
	// acquire it before touching any of the fields below.
	mu struct {
		sync.Mutex
		// n is the number of entries of dataRegions that are currently in use.
		n int
		// dataRegions[0:n] are the known data regions, sorted and disjoint.
		dataRegions [cacheMaxEntries]base.UserKeyBounds
		// emptyBeforeRegion[i] is true when the space between data regions i-1
		// and i is known to contain no data, and false when it is unknown.
		// Index 0 describes the space before the first data region and index n
		// the space after the last one.
		emptyBeforeRegion [cacheMaxEntries + 1]bool
	}
}
| 46 | + |
const (
	// cacheMaxEntries is the capacity of the cache, i.e. the maximum number of
	// data regions it tracks at any one time. It must be at least 4.
	cacheMaxEntries = 6

	// maxKeySize is the largest key length (in bytes) the cache is willing to
	// retain. Reports involving longer keys are dropped; this is a safety
	// precaution that prevents the cache from holding on to very large keys.
	maxKeySize = 4 << 10
)
| 53 | + |
| 54 | +// CheckDataOverlap tries to determine if the target region overlaps any data |
| 55 | +// regions. |
| 56 | +func (c *C) CheckDataOverlap(cmp base.Compare, target base.UserKeyBounds) (overlaps, ok bool) { |
| 57 | + c.mu.Lock() |
| 58 | + defer c.mu.Unlock() |
| 59 | + n := c.mu.n |
| 60 | + |
| 61 | + // Find first region which ends after the start of the target region. |
| 62 | + idx := sort.Search(n, func(i int) bool { |
| 63 | + return c.mu.dataRegions[i].End.IsUpperBoundFor(cmp, target.Start) |
| 64 | + }) |
| 65 | + if idx < n && target.End.IsUpperBoundFor(cmp, c.mu.dataRegions[idx].Start) { |
| 66 | + // target overlaps with a known data region. |
| 67 | + return true, true |
| 68 | + } |
| 69 | + // The target region falls completely outside regions idx-1 and idx. |
| 70 | + if c.mu.emptyBeforeRegion[idx] { |
| 71 | + // The entire space between data regions idx-1 and idx is known to contain |
| 72 | + // no data. |
| 73 | + return false, true |
| 74 | + } |
| 75 | + // We don't know if there is data in the space between regions idx-1 and idx. |
| 76 | + return false, false |
| 77 | +} |
| 78 | + |
| 79 | +// ReportDataRegion informs the cache that the target region contains data. |
| 80 | +// |
| 81 | +// There is no assumption about the region being maximal (i.e. it could be part |
| 82 | +// of a larger data region). |
| 83 | +// |
| 84 | +// Note that the cache will hold on to the region's key slices indefinitely. |
| 85 | +// They should not be modified ever again by the caller. |
| 86 | +func (c *C) ReportDataRegion(cmp base.Compare, region base.UserKeyBounds) { |
| 87 | + if len(region.Start) > maxKeySize || len(region.End.Key) > maxKeySize { |
| 88 | + return |
| 89 | + } |
| 90 | + |
| 91 | + c.mu.Lock() |
| 92 | + defer c.mu.Unlock() |
| 93 | + if invariants.Enabled { |
| 94 | + defer c.check(cmp) |
| 95 | + } |
| 96 | + c.insertRegion(cmp, region, allowLeftExtension|allowRightExtension) |
| 97 | +} |
| 98 | + |
| 99 | +// ReportEmptyRegion informs the cache of an empty region, in-between two data |
| 100 | +// regions r1 and r2. |
| 101 | +// |
| 102 | +// Unset regions are accepted and serve as "sentinels" representing the start or |
| 103 | +// end of the file. Specifically: |
| 104 | +// - if r1 is unset, the empty region is from the start of the file to the |
| 105 | +// start of r2; |
| 106 | +// - if r2 is unset, the empty region is from the end of r2 to the end of the |
| 107 | +// file; |
| 108 | +// - if both r1 and r2 are unset, the entire file is empty. |
| 109 | +// |
| 110 | +// There is no assumption about the regions being maximal (i.e. r1 could be part |
| 111 | +// of a larger data region extending to the left, and r2 could be part of a |
| 112 | +// larger data region extending to the right). |
| 113 | +// |
| 114 | +// Note that the cache will hold on to the regions' key slices indefinitely. |
| 115 | +// They should not be modified ever again by the caller. |
| 116 | +func (c *C) ReportEmptyRegion(cmp base.Compare, r1, r2 base.UserKeyBounds) { |
| 117 | + if len(r1.Start) > maxKeySize || len(r1.End.Key) > maxKeySize || |
| 118 | + len(r2.Start) > maxKeySize || len(r2.End.Key) > maxKeySize { |
| 119 | + return |
| 120 | + } |
| 121 | + |
| 122 | + c.mu.Lock() |
| 123 | + defer c.mu.Unlock() |
| 124 | + if invariants.Enabled { |
| 125 | + defer c.check(cmp) |
| 126 | + } |
| 127 | + |
| 128 | + switch { |
| 129 | + case r1.Start == nil && r2.Start == nil: |
| 130 | + // The entire file is empty, |
| 131 | + c.assert(c.mu.n == 0) |
| 132 | + c.mu.emptyBeforeRegion[0] = true |
| 133 | + return |
| 134 | + |
| 135 | + case r1.Start == nil: |
| 136 | + // We know there is only empty space before r2. |
| 137 | + idx := c.insertRegion(cmp, r2, allowRightExtension) |
| 138 | + c.assert(idx == 0) |
| 139 | + c.mu.emptyBeforeRegion[0] = true |
| 140 | + return |
| 141 | + |
| 142 | + case r2.Start == nil: |
| 143 | + // We know there is only empty space after r1. |
| 144 | + idx := c.insertRegion(cmp, r1, allowLeftExtension) |
| 145 | + c.assert(idx == c.mu.n-1) |
| 146 | + c.mu.emptyBeforeRegion[c.mu.n] = true |
| 147 | + return |
| 148 | + } |
| 149 | + |
| 150 | + // Find the first region that contains or ends right at r1.Start. |
| 151 | + r1Idx := c.insertionPoint(cmp, r1) |
| 152 | + r1Overlapping, r1, r1EmptyBefore, _ := c.checkOverlap(cmp, r1Idx, r1, allowLeftExtension) |
| 153 | + r2Idx := r1Idx + r1Overlapping |
| 154 | + |
| 155 | + r2Overlapping, r2, _, r2EmptyAfter := c.checkOverlap(cmp, r2Idx, r2, allowRightExtension) |
| 156 | + |
| 157 | + newIdx := c.makeSpace(r1Idx, 2, r2Idx+r2Overlapping) |
| 158 | + c.mu.dataRegions[newIdx] = r1 |
| 159 | + c.mu.dataRegions[newIdx+1] = r2 |
| 160 | + c.mu.emptyBeforeRegion[newIdx] = r1EmptyBefore |
| 161 | + c.mu.emptyBeforeRegion[newIdx+1] = true |
| 162 | + c.mu.emptyBeforeRegion[newIdx+2] = r2EmptyAfter |
| 163 | +} |
| 164 | + |
| 165 | +// insertionPoint returns the first region that contains or ends right at Start. |
| 166 | +// We allow an exclusive end bound "touching" the new region, because we can |
| 167 | +// coalesce with it. |
| 168 | +func (c *C) insertionPoint(cmp base.Compare, region base.UserKeyBounds) int { |
| 169 | + return sort.Search(c.mu.n, func(i int) bool { |
| 170 | + return cmp(c.mu.dataRegions[i].End.Key, region.Start) >= 0 |
| 171 | + }) |
| 172 | +} |
| 173 | + |
| 174 | +// insertRegion inserts a data region, evicting a region if necessary. Returns |
| 175 | +// the index where it was inserted. |
| 176 | +func (c *C) insertRegion( |
| 177 | + cmp base.Compare, region base.UserKeyBounds, extension allowedExtension, |
| 178 | +) (idx int) { |
| 179 | + idx = c.insertionPoint(cmp, region) |
| 180 | + overlapping, extendedRegion, emptyBefore, emptyAfter := c.checkOverlap(cmp, idx, region, extension) |
| 181 | + idx = c.makeSpace(idx, 1, idx+overlapping) |
| 182 | + c.mu.dataRegions[idx] = extendedRegion |
| 183 | + c.mu.emptyBeforeRegion[idx] = emptyBefore |
| 184 | + c.mu.emptyBeforeRegion[idx+1] = emptyAfter |
| 185 | + return idx |
| 186 | +} |
| 187 | + |
// allowedExtension is a bit set describing the directions in which
// checkOverlap may legally extend a region; it is used for sanity checking.
type allowedExtension uint8

const (
	// allowLeftExtension permits moving the start of a region to the left.
	allowLeftExtension allowedExtension = 1 << 0
	// allowRightExtension permits moving the end of a region to the right.
	allowRightExtension allowedExtension = 1 << 1
)
| 196 | + |
| 197 | +// numOverlappingRegions is called with idx pointing to the first region that |
| 198 | +// ends after region.Start and returns the number of regions that overlap with |
| 199 | +// (or touch) the target region. |
| 200 | +func (c *C) checkOverlap( |
| 201 | + cmp base.Compare, idx int, region base.UserKeyBounds, extension allowedExtension, |
| 202 | +) (numOverlapping int, extendedRegion base.UserKeyBounds, emptyBefore, emptyAfter bool) { |
| 203 | + for ; ; numOverlapping++ { |
| 204 | + if idx+numOverlapping >= c.mu.n || cmp(region.End.Key, c.mu.dataRegions[idx+numOverlapping].Start) < 0 { |
| 205 | + break |
| 206 | + } |
| 207 | + } |
| 208 | + |
| 209 | + // Extend the region if necessary. |
| 210 | + extendedRegion = region |
| 211 | + if numOverlapping > 0 { |
| 212 | + switch cmp(c.mu.dataRegions[idx].Start, region.Start) { |
| 213 | + case -1: |
| 214 | + c.assert(extension&allowLeftExtension != 0) |
| 215 | + extendedRegion.Start = c.mu.dataRegions[idx].Start |
| 216 | + fallthrough |
| 217 | + case 0: |
| 218 | + emptyBefore = c.mu.emptyBeforeRegion[idx] |
| 219 | + } |
| 220 | + |
| 221 | + switch c.mu.dataRegions[idx+numOverlapping-1].End.CompareUpperBounds(cmp, region.End) { |
| 222 | + case 1: |
| 223 | + c.assert(extension&allowRightExtension != 0) |
| 224 | + extendedRegion.End = c.mu.dataRegions[idx+numOverlapping-1].End |
| 225 | + case 0: |
| 226 | + emptyAfter = c.mu.emptyBeforeRegion[idx+numOverlapping] |
| 227 | + } |
| 228 | + } |
| 229 | + return numOverlapping, extendedRegion, emptyBefore, emptyAfter |
| 230 | +} |
| 231 | + |
| 232 | +// makeSpace is used to retain regions [0, keepLeftIdx) and [keepRightIdx, n) |
| 233 | +// and leave space for <newRegions> regions in-between. |
| 234 | +// |
| 235 | +// When necessary, makeSpace evicts regions to make room for the new regions. |
| 236 | +// |
| 237 | +// Returns the index for the first new region (this equals keepLeftIdx when |
| 238 | +// there is no eviction). |
| 239 | +func (c *C) makeSpace(keepLeftIdx, newRegions, keepRightIdx int) (firstSpaceIdx int) { |
| 240 | + start := 0 |
| 241 | + end := c.mu.n |
| 242 | + newLen := keepLeftIdx + newRegions + (c.mu.n - keepRightIdx) |
| 243 | + for ; newLen > cacheMaxEntries; newLen-- { |
| 244 | + // The result doesn't fit, so we have to evict a region. We choose to evict |
| 245 | + // either the first or the last region, whichever keeps the new region(s) |
| 246 | + // closer to the center. The reasoning is that we want to optimize for the |
| 247 | + // case where we get repeated queries around the same region of interest. |
| 248 | + if (keepLeftIdx - start) > (end - keepRightIdx) { |
| 249 | + start++ |
| 250 | + c.mu.emptyBeforeRegion[start] = false |
| 251 | + } else { |
| 252 | + end-- |
| 253 | + c.mu.emptyBeforeRegion[end] = false |
| 254 | + } |
| 255 | + } |
| 256 | + c.moveRegions(start, keepLeftIdx, 0) |
| 257 | + c.moveRegions(keepRightIdx, end, keepLeftIdx-start+newRegions) |
| 258 | + if newLen < c.mu.n { |
| 259 | + // Clear the now unused regions so we don't hold on to key slices. |
| 260 | + clear(c.mu.dataRegions[newLen:c.mu.n]) |
| 261 | + } |
| 262 | + c.mu.n = newLen |
| 263 | + return keepLeftIdx - start |
| 264 | +} |
| 265 | + |
| 266 | +// moveRegions copies the regions [startIdx, endIdx) to |
| 267 | +// [newStartIdx, newStartIdx+endIdx-startIdx). The emptyBeforeRegion flags for |
| 268 | +// [startIdx, endIdx] are also copied. |
| 269 | +func (c *C) moveRegions(startIdx, endIdx int, newStartIdx int) { |
| 270 | + if startIdx >= endIdx || startIdx == newStartIdx { |
| 271 | + return |
| 272 | + } |
| 273 | + copy(c.mu.dataRegions[newStartIdx:], c.mu.dataRegions[startIdx:endIdx]) |
| 274 | + copy(c.mu.emptyBeforeRegion[newStartIdx:], c.mu.emptyBeforeRegion[startIdx:endIdx+1]) |
| 275 | +} |
| 276 | + |
| 277 | +func (c *C) assert(cond bool) { |
| 278 | + if !cond { |
| 279 | + panic(errors.AssertionFailedf("overlapcache: conflicting information")) |
| 280 | + } |
| 281 | +} |
| 282 | + |
| 283 | +func (c *C) check(cmp base.Compare) { |
| 284 | + for i := 0; i < c.mu.n; i++ { |
| 285 | + r := &c.mu.dataRegions[i] |
| 286 | + if !r.Valid(cmp) { |
| 287 | + panic(fmt.Sprintf("invalid region %s", r)) |
| 288 | + } |
| 289 | + // Regions must not overlap or touch. |
| 290 | + if i > 0 && cmp(c.mu.dataRegions[i-1].End.Key, r.Start) >= 0 { |
| 291 | + panic(fmt.Sprintf("overlapping regions %s %s", c.mu.dataRegions[i-1], r)) |
| 292 | + } |
| 293 | + } |
| 294 | +} |
0 commit comments