|
5 | 5 | package pebble |
6 | 6 |
|
7 | 7 | import ( |
8 | | - "slices" |
9 | | - |
10 | 8 | "github.com/cockroachdb/errors" |
11 | 9 | "github.com/cockroachdb/pebble/internal/base" |
12 | | - "github.com/cockroachdb/pebble/internal/invariants" |
13 | 10 | "github.com/cockroachdb/pebble/internal/manifest" |
14 | 11 | "github.com/cockroachdb/pebble/objstorage" |
15 | 12 | "github.com/cockroachdb/pebble/sstable" |
16 | | - "github.com/cockroachdb/pebble/sstable/blob" |
17 | 13 | "github.com/cockroachdb/pebble/valsep" |
18 | 14 | "github.com/cockroachdb/redact" |
19 | 15 | ) |
@@ -52,26 +48,26 @@ func (d *DB) determineCompactionValueSeparation( |
52 | 48 | // For flushes, c.version is nil. |
53 | 49 | blobFileSet = uniqueInputBlobMetadatas(&c.version.BlobFiles, c.inputs) |
54 | 50 | } |
55 | | - minSize := uint64(policy.MinimumSize) |
| 51 | + minSize := policy.MinimumSize |
56 | 52 | switch valueStorage { |
57 | 53 | case ValueStorageLowReadLatency: |
58 | 54 | return valsep.NeverSeparateValues{} |
59 | 55 | case ValueStorageLatencyTolerant: |
60 | 56 | minSize = latencyTolerantMinimumSize |
61 | 57 | default: |
62 | 58 | } |
63 | | - if writeBlobs, outputBlobReferenceDepth := shouldWriteBlobFiles(c, policy, minSize); !writeBlobs { |
| 59 | + if writeBlobs, outputBlobReferenceDepth := shouldWriteBlobFiles(c, policy, uint64(minSize)); !writeBlobs { |
64 | 60 | // This compaction should preserve existing blob references. |
65 | 61 | kind := sstable.ValueSeparationDefault |
66 | 62 | if valueStorage != ValueStorageDefault { |
67 | 63 | kind = sstable.ValueSeparationSpanPolicy |
68 | 64 | } |
69 | | - return &preserveBlobReferences{ |
70 | | - inputBlobPhysicalFiles: blobFileSet, |
71 | | - outputBlobReferenceDepth: outputBlobReferenceDepth, |
72 | | - minimumValueSize: int(minSize), |
73 | | - originalValueSeparationKind: kind, |
74 | | - } |
| 65 | + return valsep.NewPreserveAllHotBlobReferences( |
| 66 | + blobFileSet, |
| 67 | + outputBlobReferenceDepth, |
| 68 | + kind, |
| 69 | + minSize, |
| 70 | + ) |
75 | 71 | } |
76 | 72 |
|
77 | 73 | // This compaction should write values to new blob files. |
@@ -218,172 +214,3 @@ func uniqueInputBlobMetadatas( |
218 | 214 | } |
219 | 215 | return m |
220 | 216 | } |
221 | | - |
222 | | -// preserveBlobReferences implements the compact.ValueSeparation interface. When |
223 | | -// a compaction is configured with preserveBlobReferences, the compaction will |
224 | | -// not create any new blob files. However, input references to existing blob |
225 | | -// references will be preserved and metadata about the table's blob references |
226 | | -// will be collected. |
227 | | -type preserveBlobReferences struct { |
228 | | - // inputBlobPhysicalFiles holds the *PhysicalBlobFile for every unique blob |
229 | | - // file referenced by input sstables. |
230 | | - inputBlobPhysicalFiles map[base.BlobFileID]*manifest.PhysicalBlobFile |
231 | | - outputBlobReferenceDepth manifest.BlobReferenceDepth |
232 | | - |
233 | | - // minimumValueSize is the minimum size of values used by the value separation |
234 | | - // policy that was originally used to write the input sstables. |
235 | | - minimumValueSize int |
236 | | - // originalValueSeparationKind is the value separation policy that was originally used to |
237 | | - // write the input sstables. |
238 | | - originalValueSeparationKind sstable.ValueSeparationKind |
239 | | - |
240 | | - // state |
241 | | - buf []byte |
242 | | - // currReferences holds the pending references that have been referenced by |
243 | | - // the current output sstable. The index of a reference with a given blob |
244 | | - // file ID is the value of the base.BlobReferenceID used by its value handles |
245 | | - // within the output sstable. |
246 | | - currReferences []pendingReference |
247 | | - // totalValueSize is the sum of currReferenceValueSizes. |
248 | | - // |
249 | | - // INVARIANT: totalValueSize == sum(currReferenceValueSizes) |
250 | | - totalValueSize uint64 |
251 | | -} |
252 | | - |
253 | | -type pendingReference struct { |
254 | | - blobFileID base.BlobFileID |
255 | | - valueSize uint64 |
256 | | -} |
257 | | - |
258 | | -// Assert that *preserveBlobReferences implements the compact.ValueSeparation |
259 | | -// interface. |
260 | | -var _ valsep.ValueSeparation = (*preserveBlobReferences)(nil) |
261 | | - |
262 | | -// SetNextOutputConfig implements the ValueSeparation interface. |
263 | | -func (vs *preserveBlobReferences) SetNextOutputConfig(config valsep.ValueSeparationOutputConfig) {} |
264 | | - |
265 | | -// Kind implements the ValueSeparation interface. |
266 | | -func (vs *preserveBlobReferences) Kind() sstable.ValueSeparationKind { |
267 | | - return vs.originalValueSeparationKind |
268 | | -} |
269 | | - |
270 | | -// MinimumSize implements the ValueSeparation interface. |
271 | | -func (vs *preserveBlobReferences) MinimumSize() int { return vs.minimumValueSize } |
272 | | - |
273 | | -// EstimatedFileSize returns an estimate of the disk space consumed by the current |
274 | | -// blob file if it were closed now. |
275 | | -func (vs *preserveBlobReferences) EstimatedFileSize() uint64 { |
276 | | - return 0 |
277 | | -} |
278 | | - |
279 | | -// EstimatedReferenceSize returns an estimate of the disk space consumed by the |
280 | | -// current output sstable's blob references so far. |
281 | | -func (vs *preserveBlobReferences) EstimatedReferenceSize() uint64 { |
282 | | - // TODO(jackson): The totalValueSize is the uncompressed value sizes. With |
283 | | - // compressible data, it overestimates the disk space consumed by the blob |
284 | | - // references. It also does not include the blob file's index block or |
285 | | - // footer, so it can underestimate if values are completely incompressible. |
286 | | - // |
287 | | - // Should we compute a compression ratio per blob file and scale the |
288 | | - // references appropriately? |
289 | | - return vs.totalValueSize |
290 | | -} |
291 | | - |
292 | | -// Add implements compact.ValueSeparation. This implementation will write |
293 | | -// existing blob references to the output table. |
294 | | -func (vs *preserveBlobReferences) Add( |
295 | | - tw sstable.RawWriter, kv *base.InternalKV, forceObsolete bool, _ bool, |
296 | | -) error { |
297 | | - if !kv.V.IsBlobValueHandle() { |
298 | | - // If the value is not already a blob handle (either it's in-place or in |
299 | | - // a value block), we retrieve the value and write it through Add. The |
300 | | - // sstable writer may still decide to put the value in a value block, |
301 | | - // but regardless the value will be written to the sstable itself and |
302 | | - // not a blob file. |
303 | | - v, callerOwned, err := kv.Value(vs.buf) |
304 | | - if err != nil { |
305 | | - return err |
306 | | - } |
307 | | - if callerOwned { |
308 | | - vs.buf = v[:0] |
309 | | - } |
310 | | - return tw.Add(kv.K, v, forceObsolete) |
311 | | - } |
312 | | - |
313 | | - // The value is an existing blob handle. We can copy it into the output |
314 | | - // sstable, taking note of the reference for the table metadata. |
315 | | - lv := kv.V.LazyValue() |
316 | | - fileID := lv.Fetcher.BlobFileID |
317 | | - |
318 | | - var refID base.BlobReferenceID |
319 | | - if refIdx := slices.IndexFunc(vs.currReferences, func(ref pendingReference) bool { |
320 | | - return ref.blobFileID == fileID |
321 | | - }); refIdx != -1 { |
322 | | - refID = base.BlobReferenceID(refIdx) |
323 | | - } else { |
324 | | - refID = base.BlobReferenceID(len(vs.currReferences)) |
325 | | - vs.currReferences = append(vs.currReferences, pendingReference{ |
326 | | - blobFileID: fileID, |
327 | | - valueSize: 0, |
328 | | - }) |
329 | | - } |
330 | | - |
331 | | - if invariants.Enabled && vs.currReferences[refID].blobFileID != fileID { |
332 | | - panic("wrong reference index") |
333 | | - } |
334 | | - |
335 | | - handleSuffix := blob.DecodeHandleSuffix(lv.ValueOrHandle) |
336 | | - inlineHandle := blob.InlineHandle{ |
337 | | - InlineHandlePreface: blob.InlineHandlePreface{ |
338 | | - ReferenceID: refID, |
339 | | - ValueLen: lv.Fetcher.Attribute.ValueLen, |
340 | | - }, |
341 | | - HandleSuffix: handleSuffix, |
342 | | - } |
343 | | - err := tw.AddWithBlobHandle(kv.K, inlineHandle, lv.Fetcher.Attribute.ShortAttribute, forceObsolete) |
344 | | - if err != nil { |
345 | | - return err |
346 | | - } |
347 | | - vs.currReferences[refID].valueSize += uint64(lv.Fetcher.Attribute.ValueLen) |
348 | | - vs.totalValueSize += uint64(lv.Fetcher.Attribute.ValueLen) |
349 | | - return nil |
350 | | -} |
351 | | - |
352 | | -// FinishOutput implements compact.ValueSeparation. |
353 | | -func (vs *preserveBlobReferences) FinishOutput() (valsep.ValueSeparationMetadata, error) { |
354 | | - if invariants.Enabled { |
355 | | - // Assert that the incrementally-maintained totalValueSize matches the |
356 | | - // sum of all the reference value sizes. |
357 | | - totalValueSize := uint64(0) |
358 | | - for _, ref := range vs.currReferences { |
359 | | - totalValueSize += ref.valueSize |
360 | | - } |
361 | | - if totalValueSize != vs.totalValueSize { |
362 | | - return valsep.ValueSeparationMetadata{}, |
363 | | - errors.AssertionFailedf("totalValueSize mismatch: %d != %d", totalValueSize, vs.totalValueSize) |
364 | | - } |
365 | | - } |
366 | | - |
367 | | - references := make(manifest.BlobReferences, len(vs.currReferences)) |
368 | | - for i := range vs.currReferences { |
369 | | - ref := vs.currReferences[i] |
370 | | - phys, ok := vs.inputBlobPhysicalFiles[ref.blobFileID] |
371 | | - if !ok { |
372 | | - return valsep.ValueSeparationMetadata{}, |
373 | | - errors.AssertionFailedf("pebble: blob file %s not found among input sstables", ref.blobFileID) |
374 | | - } |
375 | | - references[i] = manifest.MakeBlobReference(ref.blobFileID, ref.valueSize, ref.valueSize, phys) |
376 | | - } |
377 | | - referenceSize := vs.totalValueSize |
378 | | - vs.currReferences = vs.currReferences[:0] |
379 | | - vs.totalValueSize = 0 |
380 | | - return valsep.ValueSeparationMetadata{ |
381 | | - BlobReferences: references, |
382 | | - BlobReferenceSize: referenceSize, |
383 | | - // The outputBlobReferenceDepth is computed from the input sstables, |
384 | | - // reflecting the worst-case overlap of referenced blob files. If this |
385 | | - // sstable references fewer unique blob files, reduce its depth to the |
386 | | - // count of unique files. |
387 | | - BlobReferenceDepth: min(vs.outputBlobReferenceDepth, manifest.BlobReferenceDepth(len(references))), |
388 | | - }, nil |
389 | | -} |
0 commit comments