Skip to content

Commit faff3ae

Browse files
craig[bot], RaduBerinde, and jbowens
committed
147721: storage: use MinLZ by default only on amd64 r=RaduBerinde a=RaduBerinde

On platforms other than amd64, the MinLZ implementation is much slower, so it is safer to stick with Snappy there.

Epic: none
Release note: None

147728: kvserver: add new value separation timeseries r=jbowens a=jbowens

Add new timeseries metrics for value separation.

Epic: CRDB-20379
Release note (ops change): Introduces new timeseries metrics for observing the behavior of storage engine value separation.

Co-authored-by: Radu Berinde <[email protected]>
Co-authored-by: Jackson Owens <[email protected]>
3 parents de5b811 + fda07dc + 04f7e45 commit faff3ae

File tree

3 files changed

+90
-8
lines changed

3 files changed

+90
-8
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16913,6 +16913,38 @@ layers:
1691316913
unit: BYTES
1691416914
aggregation: AVG
1691516915
derivative: NONE
16916+
- name: storage.value_separation.blob_files.count
16917+
exported_name: storage_value_separation_blob_files_count
16918+
description: The number of blob files that are used to store separated values within the storage engine.
16919+
y_axis_label: Files
16920+
type: GAUGE
16921+
unit: COUNT
16922+
aggregation: AVG
16923+
derivative: NONE
16924+
- name: storage.value_separation.blob_files.size
16925+
exported_name: storage_value_separation_blob_files_size
16926+
description: The size of the physical blob files that are used to store separated values within the storage engine. This sum is the physical post-compression sum of value_bytes.referenced and value_bytes.unreferenced.
16927+
y_axis_label: Bytes
16928+
type: GAUGE
16929+
unit: BYTES
16930+
aggregation: AVG
16931+
derivative: NONE
16932+
- name: storage.value_separation.value_bytes.referenced
16933+
exported_name: storage_value_separation_value_bytes_referenced
16934+
description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and referenced by a live sstable.
16935+
y_axis_label: Bytes
16936+
type: GAUGE
16937+
unit: BYTES
16938+
aggregation: AVG
16939+
derivative: NONE
16940+
- name: storage.value_separation.value_bytes.unreferenced
16941+
exported_name: storage_value_separation_value_bytes_unreferenced
16942+
description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and not referenced by any live sstable. These bytes are garbage that could be reclaimed by a compaction.
16943+
y_axis_label: Bytes
16944+
type: GAUGE
16945+
unit: BYTES
16946+
aggregation: AVG
16947+
derivative: NONE
1691616948
- name: storage.wal.bytes_in
1691716949
exported_name: storage_wal_bytes_in
1691816950
description: The number of logical bytes the storage engine has written to the WAL

pkg/kv/kvserver/metrics.go

Lines changed: 41 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2615,6 +2615,30 @@ Note that the measurement does not include the duration for replicating the eval
26152615
Measurement: "Flush Utilization",
26162616
Unit: metric.Unit_PERCENT,
26172617
}
2618+
metaValueSeparationBytesReferenced = metric.Metadata{
2619+
Name: "storage.value_separation.value_bytes.referenced",
2620+
Help: "The size of storage engine value bytes (pre-compression) that are stored separately in blob files and referenced by a live sstable.",
2621+
Measurement: "Bytes",
2622+
Unit: metric.Unit_BYTES,
2623+
}
2624+
metaValueSeparationBytesUnreferenced = metric.Metadata{
2625+
Name: "storage.value_separation.value_bytes.unreferenced",
2626+
Help: "The size of storage engine value bytes (pre-compression) that are stored separately in blob files and not referenced by any live sstable. These bytes are garbage that could be reclaimed by a compaction.",
2627+
Measurement: "Bytes",
2628+
Unit: metric.Unit_BYTES,
2629+
}
2630+
metaValueSeparationBlobFileCount = metric.Metadata{
2631+
Name: "storage.value_separation.blob_files.count",
2632+
Help: "The number of blob files that are used to store separated values within the storage engine.",
2633+
Measurement: "Files",
2634+
Unit: metric.Unit_COUNT,
2635+
}
2636+
metaValueSeparationBlobFileSize = metric.Metadata{
2637+
Name: "storage.value_separation.blob_files.size",
2638+
Help: "The size of the physical blob files that are used to store separated values within the storage engine. This sum is the physical post-compression sum of value_bytes.referenced and value_bytes.unreferenced.",
2639+
Measurement: "Bytes",
2640+
Unit: metric.Unit_BYTES,
2641+
}
26182642
metaWALBytesWritten = metric.Metadata{
26192643
Name: "storage.wal.bytes_written",
26202644
Help: "The number of bytes the storage engine has written to the WAL",
@@ -2914,6 +2938,10 @@ type StoreMetrics struct {
29142938
SSTableCompressionNone *metric.Gauge
29152939
categoryIterMetrics pebbleCategoryIterMetricsContainer
29162940
categoryDiskWriteMetrics pebbleCategoryDiskWriteMetricsContainer
2941+
ValueSeparationBytesReferenced *metric.Gauge
2942+
ValueSeparationBytesUnreferenced *metric.Gauge
2943+
ValueSeparationBlobFileCount *metric.Gauge
2944+
ValueSeparationBlobFileSize *metric.Gauge
29172945
WALBytesWritten *metric.Counter
29182946
WALBytesIn *metric.Counter
29192947
WALFailoverSwitchCount *metric.Counter
@@ -3650,11 +3678,15 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
36503678
categoryDiskWriteMetrics: pebbleCategoryDiskWriteMetricsContainer{
36513679
registry: storeRegistry,
36523680
},
3653-
WALBytesWritten: metric.NewCounter(metaWALBytesWritten),
3654-
WALBytesIn: metric.NewCounter(metaWALBytesIn),
3655-
WALFailoverSwitchCount: metric.NewCounter(metaStorageWALFailoverSwitchCount),
3656-
WALFailoverPrimaryDuration: metric.NewCounter(metaStorageWALFailoverPrimaryDuration),
3657-
WALFailoverSecondaryDuration: metric.NewCounter(metaStorageWALFailoverSecondaryDuration),
3681+
ValueSeparationBytesReferenced: metric.NewGauge(metaValueSeparationBytesReferenced),
3682+
ValueSeparationBytesUnreferenced: metric.NewGauge(metaValueSeparationBytesUnreferenced),
3683+
ValueSeparationBlobFileCount: metric.NewGauge(metaValueSeparationBlobFileCount),
3684+
ValueSeparationBlobFileSize: metric.NewGauge(metaValueSeparationBlobFileSize),
3685+
WALBytesWritten: metric.NewCounter(metaWALBytesWritten),
3686+
WALBytesIn: metric.NewCounter(metaWALBytesIn),
3687+
WALFailoverSwitchCount: metric.NewCounter(metaStorageWALFailoverSwitchCount),
3688+
WALFailoverPrimaryDuration: metric.NewCounter(metaStorageWALFailoverPrimaryDuration),
3689+
WALFailoverSecondaryDuration: metric.NewCounter(metaStorageWALFailoverSecondaryDuration),
36583690
WALFailoverWriteAndSyncLatency: metric.NewManualWindowHistogram(
36593691
metaStorageWALFailoverWriteAndSyncLatency,
36603692
pebble.FsyncLatencyBuckets,
@@ -4088,6 +4120,10 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) {
40884120
sm.FlushableIngestTableCount.Update(int64(m.Flush.AsIngestTableCount))
40894121
sm.FlushableIngestTableSize.Update(int64(m.Flush.AsIngestBytes))
40904122
sm.IngestCount.Update(int64(m.Ingest.Count))
4123+
sm.ValueSeparationBytesReferenced.Update(int64(m.BlobFiles.ReferencedValueSize))
4124+
sm.ValueSeparationBytesUnreferenced.Update(int64(m.BlobFiles.ValueSize - m.BlobFiles.ReferencedValueSize))
4125+
sm.ValueSeparationBlobFileCount.Update(int64(m.BlobFiles.LiveCount))
4126+
sm.ValueSeparationBlobFileSize.Update(int64(m.BlobFiles.LiveSize))
40914127
// NB: `UpdateIfHigher` is used here since there is a race in pebble where
40924128
// sometimes the WAL is rotated but metrics are retrieved prior to the update
40934129
// to BytesIn to account for the previous WAL.

pkg/storage/pebble.go

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ import (
1212
"math"
1313
"os"
1414
"path/filepath"
15+
"runtime"
1516
"sort"
1617
"strconv"
1718
"strings"
@@ -224,6 +225,19 @@ func RegisterCompressionAlgorithmClusterSetting(
224225
)
225226
}
226227

228+
var defaultCompressionAlgorithm = func() CompressionAlgorithm {
229+
if runtime.GOARCH == "amd64" {
230+
// We prefer MinLZ on amd64 because it is slightly superior to Snappy in
231+
// almost all cases (both in terms of speed and compression ratio).
232+
//
233+
// Only amd64 has an optimized assembly MinLZ implementation; the Go
234+
// implementation is significantly slower, especially when decompressing;
235+
// see https://github.com/minio/minlz#protobuf-sample
236+
return CompressionAlgorithmMinLZ
237+
}
238+
return CompressionAlgorithmSnappy
239+
}()
240+
227241
// CompressionAlgorithmStorage determines the compression algorithm used to
228242
// compress data blocks when writing sstables for use in a Pebble store (written
229243
// directly, or constructed for ingestion on a remote store via AddSSTable).
@@ -232,7 +246,7 @@ func RegisterCompressionAlgorithmClusterSetting(
232246
var CompressionAlgorithmStorage = RegisterCompressionAlgorithmClusterSetting(
233247
"storage.sstable.compression_algorithm",
234248
`determines the compression algorithm to use when compressing sstable data blocks for use in a Pebble store;`,
235-
CompressionAlgorithmMinLZ, // Default.
249+
defaultCompressionAlgorithm,
236250
)
237251

238252
// CompressionAlgorithmBackupStorage determines the compression algorithm used
@@ -242,7 +256,7 @@ var CompressionAlgorithmStorage = RegisterCompressionAlgorithmClusterSetting(
242256
var CompressionAlgorithmBackupStorage = RegisterCompressionAlgorithmClusterSetting(
243257
"storage.sstable.compression_algorithm_backup_storage",
244258
`determines the compression algorithm to use when compressing sstable data blocks for backup row data storage;`,
245-
CompressionAlgorithmMinLZ, // Default.
259+
defaultCompressionAlgorithm,
246260
)
247261

248262
// CompressionAlgorithmBackupTransport determines the compression algorithm used
@@ -255,7 +269,7 @@ var CompressionAlgorithmBackupStorage = RegisterCompressionAlgorithmClusterSetti
255269
var CompressionAlgorithmBackupTransport = RegisterCompressionAlgorithmClusterSetting(
256270
"storage.sstable.compression_algorithm_backup_transport",
257271
`determines the compression algorithm to use when compressing sstable data blocks for backup transport;`,
258-
CompressionAlgorithmMinLZ, // Default.
272+
defaultCompressionAlgorithm,
259273
)
260274

261275
func getCompressionAlgorithm(

0 commit comments

Comments
 (0)