Skip to content

Commit 04f7e45

Browse files
committed
kvserver: add new value separation timeseries
Add new timeseries metrics for value separation.

Epic: CRDB-20379

Release note (ops change): Introduces new timeseries metrics for observing the behavior of storage engine value separation.
1 parent 94b4a04 commit 04f7e45

File tree

2 files changed

+73
-5
lines changed

2 files changed

+73
-5
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16913,6 +16913,38 @@ layers:
1691316913
unit: BYTES
1691416914
aggregation: AVG
1691516915
derivative: NONE
16916+
- name: storage.value_separation.blob_files.count
16917+
exported_name: storage_value_separation_blob_files_count
16918+
description: The number of blob files that are used to store separated values within the storage engine.
16919+
y_axis_label: Files
16920+
type: GAUGE
16921+
unit: COUNT
16922+
aggregation: AVG
16923+
derivative: NONE
16924+
- name: storage.value_separation.blob_files.size
16925+
exported_name: storage_value_separation_blob_files_size
16926+
description: The size of the physical blob files that are used to store separated values within the storage engine. This sum is the physical post-compression sum of value_bytes.referenced and value_bytes.unreferenced.
16927+
y_axis_label: Bytes
16928+
type: GAUGE
16929+
unit: BYTES
16930+
aggregation: AVG
16931+
derivative: NONE
16932+
- name: storage.value_separation.value_bytes.referenced
16933+
exported_name: storage_value_separation_value_bytes_referenced
16934+
description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and referenced by a live sstable.
16935+
y_axis_label: Bytes
16936+
type: GAUGE
16937+
unit: BYTES
16938+
aggregation: AVG
16939+
derivative: NONE
16940+
- name: storage.value_separation.value_bytes.unreferenced
16941+
exported_name: storage_value_separation_value_bytes_unreferenced
16942+
description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and not referenced by any live sstable. These bytes are garbage that could be reclaimed by a compaction.
16943+
y_axis_label: Bytes
16944+
type: GAUGE
16945+
unit: BYTES
16946+
aggregation: AVG
16947+
derivative: NONE
1691616948
- name: storage.wal.bytes_in
1691716949
exported_name: storage_wal_bytes_in
1691816950
description: The number of logical bytes the storage engine has written to the WAL

pkg/kv/kvserver/metrics.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2615,6 +2615,30 @@ Note that the measurement does not include the duration for replicating the eval
26152615
Measurement: "Flush Utilization",
26162616
Unit: metric.Unit_PERCENT,
26172617
}
2618+
metaValueSeparationBytesReferenced = metric.Metadata{
2619+
Name: "storage.value_separation.value_bytes.referenced",
2620+
Help: "The size of storage engine value bytes (pre-compression) that are stored separately in blob files and referenced by a live sstable.",
2621+
Measurement: "Bytes",
2622+
Unit: metric.Unit_BYTES,
2623+
}
2624+
metaValueSeparationBytesUnreferenced = metric.Metadata{
2625+
Name: "storage.value_separation.value_bytes.unreferenced",
2626+
Help: "The size of storage engine value bytes (pre-compression) that are stored separately in blob files and not referenced by any live sstable. These bytes are garbage that could be reclaimed by a compaction.",
2627+
Measurement: "Bytes",
2628+
Unit: metric.Unit_BYTES,
2629+
}
2630+
metaValueSeparationBlobFileCount = metric.Metadata{
2631+
Name: "storage.value_separation.blob_files.count",
2632+
Help: "The number of blob files that are used to store separated values within the storage engine.",
2633+
Measurement: "Files",
2634+
Unit: metric.Unit_COUNT,
2635+
}
2636+
metaValueSeparationBlobFileSize = metric.Metadata{
2637+
Name: "storage.value_separation.blob_files.size",
2638+
Help: "The size of the physical blob files that are used to store separated values within the storage engine. This sum is the physical post-compression sum of value_bytes.referenced and value_bytes.unreferenced.",
2639+
Measurement: "Bytes",
2640+
Unit: metric.Unit_BYTES,
2641+
}
26182642
metaWALBytesWritten = metric.Metadata{
26192643
Name: "storage.wal.bytes_written",
26202644
Help: "The number of bytes the storage engine has written to the WAL",
@@ -2914,6 +2938,10 @@ type StoreMetrics struct {
29142938
SSTableCompressionNone *metric.Gauge
29152939
categoryIterMetrics pebbleCategoryIterMetricsContainer
29162940
categoryDiskWriteMetrics pebbleCategoryDiskWriteMetricsContainer
2941+
ValueSeparationBytesReferenced *metric.Gauge
2942+
ValueSeparationBytesUnreferenced *metric.Gauge
2943+
ValueSeparationBlobFileCount *metric.Gauge
2944+
ValueSeparationBlobFileSize *metric.Gauge
29172945
WALBytesWritten *metric.Counter
29182946
WALBytesIn *metric.Counter
29192947
WALFailoverSwitchCount *metric.Counter
@@ -3650,11 +3678,15 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
36503678
categoryDiskWriteMetrics: pebbleCategoryDiskWriteMetricsContainer{
36513679
registry: storeRegistry,
36523680
},
3653-
WALBytesWritten: metric.NewCounter(metaWALBytesWritten),
3654-
WALBytesIn: metric.NewCounter(metaWALBytesIn),
3655-
WALFailoverSwitchCount: metric.NewCounter(metaStorageWALFailoverSwitchCount),
3656-
WALFailoverPrimaryDuration: metric.NewCounter(metaStorageWALFailoverPrimaryDuration),
3657-
WALFailoverSecondaryDuration: metric.NewCounter(metaStorageWALFailoverSecondaryDuration),
3681+
ValueSeparationBytesReferenced: metric.NewGauge(metaValueSeparationBytesReferenced),
3682+
ValueSeparationBytesUnreferenced: metric.NewGauge(metaValueSeparationBytesUnreferenced),
3683+
ValueSeparationBlobFileCount: metric.NewGauge(metaValueSeparationBlobFileCount),
3684+
ValueSeparationBlobFileSize: metric.NewGauge(metaValueSeparationBlobFileSize),
3685+
WALBytesWritten: metric.NewCounter(metaWALBytesWritten),
3686+
WALBytesIn: metric.NewCounter(metaWALBytesIn),
3687+
WALFailoverSwitchCount: metric.NewCounter(metaStorageWALFailoverSwitchCount),
3688+
WALFailoverPrimaryDuration: metric.NewCounter(metaStorageWALFailoverPrimaryDuration),
3689+
WALFailoverSecondaryDuration: metric.NewCounter(metaStorageWALFailoverSecondaryDuration),
36583690
WALFailoverWriteAndSyncLatency: metric.NewManualWindowHistogram(
36593691
metaStorageWALFailoverWriteAndSyncLatency,
36603692
pebble.FsyncLatencyBuckets,
@@ -4088,6 +4120,10 @@ func (sm *StoreMetrics) updateEngineMetrics(m storage.Metrics) {
40884120
sm.FlushableIngestTableCount.Update(int64(m.Flush.AsIngestTableCount))
40894121
sm.FlushableIngestTableSize.Update(int64(m.Flush.AsIngestBytes))
40904122
sm.IngestCount.Update(int64(m.Ingest.Count))
4123+
sm.ValueSeparationBytesReferenced.Update(int64(m.BlobFiles.ReferencedValueSize))
4124+
sm.ValueSeparationBytesUnreferenced.Update(int64(m.BlobFiles.ValueSize - m.BlobFiles.ReferencedValueSize))
4125+
sm.ValueSeparationBlobFileCount.Update(int64(m.BlobFiles.LiveCount))
4126+
sm.ValueSeparationBlobFileSize.Update(int64(m.BlobFiles.LiveSize))
40914127
// NB: `UpdateIfHigher` is used here since there is a race in pebble where
40924128
// sometimes the WAL is rotated but metrics are retrieved prior to the update
40934129
// to BytesIn to account for the previous WAL.

0 commit comments

Comments
 (0)