Skip to content

Commit c6717b0

Browse files
committed
util: introduce LabelSliceCache in metric
Previously, we persisted the label value slice in each child metric of an aggregated metric type. This was wasteful because it created redundant memory allocations for the same label value slice across different metrics. To address this, this patch introduces `LabelSliceCache`, which persists the label values at the registry level; metrics reference them through a key. The LabelSliceCache is referenced in metrics through the `PrometheusEvictable` interface. The LabelSliceCache contains 2 critical methods: 1. Upsert: This method implements the "add reference" part of the reference counting mechanism. It increments the reference count for the label values by 1 if an entry already exists. Otherwise, it creates a new entry for the label values with an initial count of 1. 2. DecrementAndDeleteIfZero: This method decrements the reference counter for the given label values by 1. If the count reaches zero, no metrics rely on those label values any more, and the entry is deleted from the cache. These methods ensure that the cache tracks how many metrics are currently using each label combination, enabling proper cleanup when metrics are no longer needed. Epic: CRDB-53398 Part of: CRDB-53830 Release note: None
1 parent fe5ec41 commit c6717b0

File tree

6 files changed

+621
-8
lines changed

6 files changed

+621
-8
lines changed

pkg/util/metric/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ go_library(
1010
"hdrhistogram.go",
1111
"histogram_buckets.go",
1212
"histogram_snapshot.go",
13+
"label_slice_cache.go",
1314
"metric.go",
1415
"prometheus_exporter.go",
1516
"prometheus_rule_exporter.go",
@@ -24,6 +25,7 @@ go_library(
2425
deps = [
2526
"//pkg/settings",
2627
"//pkg/util/buildutil",
28+
"//pkg/util/cache",
2729
"//pkg/util/envutil",
2830
"//pkg/util/log",
2931
"//pkg/util/metamorphic",
@@ -48,6 +50,7 @@ go_test(
4850
size = "small",
4951
srcs = [
5052
"histogram_buckets_test.go",
53+
"label_slice_cache_test.go",
5154
"metric_ext_test.go",
5255
"metric_test.go",
5356
"prometheus_exporter_test.go",

pkg/util/metric/aggmetric/agg_metric.go

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ package aggmetric
1111
import (
1212
"hash/fnv"
1313
"strings"
14+
"time"
1415

1516
"github.com/cockroachdb/cockroach/pkg/util/cache"
1617
"github.com/cockroachdb/cockroach/pkg/util/metric"
@@ -23,8 +24,10 @@ import (
2324
var delimiter = []byte{'_'}
2425

2526
const (
	// dbLabel is the label name used for the database dimension.
	dbLabel = "database"
	// appLabel is the label name used for the application-name dimension.
	appLabel = "application_name"
	// cacheSize is the size used when building the cache-backed child storage.
	// It was previously a constant local to getCacheStorage.
	// TODO(review): presumably the maximum number of cached children — confirm.
	cacheSize = 5000
	// childMetricTTL is a 20 second time-to-live for child metrics.
	// NOTE(review): the code that consumes this value is not visible here —
	// confirm how it is applied.
	childMetricTTL = 20 * time.Second
)
2932

3033
// Builder is used to ease constructing metrics with the same labels.
@@ -97,7 +100,6 @@ func (cs *childSet) initWithBTreeStorageType(labels []string) {
97100
}
98101

99102
func getCacheStorage() *cache.UnorderedCache {
100-
const cacheSize = 5000
101103
cacheStorage := cache.NewUnorderedCache(cache.Config{
102104
Policy: cache.CacheLRU,
103105
//TODO (aa-joshi) : make cacheSize configurable in the future
@@ -311,6 +313,16 @@ type ChildMetric interface {
311313
ToPrometheusMetric() *io_prometheus_client.Metric
312314
}
313315

316+
// LabelSliceCachedChildMetric extends ChildMetric with label slice caching capabilities.
317+
// This interface is designed for child metrics that relies on label slice reference
318+
// counting system. Metrics implementing this interface can have their label values
319+
// cached and shared among multiple metrics with identical label combinations,
320+
// reducing memory usage and improving performance in scenarios with many similar metrics.
321+
type LabelSliceCachedChildMetric interface {
322+
ChildMetric
323+
CreatedAt() time.Time
324+
}
325+
314326
// labelValuer is implemented by types that can report their label values as a
// slice of strings.
type labelValuer interface {
	// labelValues returns the label values of the receiver.
	labelValues() []string
}
@@ -330,9 +342,10 @@ func metricKey(labels ...string) uint64 {
330342

331343
type ChildrenStorage interface {
332344
Get(labelVals ...string) (ChildMetric, bool)
345+
GetValue(key uint64) (ChildMetric, bool)
333346
Add(metric ChildMetric)
347+
AddKey(key uint64, metric ChildMetric) error
334348
Del(key ChildMetric)
335-
336349
// ForEach calls f for each child metric, in arbitrary order.
337350
ForEach(f func(metric ChildMetric))
338351
Clear()
@@ -345,6 +358,22 @@ type UnorderedCacheWrapper struct {
345358
cache *cache.UnorderedCache
346359
}
347360

361+
func (ucw *UnorderedCacheWrapper) GetValue(key uint64) (ChildMetric, bool) {
362+
value, ok := ucw.cache.Get(key)
363+
if !ok {
364+
return nil, false
365+
}
366+
return value.(ChildMetric), ok
367+
}
368+
369+
func (ucw *UnorderedCacheWrapper) AddKey(key uint64, metric ChildMetric) error {
370+
if _, ok := ucw.cache.Get(key); ok {
371+
return errors.Newf("child %v already exists\n", metric.labelValues())
372+
}
373+
ucw.cache.Add(key, metric)
374+
return nil
375+
}
376+
348377
func (ucw *UnorderedCacheWrapper) Get(labelVals ...string) (ChildMetric, bool) {
349378
hashKey := metricKey(labelVals...)
350379
value, ok := ucw.cache.Get(hashKey)
@@ -384,6 +413,18 @@ type BtreeWrapper struct {
384413
tree *btree.BTreeG[MetricItem]
385414
}
386415

416+
func (b BtreeWrapper) GetValue(key uint64) (ChildMetric, bool) {
417+
// GetValue method is not relevant for BtreeWrapper as it uses ChildMetric
418+
// as an item in Btree. We are going to remove BtreeWrapper as ChildrenStorage.
419+
panic("unimplemented")
420+
}
421+
422+
func (b BtreeWrapper) AddKey(_ uint64, _ ChildMetric) error {
423+
// AddKey method is not relevant for BtreeWrapper as it uses ChildMetric
424+
// as an item in Btree. We are going to remove BtreeWrapper as ChildrenStorage.
425+
panic("unimplemented")
426+
}
427+
387428
func (b BtreeWrapper) Get(labelVals ...string) (ChildMetric, bool) {
388429
key := labelValuesSlice(labelVals)
389430
cm, ok := b.tree.Get(&key)
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package metric
7+
8+
import (
9+
"sync/atomic"
10+
11+
"github.com/cockroachdb/cockroach/pkg/util/cache"
12+
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
13+
)
14+
15+
// LabelSliceCacheKey is the hash key under which a label value slice is stored
// in a LabelSliceCache.
type LabelSliceCacheKey uint64
17+
18+
// LabelSliceCacheValue is the entry stored in a LabelSliceCache: a label value
// slice together with its reference count. Values are stored and handed out by
// pointer; because the struct embeds an atomic counter it must not be copied.
type LabelSliceCacheValue struct {
	// LabelValues holds the cached label values.
	LabelValues []string
	// Counter is the reference count for LabelValues. It is set to 1 or
	// incremented by LabelSliceCache.Upsert and decremented by
	// LabelSliceCache.DecrementAndDeleteIfZero.
	Counter atomic.Int64
}
23+
24+
// LabelSliceCache is a thread-safe cache mapping hash keys to label value/counter pairs.
25+
type LabelSliceCache struct {
26+
mu struct {
27+
syncutil.Mutex
28+
cache *cache.UnorderedCache
29+
}
30+
}
31+
32+
// NewLabelSliceCache creates a new LabelSliceCache.
33+
func NewLabelSliceCache() *LabelSliceCache {
34+
labelSliceCache := &LabelSliceCache{}
35+
labelSliceCache.mu.cache = cache.NewUnorderedCache(cache.Config{})
36+
return labelSliceCache
37+
}
38+
39+
// Get returns the value for the given key, or nil if not present.
40+
func (lsc *LabelSliceCache) Get(key LabelSliceCacheKey) (*LabelSliceCacheValue, bool) {
41+
lsc.mu.Lock()
42+
defer lsc.mu.Unlock()
43+
val, ok := lsc.mu.cache.Get(key)
44+
if !ok {
45+
return nil, false
46+
}
47+
return val.(*LabelSliceCacheValue), true
48+
}
49+
50+
// Upsert adds or updates the value for the given key with reference counting.
51+
// This method implements the "add reference" part of the reference counting mechanism.
52+
// When a new metric with a specific label combination is created, this method performs
53+
// either:
54+
// 1. Add a new entry to the cache with a reference count of 1 (if the label combination
55+
// is being used for the first time)
56+
// 2. Increment the existing reference count by 1 (if the label combination is already
57+
// cached and being used by other metrics)
58+
//
59+
// This ensures that the cache tracks how many metrics are currently using each
60+
// label combination, enabling proper cleanup via DecrementAndDeleteIfZero when
61+
// metrics are no longer needed.
62+
func (lsc *LabelSliceCache) Upsert(key LabelSliceCacheKey, value *LabelSliceCacheValue) {
63+
lsc.mu.Lock()
64+
defer lsc.mu.Unlock()
65+
val, ok := lsc.mu.cache.Get(key)
66+
67+
if !ok {
68+
value.Counter.Store(1)
69+
lsc.mu.cache.Add(key, value)
70+
} else {
71+
existingValue := val.(*LabelSliceCacheValue)
72+
existingValue.Counter.Add(1)
73+
lsc.mu.cache.Add(key, existingValue)
74+
}
75+
}
76+
77+
// Delete removes the value for the given key.
78+
func (lsc *LabelSliceCache) Delete(key LabelSliceCacheKey) {
79+
lsc.mu.Lock()
80+
defer lsc.mu.Unlock()
81+
lsc.mu.cache.Del(key)
82+
}
83+
84+
// DecrementAndDeleteIfZero decrements the reference counter for the given key by 1.
85+
// This method implements reference counting for cached label value combinations.
86+
// When metrics with specific label combinations are created, the cache counter
87+
// is incremented via Upsert. When those metrics are no longer needed or go out
88+
// of scope, this method should be called to decrement the reference count.
89+
//
90+
// The automatic deletion when the counter reaches zero is crucial for preventing
91+
// memory leaks in long-running processes where metrics with dynamic label values
92+
// might be created and destroyed frequently. Without this cleanup mechanism,
93+
// the cache would accumulate unused label combinations indefinitely.
94+
//
95+
// Returns true if the entry was deleted due to the counter reaching zero,
96+
// false if the entry still has references or didn't exist.
97+
func (lsc *LabelSliceCache) DecrementAndDeleteIfZero(key LabelSliceCacheKey) bool {
98+
lsc.mu.Lock()
99+
defer lsc.mu.Unlock()
100+
101+
val, ok := lsc.mu.cache.Get(key)
102+
if !ok {
103+
// Key doesn't exist, return 0 and false
104+
return false
105+
}
106+
107+
existingValue := val.(*LabelSliceCacheValue)
108+
newCount := existingValue.Counter.Add(-1)
109+
110+
if newCount <= 0 {
111+
// Remove the entry when counter reaches 0 or below
112+
lsc.mu.cache.Del(key)
113+
return true
114+
}
115+
116+
return false
117+
}

0 commit comments

Comments
 (0)