Skip to content

Commit 0f3a605

Browse files
committed
util/metric: introduce HighCardinalityCounter in aggregated metrics
Previously, aggregated metrics supported unbounded child metrics with a BTree as child storage. Child metric creation was static: each child metric had to be explicitly created and linked to the parent metric. We are going to support high cardinality metrics, which requires dynamic child creation based on the combination of label values. The existing implementation is inadequate because it has static child creation and there is no upper bound on the number of child metrics that can be created. This patch introduces `HighCardinalityCounter`, which uses an unordered cache with LRU eviction as child storage. The child metric values are only exported; only the aggregated value is persisted in CRDB. It relies on `LabelSliceCache` to efficiently store label values at the registry. The child metric eviction policy is a combination of a max cache size of `5000` and a minimum retention time of `20` seconds. This guarantees that a child metric's values are seen in at least one scrape, given the default scrape interval of `10` seconds, before the child is evicted due to cache size. Child eviction does not impact the parent value. Epic: CRDB-53398 Part of: CRDB-53831 Release note: None
1 parent 75a4639 commit 0f3a605

File tree

6 files changed

+413
-6
lines changed

6 files changed

+413
-6
lines changed

pkg/util/metric/aggmetric/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ go_library(
1212
visibility = ["//visibility:public"],
1313
deps = [
1414
"//pkg/util/cache",
15+
"//pkg/util/log",
1516
"//pkg/util/metric",
1617
"//pkg/util/metric/tick",
1718
"//pkg/util/syncutil",
1819
"//pkg/util/timeutil",
1920
"@com_github_cockroachdb_errors//:errors",
21+
"@com_github_cockroachdb_redact//:redact",
2022
"@com_github_gogo_protobuf//proto",
2123
"@com_github_google_btree//:btree",
2224
"@com_github_prometheus_client_model//go",
@@ -43,6 +45,7 @@ go_test(
4345
"//pkg/util/cache",
4446
"//pkg/util/leaktest",
4547
"//pkg/util/metric",
48+
"//pkg/util/timeutil",
4649
"@com_github_cockroachdb_crlib//testutils/require",
4750
"@com_github_prometheus_client_model//go",
4851
"@com_github_prometheus_common//expfmt",

pkg/util/metric/aggmetric/agg_metric.go

Lines changed: 117 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,34 @@
99
package aggmetric
1010

1111
import (
12+
"context"
1213
"hash/fnv"
1314
"strings"
1415
"time"
1516

1617
"github.com/cockroachdb/cockroach/pkg/util/cache"
18+
"github.com/cockroachdb/cockroach/pkg/util/log"
1719
"github.com/cockroachdb/cockroach/pkg/util/metric"
1820
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
21+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
1922
"github.com/cockroachdb/errors"
23+
"github.com/cockroachdb/redact"
2024
"github.com/google/btree"
2125
io_prometheus_client "github.com/prometheus/client_model/go"
2226
)
2327

2428
var delimiter = []byte{'_'}
2529

2630
const (
27-
dbLabel = "database"
28-
appLabel = "application_name"
29-
cacheSize = 5000
30-
childMetricTTL = 20 * time.Second
31+
dbLabel = "database"
32+
appLabel = "application_name"
33+
cacheSize = 5000
34+
retentionTimeTillEviction = 20 * time.Second
3135
)
3236

37+
// This is a no-op context used during logging.
38+
var noOpCtx = context.TODO()
39+
3340
// Builder is used to ease constructing metrics with the same labels.
3441
type Builder struct {
3542
labels []string
@@ -99,6 +106,41 @@ func (cs *childSet) initWithBTreeStorageType(labels []string) {
99106
}
100107
}
101108

109+
func (cs *childSet) initWithCacheStorageType(labels []string, metricName string) {
110+
cs.labels = labels
111+
112+
cs.mu.children = &UnorderedCacheWrapper{
113+
cache: cache.NewUnorderedCache(cache.Config{
114+
Policy: cache.CacheLRU,
115+
ShouldEvict: func(size int, key, value any) bool {
116+
if childMetric, ok := value.(ChildMetric); ok {
117+
// Check if the child metric has exceeded 20 seconds and cache size is greater than 5000
118+
if labelSliceCachedChildMetric, ok := childMetric.(LabelSliceCachedChildMetric); ok {
119+
currentTime := timeutil.Now()
120+
age := currentTime.Sub(labelSliceCachedChildMetric.CreatedAt())
121+
return size > cacheSize && age > retentionTimeTillEviction
122+
}
123+
}
124+
return size > cacheSize
125+
},
126+
OnEvictedEntry: func(entry *cache.Entry) {
127+
if childMetric, ok := entry.Value.(ChildMetric); ok {
128+
labelValues := childMetric.labelValues()
129+
130+
// log metric name and label values of evicted entry
131+
log.Dev.Infof(noOpCtx, "evicted child of metric %s with label values: %s\n",
132+
redact.SafeString(metricName), redact.SafeString(strings.Join(labelValues, ",")))
133+
134+
// Invoke DecrementAndDeleteIfZero from ChildMetric which relies on LabelSliceCache
135+
if boundedChild, ok := childMetric.(LabelSliceCachedChildMetric); ok {
136+
boundedChild.DecrementLabelSliceCacheReference()
137+
}
138+
}
139+
},
140+
}),
141+
}
142+
}
143+
102144
func getCacheStorage() *cache.UnorderedCache {
103145
cacheStorage := cache.NewUnorderedCache(cache.Config{
104146
Policy: cache.CacheLRU,
@@ -165,6 +207,75 @@ func (cs *childSet) get(labelVals ...string) (ChildMetric, bool) {
165207
return cs.mu.children.Get(labelVals...)
166208
}
167209

210+
func (cs *childSet) getOrAddWithLabelSliceCache(
211+
metricName string,
212+
createFn func(key uint64, cache *metric.LabelSliceCache) LabelSliceCachedChildMetric,
213+
labelSliceCache *metric.LabelSliceCache,
214+
labelVals ...string,
215+
) ChildMetric {
216+
// Validate label values count
217+
if len(labelVals) != len(cs.labels) {
218+
if log.V(2) {
219+
log.Dev.Errorf(noOpCtx,
220+
"cannot add child with %d label values %v to metric %s with %d labels %s",
221+
len(labelVals), redact.SafeString(metricName), redact.SafeString(strings.Join(labelVals, ",")),
222+
len(cs.labels), redact.SafeString(strings.Join(cs.labels, ",")))
223+
}
224+
return nil
225+
}
226+
227+
cs.mu.Lock()
228+
defer cs.mu.Unlock()
229+
230+
// Create a LabelSliceCacheKey from the label.
231+
key := metricKey(labelVals...)
232+
233+
// Check if the child already exists
234+
if child, ok := cs.mu.children.GetValue(key); ok {
235+
return child
236+
}
237+
238+
// Create and add the new child
239+
child := createFn(key, labelSliceCache)
240+
err := cs.mu.children.AddKey(key, child)
241+
if err != nil {
242+
if log.V(2) {
243+
log.Dev.Errorf(context.TODO(), "child metric creation failed for metric %s with error %v", redact.SafeString(metricName), err)
244+
}
245+
return nil
246+
}
247+
return child
248+
}
249+
250+
// EachWithLabels is a generic implementation for iterating over child metrics and building prometheus metrics.
251+
// This can be used by any aggregate metric type that embeds childSet.
252+
func (cs *childSet) EachWithLabels(
253+
labels []*io_prometheus_client.LabelPair,
254+
f func(metric *io_prometheus_client.Metric),
255+
labelCache *metric.LabelSliceCache,
256+
) {
257+
cs.mu.Lock()
258+
defer cs.mu.Unlock()
259+
260+
cs.mu.children.ForEach(func(cm ChildMetric) {
261+
m := cm.ToPrometheusMetric()
262+
childLabels := make([]*io_prometheus_client.LabelPair, 0, len(labels)+len(cs.labels))
263+
childLabels = append(childLabels, labels...)
264+
lvs := cm.labelValues()
265+
key := metricKey(lvs...)
266+
labelValueCacheValues, _ := labelCache.Get(metric.LabelSliceCacheKey(key))
267+
for i := range cs.labels {
268+
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
269+
Name: &cs.labels[i],
270+
Value: &labelValueCacheValues.LabelValues[i],
271+
})
272+
}
273+
274+
m.Label = childLabels
275+
f(m)
276+
})
277+
}
278+
168279
// clear method removes all children from the childSet. It does not reset parent metric values.
169280
// Method should cautiously be used when childSet is reinitialised/updated. Today, it is
170281
// only used when cluster settings are updated to support app and db label values. For normal
@@ -321,6 +432,7 @@ type ChildMetric interface {
321432
type LabelSliceCachedChildMetric interface {
322433
ChildMetric
323434
CreatedAt() time.Time
435+
DecrementLabelSliceCacheReference()
324436
}
325437

326438
type labelValuer interface {
@@ -368,7 +480,7 @@ func (ucw *UnorderedCacheWrapper) GetValue(key uint64) (ChildMetric, bool) {
368480

369481
func (ucw *UnorderedCacheWrapper) AddKey(key uint64, metric ChildMetric) error {
370482
if _, ok := ucw.cache.Get(key); ok {
371-
return errors.Newf("child %v already exists\n", metric.labelValues())
483+
return errors.Newf("child %s already exists\n", redact.SafeString(strings.Join(metric.labelValues(), ",")))
372484
}
373485
ucw.cache.Add(key, metric)
374486
return nil

pkg/util/metric/aggmetric/counter.go

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ package aggmetric
77

88
import (
99
"sync/atomic"
10+
"time"
1011

1112
"github.com/cockroachdb/cockroach/pkg/util/metric"
13+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
1214
"github.com/gogo/protobuf/proto"
1315
io_prometheus_client "github.com/prometheus/client_model/go"
1416
)
@@ -394,3 +396,170 @@ func (s *SQLChildCounter) Value() int64 {
394396
func (s *SQLChildCounter) Inc(i int64) {
395397
s.value.Inc(i)
396398
}
399+
400+
// HighCardinalityCounter is similar to AggCounter but uses cache storage instead of B-tree,
401+
// allowing for automatic eviction of less frequently used child metrics.
402+
// This is useful when dealing with high cardinality metrics that might exceed resource limits.
403+
type HighCardinalityCounter struct {
404+
g metric.Counter
405+
childSet
406+
labelSliceCache *metric.LabelSliceCache
407+
}
408+
409+
var _ metric.Iterable = (*HighCardinalityCounter)(nil)
410+
var _ metric.PrometheusEvictable = (*HighCardinalityCounter)(nil)
411+
412+
// NewHighCardinalityCounter constructs a new HighCardinalityCounter that uses cache storage
413+
// with eviction for child metrics.
414+
func NewHighCardinalityCounter(
415+
metadata metric.Metadata, childLabels ...string,
416+
) *HighCardinalityCounter {
417+
c := &HighCardinalityCounter{g: *metric.NewCounter(metadata)}
418+
c.initWithCacheStorageType(childLabels, metadata.Name)
419+
return c
420+
}
421+
422+
// GetName is part of the metric.Iterable interface.
423+
func (c *HighCardinalityCounter) GetName(useStaticLabels bool) string {
424+
return c.g.GetName(useStaticLabels)
425+
}
426+
427+
// GetHelp is part of the metric.Iterable interface.
428+
func (c *HighCardinalityCounter) GetHelp() string { return c.g.GetHelp() }
429+
430+
// GetMeasurement is part of the metric.Iterable interface.
431+
func (c *HighCardinalityCounter) GetMeasurement() string { return c.g.GetMeasurement() }
432+
433+
// GetUnit is part of the metric.Iterable interface.
434+
func (c *HighCardinalityCounter) GetUnit() metric.Unit { return c.g.GetUnit() }
435+
436+
// GetMetadata is part of the metric.Iterable interface.
437+
func (c *HighCardinalityCounter) GetMetadata() metric.Metadata { return c.g.GetMetadata() }
438+
439+
// Inspect is part of the metric.Iterable interface.
440+
func (c *HighCardinalityCounter) Inspect(f func(interface{})) { f(c) }
441+
442+
// GetType is part of the metric.PrometheusExportable interface.
443+
func (c *HighCardinalityCounter) GetType() *io_prometheus_client.MetricType {
444+
return c.g.GetType()
445+
}
446+
447+
// GetLabels is part of the metric.PrometheusExportable interface.
448+
func (c *HighCardinalityCounter) GetLabels(useStaticLabels bool) []*io_prometheus_client.LabelPair {
449+
return c.g.GetLabels(useStaticLabels)
450+
}
451+
452+
// ToPrometheusMetric is part of the metric.PrometheusExportable interface.
453+
func (c *HighCardinalityCounter) ToPrometheusMetric() *io_prometheus_client.Metric {
454+
return c.g.ToPrometheusMetric()
455+
}
456+
457+
// Count returns the aggregate count of all of its current and past children.
458+
func (c *HighCardinalityCounter) Count() int64 {
459+
return c.g.Count()
460+
}
461+
462+
// Inc increments the counter value by i for the given label values. If a
463+
// counter with the given label values doesn't exist yet, it creates a new
464+
// counter and increments it. Inc increments parent metrics as well.
465+
func (c *HighCardinalityCounter) Inc(i int64, labelValues ...string) {
466+
c.g.Inc(i)
467+
468+
childMetric := c.GetOrAddChild(labelValues...)
469+
470+
if childMetric != nil {
471+
childMetric.Inc(i)
472+
}
473+
474+
}
475+
476+
// Each is part of the metric.PrometheusIterable interface.
477+
func (c *HighCardinalityCounter) Each(
478+
labels []*io_prometheus_client.LabelPair, f func(metric *io_prometheus_client.Metric),
479+
) {
480+
c.EachWithLabels(labels, f, c.labelSliceCache)
481+
}
482+
483+
// InitializeMetrics is part of the PrometheusEvictable interface.
484+
func (c *HighCardinalityCounter) InitializeMetrics(labelCache *metric.LabelSliceCache) {
485+
c.mu.Lock()
486+
defer c.mu.Unlock()
487+
488+
c.labelSliceCache = labelCache
489+
}
490+
491+
// GetOrAddChild returns the existing child counter for the given label values,
492+
// or creates a new one if it doesn't exist. This is the preferred method for
493+
// cache-based storage to avoid panics on existing keys.
494+
func (c *HighCardinalityCounter) GetOrAddChild(labelVals ...string) *HighCardinalityChildCounter {
495+
496+
if len(labelVals) == 0 {
497+
return nil
498+
}
499+
500+
// Create a LabelSliceCacheKey from the tenantID.
501+
key := metric.LabelSliceCacheKey(metricKey(labelVals...))
502+
503+
child := c.getOrAddWithLabelSliceCache(c.GetMetadata().Name, c.createHighCardinalityChildCounter, c.labelSliceCache, labelVals...)
504+
505+
c.labelSliceCache.Upsert(key, &metric.LabelSliceCacheValue{
506+
LabelValues: labelVals,
507+
})
508+
509+
return child.(*HighCardinalityChildCounter)
510+
}
511+
512+
func (c *HighCardinalityCounter) createHighCardinalityChildCounter(
513+
key uint64, cache *metric.LabelSliceCache,
514+
) LabelSliceCachedChildMetric {
515+
return &HighCardinalityChildCounter{
516+
LabelSliceCacheKey: metric.LabelSliceCacheKey(key),
517+
LabelSliceCache: cache,
518+
createdAt: timeutil.Now(),
519+
}
520+
}
521+
522+
// HighCardinalityChildCounter is a child of a HighCardinalityCounter. When metrics are
523+
// collected by prometheus, each of the children will appear with a distinct label,
524+
// however, when cockroach internally collects metrics, only the parent is collected.
525+
type HighCardinalityChildCounter struct {
526+
metric.LabelSliceCacheKey
527+
value metric.Counter
528+
*metric.LabelSliceCache
529+
createdAt time.Time
530+
}
531+
532+
func (c *HighCardinalityChildCounter) CreatedAt() time.Time {
533+
return c.createdAt
534+
}
535+
536+
func (c *HighCardinalityChildCounter) DecrementLabelSliceCacheReference() {
537+
c.LabelSliceCache.DecrementAndDeleteIfZero(c.LabelSliceCacheKey)
538+
}
539+
540+
// ToPrometheusMetric constructs a prometheus metric for this HighCardinalityChildCounter.
541+
func (c *HighCardinalityChildCounter) ToPrometheusMetric() *io_prometheus_client.Metric {
542+
return &io_prometheus_client.Metric{
543+
Counter: &io_prometheus_client.Counter{
544+
Value: proto.Float64(float64(c.Value())),
545+
},
546+
}
547+
}
548+
549+
func (c *HighCardinalityChildCounter) labelValues() []string {
550+
lv, ok := c.LabelSliceCache.Get(c.LabelSliceCacheKey)
551+
if !ok {
552+
return nil
553+
}
554+
return lv.LabelValues
555+
}
556+
557+
// Value returns the HighCardinalityChildCounter's current value.
558+
func (c *HighCardinalityChildCounter) Value() int64 {
559+
return c.value.Count()
560+
}
561+
562+
// Inc increments the HighCardinalityChildCounter's value.
563+
func (c *HighCardinalityChildCounter) Inc(i int64) {
564+
c.value.Inc(i)
565+
}

0 commit comments

Comments
 (0)