Skip to content

Commit 6321d87

Browse files
craig[bot]aa-joshi
andcommitted
Merge #144450
144450: metrics: add SQLGauge to support high cardinality metrics r=aa-joshi a=aa-joshi This patch introduces `SQLGauge`, which is an aggregation gauge of `SQLChildGauge` metrics. SQLGauge supports combinations of the `database` and `application_name` labels. Each SQLChildGauge stores the value of a gauge for a given combination of database and application name. SQLGauge internally uses cache.UnorderedCache to store child metrics, with a default size of 5000. SQLGauge reports only the aggregate sum of all its children to the crdb-internal time series, while its children are additionally exported to Prometheus. SQLGauge differs from AggGauge in that a SQLGauge creates child metrics dynamically, while AggGauge needs child creation up front. We have extracted the dynamic child creation out of AggGauge into SQLGauge. Epic: [CRDB-43153](https://cockroachlabs.atlassian.net/browse/CRDB-43153) Part of: [CRDB-48489](https://cockroachlabs.atlassian.net/browse/CRDB-48489) Release note: None Co-authored-by: Akshay Joshi <[email protected]>
2 parents 1a3f9ea + b2dd376 commit 6321d87

13 files changed

+328
-267
lines changed

pkg/util/metric/aggmetric/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ go_test(
4646
"@com_github_cockroachdb_crlib//testutils/require",
4747
"@com_github_prometheus_client_model//go",
4848
"@com_github_prometheus_common//expfmt",
49-
"@com_github_stretchr_testify//assert",
5049
"@com_github_stretchr_testify//require",
5150
],
5251
)

pkg/util/metric/aggmetric/agg_metric.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ package aggmetric
1111
import (
1212
"hash/fnv"
1313
"strings"
14+
"sync/atomic"
1415

1516
"github.com/RaduBerinde/btree" // TODO(#144504): switch to the newer btree
1617
"github.com/cockroachdb/cockroach/pkg/util/cache"
@@ -168,6 +169,128 @@ func (cs *childSet) clear() {
168169
cs.mu.children.Clear()
169170
}
170171

172+
type SQLMetric struct {
173+
labelConfig atomic.Uint64
174+
mu struct {
175+
syncutil.Mutex
176+
children ChildrenStorage
177+
}
178+
}
179+
180+
func NewSQLMetric(labelConfig uint64) *SQLMetric {
181+
sm := &SQLMetric{}
182+
sm.labelConfig.Store(labelConfig)
183+
sm.mu.children = &UnorderedCacheWrapper{
184+
cache: getCacheStorage(),
185+
}
186+
return sm
187+
}
188+
189+
func (sm *SQLMetric) Each(
190+
labels []*io_prometheus_client.LabelPair, f func(metric *io_prometheus_client.Metric),
191+
) {
192+
sm.mu.Lock()
193+
defer sm.mu.Unlock()
194+
195+
sm.mu.children.Do(func(e interface{}) {
196+
cm := sm.mu.children.GetChildMetric(e)
197+
pm := cm.ToPrometheusMetric()
198+
199+
childLabels := make([]*io_prometheus_client.LabelPair, 0, len(labels)+2)
200+
childLabels = append(childLabels, labels...)
201+
lvs := cm.labelValues()
202+
dbLabel := dbLabel
203+
appLabel := appLabel
204+
switch sm.labelConfig.Load() {
205+
case LabelConfigDB:
206+
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
207+
Name: &dbLabel,
208+
Value: &lvs[0],
209+
})
210+
case LabelConfigApp:
211+
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
212+
Name: &appLabel,
213+
Value: &lvs[0],
214+
})
215+
case LabelConfigAppAndDB:
216+
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
217+
Name: &dbLabel,
218+
Value: &lvs[0],
219+
})
220+
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
221+
Name: &appLabel,
222+
Value: &lvs[1],
223+
})
224+
default:
225+
}
226+
pm.Label = childLabels
227+
f(pm)
228+
})
229+
}
230+
231+
func (sm *SQLMetric) get(labelVals ...string) (ChildMetric, bool) {
232+
return sm.mu.children.Get(labelVals...)
233+
}
234+
235+
func (sm *SQLMetric) add(metric ChildMetric) {
236+
sm.mu.children.Add(metric)
237+
}
238+
239+
// getOrAddChild returns the child metric for the given label values. If the child
240+
// doesn't exist, it creates a new one and adds it to the collection.
241+
func (sm *SQLMetric) getOrAddChild(
242+
metricType io_prometheus_client.MetricType, labelValues ...string,
243+
) ChildMetric {
244+
sm.mu.Lock()
245+
defer sm.mu.Unlock()
246+
247+
// If the child already exists, return it.
248+
if child, ok := sm.get(labelValues...); ok {
249+
return child
250+
}
251+
252+
// Otherwise, create a new child, add and return it.
253+
var child ChildMetric
254+
switch metricType {
255+
case io_prometheus_client.MetricType_COUNTER:
256+
child = &SQLChildCounter{
257+
labelValuesSlice: labelValuesSlice(labelValues),
258+
}
259+
case io_prometheus_client.MetricType_GAUGE:
260+
child = &SQLChildGauge{
261+
labelValuesSlice: labelValuesSlice(labelValues),
262+
}
263+
default:
264+
panic(errors.AssertionFailedf("unrecognised metric type %v", metricType))
265+
}
266+
267+
sm.add(child)
268+
return child
269+
}
270+
271+
// getChildByLabelConfig returns the child metric based on the label configuration.
272+
// It returns the child metric and a boolean indicating if the child was found.
273+
// If the label configuration is either LabelConfigDisabled or unrecognised, it returns
274+
// ChildMetric as nil and false.
275+
func (sm *SQLMetric) getChildByLabelConfig(
276+
metricType io_prometheus_client.MetricType, db string, app string,
277+
) (ChildMetric, bool) {
278+
var childMetric ChildMetric
279+
switch sm.labelConfig.Load() {
280+
case LabelConfigDB:
281+
childMetric = sm.getOrAddChild(metricType, db)
282+
return childMetric, true
283+
case LabelConfigApp:
284+
childMetric = sm.getOrAddChild(metricType, app)
285+
return childMetric, true
286+
case LabelConfigAppAndDB:
287+
childMetric = sm.getOrAddChild(metricType, db, app)
288+
return childMetric, true
289+
default:
290+
return nil, false
291+
}
292+
}
293+
171294
type MetricItem interface {
172295
labelValuer
173296
}

pkg/util/metric/aggmetric/counter.go

Lines changed: 6 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"sync/atomic"
1010

1111
"github.com/cockroachdb/cockroach/pkg/util/metric"
12-
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
1312
"github.com/gogo/protobuf/proto"
1413
io_prometheus_client "github.com/prometheus/client_model/go"
1514
)
@@ -252,54 +251,8 @@ func (g *CounterFloat64) UpdateIfHigher(i float64) {
252251
// a SQLCounter creates child metrics dynamically while AggCounter needs the
253252
// child creation up front.
254253
type SQLCounter struct {
255-
g metric.Counter
256-
labelConfig atomic.Uint64
257-
mu struct {
258-
syncutil.Mutex
259-
children ChildrenStorage
260-
}
261-
}
262-
263-
func (c *SQLCounter) Each(
264-
labels []*io_prometheus_client.LabelPair, f func(metric *io_prometheus_client.Metric),
265-
) {
266-
c.mu.Lock()
267-
defer c.mu.Unlock()
268-
269-
c.mu.children.Do(func(e interface{}) {
270-
cm := c.mu.children.GetChildMetric(e)
271-
pm := cm.ToPrometheusMetric()
272-
273-
childLabels := make([]*io_prometheus_client.LabelPair, 0, len(labels)+2)
274-
childLabels = append(childLabels, labels...)
275-
lvs := cm.labelValues()
276-
dbLabel := dbLabel
277-
appLabel := appLabel
278-
switch c.labelConfig.Load() {
279-
case LabelConfigDB:
280-
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
281-
Name: &dbLabel,
282-
Value: &lvs[0],
283-
})
284-
case LabelConfigApp:
285-
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
286-
Name: &appLabel,
287-
Value: &lvs[0],
288-
})
289-
case LabelConfigAppAndDB:
290-
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
291-
Name: &dbLabel,
292-
Value: &lvs[0],
293-
})
294-
childLabels = append(childLabels, &io_prometheus_client.LabelPair{
295-
Name: &appLabel,
296-
Value: &lvs[1],
297-
})
298-
default:
299-
}
300-
pm.Label = childLabels
301-
f(pm)
302-
})
254+
g metric.Counter
255+
*SQLMetric
303256
}
304257

305258
var _ metric.Iterable = (*SQLCounter)(nil)
@@ -311,32 +264,10 @@ func NewSQLCounter(metadata metric.Metadata) *SQLCounter {
311264
c := &SQLCounter{
312265
g: *metric.NewCounter(metadata),
313266
}
314-
c.mu.children = &UnorderedCacheWrapper{
315-
cache: getCacheStorage(),
316-
}
317-
c.labelConfig.Store(LabelConfigDisabled)
267+
c.SQLMetric = NewSQLMetric(LabelConfigDisabled)
318268
return c
319269
}
320270

321-
// getOrAddChild returns the child metric for the given label values. If the child
322-
// doesn't exist, it creates a new one and adds it to the collection.
323-
func (c *SQLCounter) getOrAddChild(labelValues ...string) ChildMetric {
324-
c.mu.Lock()
325-
defer c.mu.Unlock()
326-
327-
// If the child already exists, return it.
328-
if child, ok := c.mu.children.Get(labelValues...); ok {
329-
return child
330-
}
331-
332-
// Otherwise, create a new child and return it.
333-
child := &SQLChildCounter{
334-
labelValuesSlice: labelValuesSlice(labelValues),
335-
}
336-
c.mu.children.Add(child)
337-
return child
338-
}
339-
340271
// GetType is part of the metric.PrometheusExportable interface.
341272
func (c *SQLCounter) GetType() *io_prometheus_client.MetricType {
342273
return c.g.GetType()
@@ -389,17 +320,11 @@ func (c *SQLCounter) Inspect(f func(interface{})) {
389320
func (c *SQLCounter) Inc(i int64, db, app string) {
390321
c.g.Inc(i)
391322

392-
var childMetric ChildMetric
393-
switch c.labelConfig.Load() {
394-
case LabelConfigDB:
395-
childMetric = c.getOrAddChild(db)
396-
case LabelConfigApp:
397-
childMetric = c.getOrAddChild(app)
398-
case LabelConfigAppAndDB:
399-
childMetric = c.getOrAddChild(db, app)
400-
default:
323+
childMetric, isChildMetricEnabled := c.getChildByLabelConfig(*c.GetType(), db, app)
324+
if !isChildMetricEnabled {
401325
return
402326
}
327+
403328
childMetric.(*SQLChildCounter).Inc(i)
404329
}
405330

0 commit comments

Comments
 (0)