Skip to content

Commit d11c439

Browse files
committed
sql: update sql metric definition to support configurable labels
This patch updates the SQL metric definitions to support `database` and `application_name` as additional labels. This is driven by the cluster settings introduced as part of #144610. The updated metrics export the additional labels based on the cluster settings `sql.metrics.application_name.enabled` and `sql.metrics.database_name.enabled`. The SQLMetric persists the aggregate sum of all its children, while the children are additionally exported to Prometheus. Epic: CRDB-43153 Part of: CRDB-48251 Release note: None
1 parent acfdcc8 commit d11c439

File tree

12 files changed

+209
-68
lines changed

12 files changed

+209
-68
lines changed

pkg/server/telemetry/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ go_library(
1212
"//pkg/sql/pgwire/pgcode",
1313
"//pkg/sql/pgwire/pgerror",
1414
"//pkg/util/metric",
15+
"//pkg/util/metric/aggmetric",
1516
"//pkg/util/syncutil",
1617
"@com_github_cockroachdb_errors//:errors",
18+
"@com_github_prometheus_client_model//go",
1719
],
1820
)
1921

pkg/server/telemetry/features.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ import (
1414
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
1515
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
1616
"github.com/cockroachdb/cockroach/pkg/util/metric"
17+
"github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
1718
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
1819
"github.com/cockroachdb/errors"
20+
io_prometheus_client "github.com/prometheus/client_model/go"
1921
)
2022

2123
// Bucket10 buckets a number by order of magnitude base 10, eg 637 -> 100.
@@ -164,6 +166,92 @@ func (c CounterWithMetric) Inspect(f func(interface{})) {
164166
c.metric.Inspect(f)
165167
}
166168

169+
// CounterWithAggMetric combines a telemetry counter and an aggregate
// (aggmetric.SQLCounter) metric counter so that both are incremented together.
170+
type CounterWithAggMetric struct {
171+
// telemetry is the telemetry counter, incremented via the package-level Inc.
telemetry Counter
172+
// metric is the aggregate SQL counter; Inc passes it a database name and an
// application name along with the increment.
metric *aggmetric.SQLCounter
173+
}
174+
175+
// Compile-time assertions that CounterWithAggMetric satisfies the metric
// interfaces below. Necessary for metric metadata registration.
176+
var _ metric.Iterable = CounterWithAggMetric{}
177+
var _ metric.PrometheusExportable = CounterWithAggMetric{}
178+
179+
// NewCounterWithAggMetric creates a CounterWithAggMetric. The telemetry
// counter is looked up by metadata.Name, and the metric is a new
// aggmetric.SQLCounter built from the same metadata.
180+
func NewCounterWithAggMetric(metadata metric.Metadata) CounterWithAggMetric {
181+
return CounterWithAggMetric{
182+
telemetry: GetCounter(metadata.Name),
183+
metric: aggmetric.NewSQLCounter(metadata),
184+
}
185+
}
186+
187+
// Inc increments both counters by one. dbName and appName are forwarded only
// to the aggregate metric; the telemetry counter takes no extra arguments.
188+
func (c CounterWithAggMetric) Inc(dbName, appName string) {
189+
Inc(c.telemetry)
190+
c.metric.Inc(1, dbName, appName)
191+
}
192+
193+
// Count returns the value of the metric, not the telemetry. Note that the
194+
// telemetry value may reset to zero when, for example, GetFeatureCounts() is
195+
// called with ResetCounts to generate a report.
// The returned value is read from the wrapped aggregate SQLCounter.
196+
func (c CounterWithAggMetric) Count() int64 {
197+
return c.metric.Count()
198+
}
199+
200+
// Forward the metric.Iterable and PrometheusIterable interfaces to the metric counter. We
201+
// don't just embed the counter because our Inc() interface is a bit different.
202+
203+
// GetName implements metric.Iterable by returning the wrapped SQLCounter's
// name; useStaticLabels is passed through unchanged.
204+
func (c CounterWithAggMetric) GetName(useStaticLabels bool) string {
205+
return c.metric.GetName(useStaticLabels)
206+
}
207+
208+
// GetHelp implements metric.Iterable by returning the wrapped SQLCounter's
// help text.
209+
func (c CounterWithAggMetric) GetHelp() string {
210+
return c.metric.GetHelp()
211+
}
212+
213+
// GetMeasurement implements metric.Iterable by returning the wrapped
// SQLCounter's measurement string.
214+
func (c CounterWithAggMetric) GetMeasurement() string {
215+
return c.metric.GetMeasurement()
216+
}
217+
218+
// GetUnit implements metric.Iterable by returning the wrapped SQLCounter's
// unit.
219+
func (c CounterWithAggMetric) GetUnit() metric.Unit {
220+
return c.metric.GetUnit()
221+
}
222+
223+
// GetMetadata implements metric.Iterable by returning the wrapped
// SQLCounter's metadata.
224+
func (c CounterWithAggMetric) GetMetadata() metric.Metadata {
225+
return c.metric.GetMetadata()
226+
}
227+
228+
// Inspect implements metric.Iterable by handing f to the wrapped SQLCounter's
// Inspect.
229+
func (c CounterWithAggMetric) Inspect(f func(interface{})) {
230+
c.metric.Inspect(f)
231+
}
232+
233+
// GetType returns the Prometheus metric type reported by the wrapped
// SQLCounter.
func (c CounterWithAggMetric) GetType() *io_prometheus_client.MetricType {
234+
return c.metric.GetType()
235+
}
236+
237+
// GetLabels returns the Prometheus label pairs reported by the wrapped
// SQLCounter; useStaticLabels is passed through unchanged.
func (c CounterWithAggMetric) GetLabels(useStaticLabels bool) []*io_prometheus_client.LabelPair {
238+
return c.metric.GetLabels(useStaticLabels)
239+
}
240+
241+
// ToPrometheusMetric returns the wrapped SQLCounter's Prometheus metric
// representation.
func (c CounterWithAggMetric) ToPrometheusMetric() *io_prometheus_client.Metric {
242+
return c.metric.ToPrometheusMetric()
243+
}
244+
245+
// Each forwards pairs and f to the wrapped SQLCounter's Each.
// NOTE(review): presumably f is invoked once per exported child metric;
// confirm against the aggmetric implementation.
func (c CounterWithAggMetric) Each(
246+
pairs []*io_prometheus_client.LabelPair, f func(metric *io_prometheus_client.Metric),
247+
) {
248+
c.metric.Each(pairs, f)
249+
}
250+
251+
// ReinitialiseChildMetrics forwards labelValueConfig to the wrapped
// SQLCounter.
// NOTE(review): presumably this rebuilds the per-label child metrics when the
// label configuration changes; confirm against the aggmetric implementation.
func (c CounterWithAggMetric) ReinitialiseChildMetrics(labelValueConfig metric.LabelConfig) {
252+
c.metric.ReinitialiseChildMetrics(labelValueConfig)
253+
}
254+
167255
// init allocates the package-level counters map, pre-sized with
// approxFeatureCount.
func init() {
168256
counters.m = make(map[string]Counter, approxFeatureCount)
169257
}

pkg/server/telemetry/features_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,17 @@ func TestBucket(t *testing.T) {
9292
// for example, a report is created.
9393
func TestCounterWithMetric(t *testing.T) {
9494
cm := telemetry.NewCounterWithMetric(metric.Metadata{Name: "test-metric"})
95+
cag := telemetry.NewCounterWithAggMetric(metric.Metadata{Name: "test-agg-metric"})
96+
9597
cm.Inc()
98+
cag.Inc("test-db", "test-app")
9699

97100
// Using GetFeatureCounts to read the telemetry value.
98101
m1 := telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ReadOnly)
99102
require.Equal(t, int32(1), m1["test-metric"])
100103
require.Equal(t, int64(1), cm.Count())
104+
require.Equal(t, int32(1), m1["test-agg-metric"])
105+
require.Equal(t, int64(1), cm.Count())
101106

102107
// Reset the telemetry.
103108
telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts)
@@ -106,4 +111,6 @@ func TestCounterWithMetric(t *testing.T) {
106111
m2 := telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ReadOnly)
107112
require.Equal(t, int32(0), m2["test-metric"])
108113
require.Equal(t, int64(1), cm.Count())
114+
require.Equal(t, int32(0), m2["test-agg-metric"])
115+
require.Equal(t, int64(1), cm.Count())
109116
}

pkg/sql/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ go_library(
582582
"//pkg/util/memzipper",
583583
"//pkg/util/metamorphic",
584584
"//pkg/util/metric",
585+
"//pkg/util/metric/aggmetric",
585586
"//pkg/util/mon",
586587
"//pkg/util/optional",
587588
"//pkg/util/pretty",

pkg/sql/conn_executor.go

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ import (
8383
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
8484
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
8585
"github.com/cockroachdb/cockroach/pkg/util/metric"
86+
"github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
8687
"github.com/cockroachdb/cockroach/pkg/util/mon"
8788
"github.com/cockroachdb/cockroach/pkg/util/sentryutil"
8889
"github.com/cockroachdb/cockroach/pkg/util/stop"
@@ -581,7 +582,7 @@ func makeMetrics(internal bool, sv *settings.Values) Metrics {
581582
Duration: 6 * metricsSampleInterval,
582583
BucketConfig: metric.IOLatencyBuckets,
583584
}),
584-
SQLServiceLatency: metric.NewHistogram(metric.HistogramOptions{
585+
SQLServiceLatency: aggmetric.NewSQLHistogram(metric.HistogramOptions{
585586
Mode: metric.HistogramModePreferHdrLatency,
586587
Metadata: getMetricMeta(MetaSQLServiceLatency, internal),
587588
Duration: 6 * metricsSampleInterval,
@@ -599,21 +600,21 @@ func makeMetrics(internal bool, sv *settings.Values) Metrics {
599600
Duration: 6 * metricsSampleInterval,
600601
BucketConfig: metric.IOLatencyBuckets,
601602
}),
602-
SQLTxnLatency: metric.NewHistogram(metric.HistogramOptions{
603+
SQLTxnLatency: aggmetric.NewSQLHistogram(metric.HistogramOptions{
603604
Mode: metric.HistogramModePreferHdrLatency,
604605
Metadata: getMetricMeta(MetaSQLTxnLatency, internal),
605606
Duration: 6 * metricsSampleInterval,
606607
BucketConfig: metric.IOLatencyBuckets,
607608
}),
608-
SQLTxnsOpen: metric.NewGauge(getMetricMeta(MetaSQLTxnsOpen, internal)),
609-
SQLActiveStatements: metric.NewGauge(getMetricMeta(MetaSQLActiveQueries, internal)),
609+
SQLTxnsOpen: aggmetric.NewSQLGauge(getMetricMeta(MetaSQLTxnsOpen, internal)),
610+
SQLActiveStatements: aggmetric.NewSQLGauge(getMetricMeta(MetaSQLActiveQueries, internal)),
610611
SQLContendedTxns: metric.NewCounter(getMetricMeta(MetaSQLTxnContended, internal)),
611612

612613
TxnAbortCount: metric.NewCounter(getMetricMeta(MetaTxnAbort, internal)),
613-
FailureCount: metric.NewCounter(getMetricMeta(MetaFailure, internal)),
614+
FailureCount: aggmetric.NewSQLCounter(getMetricMeta(MetaFailure, internal)),
614615
StatementTimeoutCount: metric.NewCounter(getMetricMeta(MetaStatementTimeout, internal)),
615616
TransactionTimeoutCount: metric.NewCounter(getMetricMeta(MetaTransactionTimeout, internal)),
616-
FullTableOrIndexScanCount: metric.NewCounter(getMetricMeta(MetaFullTableOrIndexScan, internal)),
617+
FullTableOrIndexScanCount: aggmetric.NewSQLCounter(getMetricMeta(MetaFullTableOrIndexScan, internal)),
617618
FullTableOrIndexScanRejectedCount: metric.NewCounter(getMetricMeta(MetaFullTableOrIndexScanRejected, internal)),
618619
},
619620
StartedStatementCounters: makeStartedStatementCounters(internal),
@@ -2909,12 +2910,14 @@ func (ex *connExecutor) execCopyOut(
29092910
ctx, cancelQuery = ctxlog.WithCancel(ctx)
29102911
queryID := ex.server.cfg.GenerateID()
29112912
ex.addActiveQuery(cmd.ParsedStmt, nil /* placeholders */, queryID, cancelQuery)
2912-
ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1)
2913+
ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1,
2914+
ex.sessionData().Database, ex.sessionData().ApplicationName)
29132915

29142916
defer func() {
29152917
ex.removeActiveQuery(queryID, cmd.Stmt)
29162918
cancelQuery()
2917-
ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1)
2919+
ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1,
2920+
ex.sessionData().Database, ex.sessionData().ApplicationName)
29182921
if !payloadHasError(retPayload) {
29192922
ex.incrementExecutedStmtCounter(cmd.Stmt)
29202923
}
@@ -3121,12 +3124,14 @@ func (ex *connExecutor) execCopyIn(
31213124
ctx, cancelQuery = ctxlog.WithCancel(ctx)
31223125
queryID := ex.server.cfg.GenerateID()
31233126
ex.addActiveQuery(cmd.ParsedStmt, nil /* placeholders */, queryID, cancelQuery)
3124-
ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1)
3127+
ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1,
3128+
ex.sessionData().Database, ex.sessionData().ApplicationName)
31253129

31263130
defer func() {
31273131
ex.removeActiveQuery(queryID, cmd.Stmt)
31283132
cancelQuery()
3129-
ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1)
3133+
ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1,
3134+
ex.sessionData().Database, ex.sessionData().ApplicationName)
31303135
if !payloadHasError(retPayload) {
31313136
ex.incrementExecutedStmtCounter(cmd.Stmt)
31323137
}
@@ -4510,17 +4515,17 @@ type StatementCounters struct {
45104515
QueryCount telemetry.CounterWithMetric
45114516

45124517
// Basic CRUD statements.
4513-
SelectCount telemetry.CounterWithMetric
4514-
UpdateCount telemetry.CounterWithMetric
4515-
InsertCount telemetry.CounterWithMetric
4516-
DeleteCount telemetry.CounterWithMetric
4518+
SelectCount telemetry.CounterWithAggMetric
4519+
UpdateCount telemetry.CounterWithAggMetric
4520+
InsertCount telemetry.CounterWithAggMetric
4521+
DeleteCount telemetry.CounterWithAggMetric
45174522
// CRUDQueryCount includes all 4 CRUD statements above.
4518-
CRUDQueryCount telemetry.CounterWithMetric
4523+
CRUDQueryCount telemetry.CounterWithAggMetric
45194524

45204525
// Transaction operations.
4521-
TxnBeginCount telemetry.CounterWithMetric
4522-
TxnCommitCount telemetry.CounterWithMetric
4523-
TxnRollbackCount telemetry.CounterWithMetric
4526+
TxnBeginCount telemetry.CounterWithAggMetric
4527+
TxnCommitCount telemetry.CounterWithAggMetric
4528+
TxnRollbackCount telemetry.CounterWithAggMetric
45244529
TxnUpgradedCount *metric.Counter
45254530

45264531
// Transaction XA two-phase commit operations.
@@ -4561,11 +4566,11 @@ type StatementCounters struct {
45614566

45624567
func makeStartedStatementCounters(internal bool) StatementCounters {
45634568
return StatementCounters{
4564-
TxnBeginCount: telemetry.NewCounterWithMetric(
4569+
TxnBeginCount: telemetry.NewCounterWithAggMetric(
45654570
getMetricMeta(MetaTxnBeginStarted, internal)),
4566-
TxnCommitCount: telemetry.NewCounterWithMetric(
4571+
TxnCommitCount: telemetry.NewCounterWithAggMetric(
45674572
getMetricMeta(MetaTxnCommitStarted, internal)),
4568-
TxnRollbackCount: telemetry.NewCounterWithMetric(
4573+
TxnRollbackCount: telemetry.NewCounterWithAggMetric(
45694574
getMetricMeta(MetaTxnRollbackStarted, internal)),
45704575
TxnUpgradedCount: metric.NewCounter(
45714576
getMetricMeta(MetaTxnUpgradedFromWeakIsolation, internal)),
@@ -4587,15 +4592,15 @@ func makeStartedStatementCounters(internal bool) StatementCounters {
45874592
getMetricMeta(MetaReleaseSavepointStarted, internal)),
45884593
RollbackToSavepointCount: telemetry.NewCounterWithMetric(
45894594
getMetricMeta(MetaRollbackToSavepointStarted, internal)),
4590-
SelectCount: telemetry.NewCounterWithMetric(
4595+
SelectCount: telemetry.NewCounterWithAggMetric(
45914596
getMetricMeta(MetaSelectStarted, internal)),
4592-
UpdateCount: telemetry.NewCounterWithMetric(
4597+
UpdateCount: telemetry.NewCounterWithAggMetric(
45934598
getMetricMeta(MetaUpdateStarted, internal)),
4594-
InsertCount: telemetry.NewCounterWithMetric(
4599+
InsertCount: telemetry.NewCounterWithAggMetric(
45954600
getMetricMeta(MetaInsertStarted, internal)),
4596-
DeleteCount: telemetry.NewCounterWithMetric(
4601+
DeleteCount: telemetry.NewCounterWithAggMetric(
45974602
getMetricMeta(MetaDeleteStarted, internal)),
4598-
CRUDQueryCount: telemetry.NewCounterWithMetric(
4603+
CRUDQueryCount: telemetry.NewCounterWithAggMetric(
45994604
getMetricMeta(MetaCRUDStarted, internal)),
46004605
DdlCount: telemetry.NewCounterWithMetric(
46014606
getMetricMeta(MetaDdlStarted, internal)),
@@ -4614,11 +4619,11 @@ func makeStartedStatementCounters(internal bool) StatementCounters {
46144619

46154620
func makeExecutedStatementCounters(internal bool) StatementCounters {
46164621
return StatementCounters{
4617-
TxnBeginCount: telemetry.NewCounterWithMetric(
4622+
TxnBeginCount: telemetry.NewCounterWithAggMetric(
46184623
getMetricMeta(MetaTxnBeginExecuted, internal)),
4619-
TxnCommitCount: telemetry.NewCounterWithMetric(
4624+
TxnCommitCount: telemetry.NewCounterWithAggMetric(
46204625
getMetricMeta(MetaTxnCommitExecuted, internal)),
4621-
TxnRollbackCount: telemetry.NewCounterWithMetric(
4626+
TxnRollbackCount: telemetry.NewCounterWithAggMetric(
46224627
getMetricMeta(MetaTxnRollbackExecuted, internal)),
46234628
TxnUpgradedCount: metric.NewCounter(
46244629
getMetricMeta(MetaTxnUpgradedFromWeakIsolation, internal)),
@@ -4640,15 +4645,15 @@ func makeExecutedStatementCounters(internal bool) StatementCounters {
46404645
getMetricMeta(MetaReleaseSavepointExecuted, internal)),
46414646
RollbackToSavepointCount: telemetry.NewCounterWithMetric(
46424647
getMetricMeta(MetaRollbackToSavepointExecuted, internal)),
4643-
SelectCount: telemetry.NewCounterWithMetric(
4648+
SelectCount: telemetry.NewCounterWithAggMetric(
46444649
getMetricMeta(MetaSelectExecuted, internal)),
4645-
UpdateCount: telemetry.NewCounterWithMetric(
4650+
UpdateCount: telemetry.NewCounterWithAggMetric(
46464651
getMetricMeta(MetaUpdateExecuted, internal)),
4647-
InsertCount: telemetry.NewCounterWithMetric(
4652+
InsertCount: telemetry.NewCounterWithAggMetric(
46484653
getMetricMeta(MetaInsertExecuted, internal)),
4649-
DeleteCount: telemetry.NewCounterWithMetric(
4654+
DeleteCount: telemetry.NewCounterWithAggMetric(
46504655
getMetricMeta(MetaDeleteExecuted, internal)),
4651-
CRUDQueryCount: telemetry.NewCounterWithMetric(
4656+
CRUDQueryCount: telemetry.NewCounterWithAggMetric(
46524657
getMetricMeta(MetaCRUDExecuted, internal)),
46534658
DdlCount: telemetry.NewCounterWithMetric(
46544659
getMetricMeta(MetaDdlExecuted, internal)),
@@ -4667,30 +4672,32 @@ func makeExecutedStatementCounters(internal bool) StatementCounters {
46674672

46684673
func (sc *StatementCounters) incrementCount(ex *connExecutor, stmt tree.Statement) {
46694674
sc.QueryCount.Inc()
4675+
dbName := ex.sessionData().Database
4676+
appName := ex.sessionData().ApplicationName
46704677
switch t := stmt.(type) {
46714678
case *tree.BeginTransaction:
4672-
sc.TxnBeginCount.Inc()
4679+
sc.TxnBeginCount.Inc(dbName, appName)
46734680
case *tree.Select:
4674-
sc.SelectCount.Inc()
4675-
sc.CRUDQueryCount.Inc()
4681+
sc.SelectCount.Inc(dbName, appName)
4682+
sc.CRUDQueryCount.Inc(dbName, appName)
46764683
case *tree.Update:
4677-
sc.UpdateCount.Inc()
4678-
sc.CRUDQueryCount.Inc()
4684+
sc.UpdateCount.Inc(dbName, appName)
4685+
sc.CRUDQueryCount.Inc(dbName, appName)
46794686
case *tree.Insert:
4680-
sc.InsertCount.Inc()
4681-
sc.CRUDQueryCount.Inc()
4687+
sc.InsertCount.Inc(dbName, appName)
4688+
sc.CRUDQueryCount.Inc(dbName, appName)
46824689
case *tree.Delete:
4683-
sc.DeleteCount.Inc()
4684-
sc.CRUDQueryCount.Inc()
4690+
sc.DeleteCount.Inc(dbName, appName)
4691+
sc.CRUDQueryCount.Inc(dbName, appName)
46854692
case *tree.CommitTransaction:
4686-
sc.TxnCommitCount.Inc()
4693+
sc.TxnCommitCount.Inc(dbName, appName)
46874694
case *tree.RollbackTransaction:
46884695
// The CommitWait state means that the transaction has already committed
46894696
// after a specially handled `RELEASE SAVEPOINT cockroach_restart` command.
46904697
if ex.getTransactionState() == CommitWaitStateStr {
4691-
sc.TxnCommitCount.Inc()
4698+
sc.TxnCommitCount.Inc(dbName, appName)
46924699
} else {
4693-
sc.TxnRollbackCount.Inc()
4700+
sc.TxnRollbackCount.Inc(dbName, appName)
46944701
}
46954702
case *tree.PrepareTransaction:
46964703
sc.TxnPrepareCount.Inc()

0 commit comments

Comments
 (0)