Skip to content

Commit 5260861

Browse files
committed
sql: add latency metrics for historical queries
This commit adds 4 new latency metrics specifically to separate out AOST queries: * sql.exec.latency.consistent -- Latency of SQL statement execution of non-historical queries * sql.exec.latency.historical -- Latency of SQL statement execution of historical queries * sql.service.latency.consistent -- Latency of SQL request execution of non-historical queries * sql.service.latency.historical -- Latency of SQL request execution of historical queries This will help when trying to optimize workloads that have a combination of historical and non-historical queries. Fixes: #121507 Part of: https://cockroachlabs.atlassian.net/browse/CRDB-37293 Part of: https://cockroachlabs.atlassian.net/browse/TREQ-152 Part of: https://cockroachlabs.atlassian.net/browse/FEB-22 Release note (ops change): Added 4 new latency metrics: sql.service.latency.historical, sql.service.latency.consistent, sql.exec.latency.historical, sql.exec.latency.consistent for easier query optimizations.
1 parent 615456f commit 5260861

File tree

4 files changed

+89
-13
lines changed

4 files changed

+89
-13
lines changed

docs/generated/metrics/metrics.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1695,8 +1695,12 @@
16951695
<tr><td>APPLICATION</td><td>sql.distsql.service.latency.internal</td><td>Latency of DistSQL request execution (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
16961696
<tr><td>APPLICATION</td><td>sql.distsql.vec.openfds</td><td>Current number of open file descriptors used by vectorized external storage</td><td>Files</td><td>GAUGE</td><td>COUNT</td><td>AVG</td><td>NONE</td></tr>
16971697
<tr><td>APPLICATION</td><td>sql.exec.latency</td><td>Latency of SQL statement execution</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1698+
<tr><td>APPLICATION</td><td>sql.exec.latency.consistent</td><td>Latency of SQL statement execution of non-historical queries</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1699+
<tr><td>APPLICATION</td><td>sql.exec.latency.consistent.internal</td><td>Latency of SQL statement execution of non-historical queries (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
16981700
<tr><td>APPLICATION</td><td>sql.exec.latency.detail</td><td>Latency of SQL statement execution, by statement fingerprint</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
16991701
<tr><td>APPLICATION</td><td>sql.exec.latency.detail.internal</td><td>Latency of SQL statement execution, by statement fingerprint (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1702+
<tr><td>APPLICATION</td><td>sql.exec.latency.historical</td><td>Latency of SQL statement execution of historical queries</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1703+
<tr><td>APPLICATION</td><td>sql.exec.latency.historical.internal</td><td>Latency of SQL statement execution of historical queries (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
17001704
<tr><td>APPLICATION</td><td>sql.exec.latency.internal</td><td>Latency of SQL statement execution (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
17011705
<tr><td>APPLICATION</td><td>sql.failure.count</td><td>Number of statements resulting in a planning or runtime error</td><td>SQL Statements</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
17021706
<tr><td>APPLICATION</td><td>sql.failure.count.internal</td><td>Number of statements resulting in a planning or runtime error (internal queries)</td><td>SQL Internal Statements</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
@@ -1819,6 +1823,10 @@
18191823
<tr><td>APPLICATION</td><td>sql.select.started.count</td><td>Number of SQL SELECT statements started</td><td>SQL Statements</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
18201824
<tr><td>APPLICATION</td><td>sql.select.started.count.internal</td><td>Number of SQL SELECT statements started (internal queries)</td><td>SQL Internal Statements</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
18211825
<tr><td>APPLICATION</td><td>sql.service.latency</td><td>Latency of SQL request execution</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1826+
<tr><td>APPLICATION</td><td>sql.service.latency.consistent</td><td>Latency of SQL request execution of non-historical queries</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1827+
<tr><td>APPLICATION</td><td>sql.service.latency.consistent.internal</td><td>Latency of SQL request execution of non-historical queries (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1828+
<tr><td>APPLICATION</td><td>sql.service.latency.historical</td><td>Latency of SQL request execution of historical queries</td><td>Latency</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
1829+
<tr><td>APPLICATION</td><td>sql.service.latency.historical.internal</td><td>Latency of SQL request execution of historical queries (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
18221830
<tr><td>APPLICATION</td><td>sql.service.latency.internal</td><td>Latency of SQL request execution (internal queries)</td><td>SQL Internal Statements</td><td>HISTOGRAM</td><td>NANOSECONDS</td><td>AVG</td><td>NONE</td></tr>
18231831
<tr><td>APPLICATION</td><td>sql.statement_timeout.count</td><td>Count of statements that failed because they exceeded the statement timeout</td><td>SQL Statements</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>
18241832
<tr><td>APPLICATION</td><td>sql.statement_timeout.count.internal</td><td>Count of statements that failed because they exceeded the statement timeout (internal queries)</td><td>SQL Internal Statements</td><td>COUNTER</td><td>COUNT</td><td>AVG</td><td>NON_NEGATIVE_DERIVATIVE</td></tr>

pkg/sql/conn_executor.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ func makeMetrics(internal bool, sv *settings.Values) Metrics {
557557
SQLOptPlanCacheMisses: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheMisses, internal)),
558558
StatementFingerprintCount: metric.NewUniqueCounter(getMetricMeta(MetaUniqueStatementCount, internal)),
559559
SQLExecLatencyDetail: sqlExecLatencyDetail,
560+
560561
// TODO(mrtracy): See HistogramWindowInterval in server/config.go for the 6x factor.
561562
DistSQLExecLatency: metric.NewHistogram(metric.HistogramOptions{
562563
Mode: metric.HistogramModePreferHdrLatency,
@@ -570,6 +571,18 @@ func makeMetrics(internal bool, sv *settings.Values) Metrics {
570571
Duration: 6 * metricsSampleInterval,
571572
BucketConfig: metric.IOLatencyBuckets,
572573
}),
574+
SQLExecLatencyConsistent: metric.NewHistogram(metric.HistogramOptions{
575+
Mode: metric.HistogramModePreferHdrLatency,
576+
Metadata: getMetricMeta(MetaSQLExecLatencyConsistent, internal),
577+
Duration: 6 * metricsSampleInterval,
578+
BucketConfig: metric.IOLatencyBuckets,
579+
}),
580+
SQLExecLatencyHistorical: metric.NewHistogram(metric.HistogramOptions{
581+
Mode: metric.HistogramModePreferHdrLatency,
582+
Metadata: getMetricMeta(MetaSQLExecLatencyHistorical, internal),
583+
Duration: 6 * metricsSampleInterval,
584+
BucketConfig: metric.IOLatencyBuckets,
585+
}),
573586
DistSQLServiceLatency: metric.NewHistogram(metric.HistogramOptions{
574587
Mode: metric.HistogramModePreferHdrLatency,
575588
Metadata: getMetricMeta(MetaDistSQLServiceLatency, internal),
@@ -582,6 +595,18 @@ func makeMetrics(internal bool, sv *settings.Values) Metrics {
582595
Duration: 6 * metricsSampleInterval,
583596
BucketConfig: metric.IOLatencyBuckets,
584597
}),
598+
SQLServiceLatencyConsistent: metric.NewHistogram(metric.HistogramOptions{
599+
Mode: metric.HistogramModePreferHdrLatency,
600+
Metadata: getMetricMeta(MetaSQLServiceLatencyConsistent, internal),
601+
Duration: 6 * metricsSampleInterval,
602+
BucketConfig: metric.IOLatencyBuckets,
603+
}),
604+
SQLServiceLatencyHistorical: metric.NewHistogram(metric.HistogramOptions{
605+
Mode: metric.HistogramModePreferHdrLatency,
606+
Metadata: getMetricMeta(MetaSQLServiceLatencyHistorical, internal),
607+
Duration: 6 * metricsSampleInterval,
608+
BucketConfig: metric.IOLatencyBuckets,
609+
}),
585610
SQLTxnLatency: metric.NewHistogram(metric.HistogramOptions{
586611
Mode: metric.HistogramModePreferHdrLatency,
587612
Metadata: getMetricMeta(MetaSQLTxnLatency, internal),

pkg/sql/exec_util.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,18 @@ var (
767767
Measurement: "Latency",
768768
Unit: metric.Unit_NANOSECONDS,
769769
}
770+
MetaSQLExecLatencyConsistent = metric.Metadata{
771+
Name: "sql.exec.latency.consistent",
772+
Help: "Latency of SQL statement execution of non-historical queries",
773+
Measurement: "Latency",
774+
Unit: metric.Unit_NANOSECONDS,
775+
}
776+
MetaSQLExecLatencyHistorical = metric.Metadata{
777+
Name: "sql.exec.latency.historical",
778+
Help: "Latency of SQL statement execution of historical queries",
779+
Measurement: "Latency",
780+
Unit: metric.Unit_NANOSECONDS,
781+
}
770782
MetaSQLExecLatencyDetail = metric.Metadata{
771783
Name: "sql.exec.latency.detail",
772784
Help: "Latency of SQL statement execution, by statement fingerprint",
@@ -780,6 +792,18 @@ var (
780792
Measurement: "Latency",
781793
Unit: metric.Unit_NANOSECONDS,
782794
}
795+
MetaSQLServiceLatencyConsistent = metric.Metadata{
796+
Name: "sql.service.latency.consistent",
797+
Help: "Latency of SQL request execution of non-historical queries",
798+
Measurement: "Latency",
799+
Unit: metric.Unit_NANOSECONDS,
800+
}
801+
MetaSQLServiceLatencyHistorical = metric.Metadata{
802+
Name: "sql.service.latency.historical",
803+
Help: "Latency of SQL request execution of historical queries",
804+
Measurement: "Latency",
805+
Unit: metric.Unit_NANOSECONDS,
806+
}
783807
MetaSQLOptPlanCacheHits = metric.Metadata{
784808
Name: "sql.optimizer.plan_cache.hits",
785809
Help: "Number of non-prepared statements for which a cached plan was used",

pkg/sql/executor_statement_metrics.go

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,31 @@ type EngineMetrics struct {
2626
// The subset of SELECTs that were executed by DistSQL with full or partial
2727
// distribution.
2828
DistSQLSelectDistributedCount *metric.Counter
29-
SQLOptPlanCacheHits *metric.Counter
30-
SQLOptPlanCacheMisses *metric.Counter
31-
StatementFingerprintCount *metric.UniqueCounter
32-
33-
SQLExecLatencyDetail *metric.HistogramVec
34-
DistSQLExecLatency metric.IHistogram
35-
SQLExecLatency metric.IHistogram
36-
DistSQLServiceLatency metric.IHistogram
37-
SQLServiceLatency metric.IHistogram
38-
SQLTxnLatency metric.IHistogram
39-
SQLTxnsOpen *metric.Gauge
40-
SQLActiveStatements *metric.Gauge
41-
SQLContendedTxns *metric.Counter
29+
DistSQLExecLatency metric.IHistogram
30+
DistSQLServiceLatency metric.IHistogram
31+
32+
SQLOptPlanCacheHits *metric.Counter
33+
SQLOptPlanCacheMisses *metric.Counter
34+
35+
StatementFingerprintCount *metric.UniqueCounter
36+
SQLExecLatencyDetail *metric.HistogramVec
37+
38+
SQLExecLatency metric.IHistogram
39+
// Exec Latency of only non-AOST queries
40+
SQLExecLatencyConsistent metric.IHistogram
41+
// Exec Latency of only AOST queries
42+
SQLExecLatencyHistorical metric.IHistogram
43+
44+
SQLServiceLatency metric.IHistogram
45+
// Service Latency of only non-AOST queries
46+
SQLServiceLatencyConsistent metric.IHistogram
47+
// Service Latency of only AOST queries
48+
SQLServiceLatencyHistorical metric.IHistogram
49+
50+
SQLTxnLatency metric.IHistogram
51+
SQLTxnsOpen *metric.Gauge
52+
SQLActiveStatements *metric.Gauge
53+
SQLContendedTxns *metric.Counter
4254

4355
// TxnAbortCount counts transactions that were aborted, either due
4456
// to non-retriable errors, or retriable errors when the client-side
@@ -319,6 +331,13 @@ func (ex *connExecutor) recordStatementLatencyMetrics(
319331
}
320332
m.SQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
321333
m.SQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
334+
if ex.state.isHistorical.Load() {
335+
m.SQLExecLatencyHistorical.RecordValue(runLatRaw.Nanoseconds())
336+
m.SQLServiceLatencyHistorical.RecordValue(svcLatRaw.Nanoseconds())
337+
} else {
338+
m.SQLExecLatencyConsistent.RecordValue(runLatRaw.Nanoseconds())
339+
m.SQLServiceLatencyConsistent.RecordValue(svcLatRaw.Nanoseconds())
340+
}
322341
}
323342
}
324343
}

0 commit comments

Comments
 (0)