Skip to content

Commit 81377d1

Browse files
craig[bot]yuzefovichdhartunianrail
committed
143251: sql: decode all plan gists in test builds r=yuzefovich a=yuzefovich **explain: remove a couple redundant nil checks** `OutputBuilder.Expr` already checks for `nil` argument, so we can skip doing that in a few places. **sql: decode all plan gists in test builds** This commit adds the logic to always decode plan gists in test builds right after the gist was created. This should have trivial overhead while providing extra test coverage for the feature. Note that we had `TestExplainGist` that was targeting this coverage, so to a certain degree that test is now obsolete. However, I decided to not completely remove it since the test has been good at finding issues unrelated to gists (it is effectively the `sqlsmith` roachtest that runs as a unit test). Informs: #143211 Epic: None Release note: None 144790: metric: document metrics in yaml format r=dhartunian a=dhartunian The generated metrics file is now in YAML format instead of HTML, which makes it easier to consume with automated tooling. The file is now structured in a hierarchical format by layer, then category, then in a list of metric names. Some new fields have been added: - `exported_name`: metric name used in the Prometheus endpoint that ends up shown in 3rd party observability tooling. - `essential`: boolean that flags a metric as essential for customers to use. - `how_to_use`: extended description that is included with essential metrics Resolves: #142571 Release note: None 145402: workflows: run automerge on schedule only r=jlinder a=rail Previously, the automerge workflow was triggered on every push to any branch. This commit changes the workflow to run only on a schedule, also removing the `on: workflow_dispatch` trigger. Epic: none Release note: none Co-authored-by: Yahor Yuzefovich <[email protected]> Co-authored-by: David Hartunian <[email protected]> Co-authored-by: Rail Aliiev <[email protected]>
4 parents 33254c8 + f95c80c + 5b43325 + 898685f commit 81377d1

File tree

15 files changed

+17361
-2094
lines changed

15 files changed

+17361
-2094
lines changed

.github/workflows/auto-merge-backports.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ name: Auto-Merge Test Backport PRs
33
on:
44
schedule:
55
- cron: "0 * * * *" # Every hour
6-
workflow_dispatch:
7-
push:
86

97
jobs:
108
auto-merge:

docs/generated/metrics/BUILD.bazel

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
genrule(
22
name = "metrics",
3-
outs = ["metrics.html"],
4-
cmd = "$(location //pkg/cmd/cockroach-short) gen metric-list --format=unnumbered-html --logtostderr=NONE > $@",
3+
outs = ["metrics.yaml"],
4+
cmd = "$(location //pkg/cmd/cockroach-short) gen metric-list --logtostderr=NONE > $@",
55
tools = ["//pkg/cmd/cockroach-short"],
66
visibility = [
77
":__pkg__",

docs/generated/metrics/metrics.html

Lines changed: 0 additions & 2037 deletions
This file was deleted.

docs/generated/metrics/metrics.yaml

Lines changed: 17131 additions & 0 deletions
Large diffs are not rendered by default.

pkg/cli/gen.go

Lines changed: 107 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
slugify "github.com/mozillazg/go-slugify"
3434
"github.com/spf13/cobra"
3535
"github.com/spf13/cobra/doc"
36+
"gopkg.in/yaml.v2"
3637
)
3738

3839
var manPath string
@@ -335,32 +336,119 @@ Output the list of metrics typical for a node.
335336
return err
336337
}
337338

338-
// Sort by layer then metric name.
339+
// Sort by layer then category name.
339340
sort.Slice(sections, func(i, j int) bool {
340341
return sections[i].MetricLayer < sections[j].MetricLayer ||
341342
(sections[i].MetricLayer == sections[j].MetricLayer &&
342343
sections[i].Title < sections[j].Title)
343344
})
344345

345-
// Populate the resulting table.
346-
cols := []string{"Layer", "Metric", "Description", "Y-Axis Label", "Type", "Unit", "Aggregation", "Derivative"}
347-
var rows [][]string
346+
// Structure for file is:
347+
// layers:
348+
// - name: layer_name
349+
// categories:
350+
// - name: category_name
351+
// metrics:
352+
// - name: metric_name
353+
// exported_name: metric_exported_name
354+
// description: metric_description
355+
// y_axis_label: metric_y_axis_label
356+
// etc.
357+
358+
type MetricInfo struct {
359+
Name string `yaml:"name"`
360+
ExportedName string `yaml:"exported_name"`
361+
Description string `yaml:"description"`
362+
YAxisLabel string `yaml:"y_axis_label"`
363+
Type string `yaml:"type"`
364+
Unit string `yaml:"unit"`
365+
Aggregation string `yaml:"aggregation"`
366+
Derivative string `yaml:"derivative"`
367+
HowToUse string `yaml:"how_to_use,omitempty"`
368+
Essential bool `yaml:"essential,omitempty"`
369+
}
370+
371+
type Category struct {
372+
Name string
373+
Metrics []MetricInfo
374+
}
375+
376+
type Layer struct {
377+
Name string
378+
Categories []Category
379+
}
380+
381+
type YAMLOutput struct {
382+
Layers []*Layer
383+
}
384+
385+
layers := make(map[string]*Layer)
348386
for _, section := range sections {
349-
rows = append(rows,
350-
[]string{
351-
section.MetricLayer.String(),
352-
section.Title,
353-
section.Charts[0].Metrics[0].Help,
354-
section.Charts[0].AxisLabel,
355-
section.Charts[0].Metrics[0].MetricType.String(),
356-
section.Charts[0].Units.String(),
357-
section.Charts[0].Aggregator.String(),
358-
section.Charts[0].Derivative.String(),
387+
// Get or create the layer that the current section is in
388+
layerName := section.MetricLayer.String()
389+
layer, ok := layers[layerName]
390+
if !ok {
391+
layer = &Layer{
392+
Name: layerName,
393+
Categories: []Category{},
394+
}
395+
layers[layerName] = layer
396+
}
397+
398+
// Every section is a separate category
399+
category := Category{
400+
Name: section.Title,
401+
}
402+
403+
for _, chart := range section.Charts {
404+
// There are many charts, but only 1 metric per chart.
405+
metric := MetricInfo{
406+
Name: chart.Metrics[0].Name,
407+
ExportedName: chart.Metrics[0].ExportedName,
408+
Description: chart.Metrics[0].Help,
409+
YAxisLabel: chart.AxisLabel,
410+
Type: chart.Metrics[0].MetricType.String(),
411+
Unit: chart.Units.String(),
412+
Aggregation: chart.Aggregator.String(),
413+
Derivative: chart.Derivative.String(),
414+
HowToUse: chart.Metrics[0].HowToUse,
415+
Essential: chart.Metrics[0].Essential,
416+
}
417+
category.Metrics = append(category.Metrics, metric)
418+
}
419+
420+
layer.Categories = append(layer.Categories, category)
421+
}
422+
423+
// Sort metrics within each layer by name
424+
for _, layer := range layers {
425+
for _, cat := range layer.Categories {
426+
sort.Slice(cat.Metrics, func(i, j int) bool {
427+
return cat.Metrics[i].Name < cat.Metrics[j].Name
359428
})
429+
430+
}
360431
}
361-
align := "dddddddd"
362-
sliceIter := clisqlexec.NewRowSliceIter(rows, align)
363-
return sqlExecCtx.PrintQueryOutput(os.Stdout, stderr, cols, sliceIter)
432+
433+
output := YAMLOutput{}
434+
435+
var layerNames []string
436+
for name := range layers {
437+
layerNames = append(layerNames, name)
438+
}
439+
sort.Strings(layerNames)
440+
441+
for _, layer := range layerNames {
442+
output.Layers = append(output.Layers, layers[layer])
443+
}
444+
445+
// Output YAML
446+
yamlData, err := yaml.Marshal(output)
447+
if err != nil {
448+
return err
449+
}
450+
fmt.Fprintf(os.Stdout, "%s", yamlData)
451+
return nil
364452
},
365453
}
366454

@@ -404,5 +492,7 @@ func init() {
404492
[]string{"system-only", "system-visible", "application"},
405493
"label to use in the output for the various setting classes")
406494

495+
genMetricListCmd.Flags().Bool("essential", false, "only emit essential metrics")
496+
407497
GenCmd.AddCommand(genCmds...)
408498
}

pkg/gen/docs.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ DOCS_SRCS = [
88
"//docs/generated/http:nodes-other.md",
99
"//docs/generated/http:nodes-request.md",
1010
"//docs/generated/http:nodes-response.md",
11-
"//docs/generated/metrics:metrics.html",
11+
"//docs/generated/metrics:metrics.yaml",
1212
"//docs/generated/settings:settings-for-tenants.txt",
1313
"//docs/generated/settings:settings.html",
1414
"//docs/generated/sql/bnf:abort_stmt.bnf",

pkg/server/status/runtime.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,12 @@ var (
173173
Help: "Current user+system cpu percentage consumed by the CRDB process, normalized 0-1 by number of cores",
174174
Measurement: "CPU Time",
175175
Unit: metric.Unit_PERCENT,
176+
Essential: true,
177+
Category: metric.Metadata_HARDWARE,
178+
HowToUse: `This metric gives the CPU utilization percentage by the CockroachDB process.
179+
If it is equal to 1 (or 100%), then the CPU is overloaded. The CockroachDB process should
180+
not be running with over 80% utilization for extended periods of time (hours). This metric
181+
is used in the DB Console CPU Percent graph.`,
176182
}
177183
metaCPUNowNS = metric.Metadata{
178184
Name: "sys.cpu.now.ns",

pkg/sql/conn_executor_exec.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
3333
"github.com/cockroachdb/cockroach/pkg/sql/execstats"
3434
"github.com/cockroachdb/cockroach/pkg/sql/isql"
35+
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
3536
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec/explain"
3637
"github.com/cockroachdb/cockroach/pkg/sql/paramparse"
3738
"github.com/cockroachdb/cockroach/pkg/sql/parser"
@@ -3348,6 +3349,22 @@ func (ex *connExecutor) makeExecPlan(
33483349
// Include gist in error reports.
33493350
ih := &planner.instrumentation
33503351
ctx = withPlanGist(ctx, ih.planGist.String())
3352+
if buildutil.CrdbTestBuild && ih.planGist.String() != "" {
3353+
// Ensure that the gist can be decoded in test builds.
3354+
//
3355+
// In 50% cases, use nil catalog.
3356+
var catalog cat.Catalog
3357+
if ex.rng.internal.Float64() < 0.5 && !planner.SessionData().AllowRoleMembershipsToChangeDuringTransaction {
3358+
// For some reason, TestAllowRoleMembershipsToChangeDuringTransaction
3359+
// times out with non-nil catalog, so we'll keep it as nil when the
3360+
// session var is set to 'true' ('false' is the default).
3361+
catalog = planner.optPlanningCtx.catalog
3362+
}
3363+
_, err := explain.DecodePlanGistToRows(ctx, &planner.extendedEvalCtx.Context, ih.planGist.String(), catalog)
3364+
if err != nil {
3365+
return ctx, errors.NewAssertionErrorWithWrappedErrf(err, "failed to decode plan gist: %q", ih.planGist.String())
3366+
}
3367+
}
33513368

33523369
// Now that we have the plan gist, check whether we should get a bundle for
33533370
// it.

pkg/sql/exec_util.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,9 @@ var (
11021102
Unit: metric.Unit_COUNT,
11031103
LabeledName: "sql.count",
11041104
StaticLabels: metric.MakeLabelPairs(metric.LabelQueryType, "select"),
1105+
Essential: true,
1106+
Category: metric.Metadata_SQL,
1107+
HowToUse: "This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application.",
11051108
}
11061109
MetaUpdateExecuted = metric.Metadata{
11071110
Name: "sql.update.count",
@@ -1310,6 +1313,8 @@ func getMetricMeta(meta metric.Metadata, internal bool) metric.Metadata {
13101313
meta.Name += ".internal"
13111314
meta.Help += " (internal queries)"
13121315
meta.Measurement = "SQL Internal Statements"
1316+
meta.Essential = false
1317+
meta.HowToUse = ""
13131318
if meta.LabeledName != "" {
13141319
meta.StaticLabels = append(meta.StaticLabels, metric.MakeLabelPairs(metric.LabelQueryInternal, "true")...)
13151320
}

pkg/sql/opt/exec/explain/emit.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -740,12 +740,8 @@ func (e *emitter) emitNodeAttributes(ctx context.Context, evalCtx *eval.Context,
740740

741741
case limitOp:
742742
a := n.args.(*limitArgs)
743-
if a.Limit != nil {
744-
ob.Expr("count", a.Limit, nil /* columns */)
745-
}
746-
if a.Offset != nil {
747-
ob.Expr("offset", a.Offset, nil /* columns */)
748-
}
743+
ob.Expr("count", a.Limit, nil /* columns */)
744+
ob.Expr("offset", a.Offset, nil /* columns */)
749745

750746
case sortOp:
751747
a := n.args.(*sortArgs)
@@ -848,9 +844,7 @@ func (e *emitter) emitNodeAttributes(ctx context.Context, evalCtx *eval.Context,
848844

849845
case applyJoinOp:
850846
a := n.args.(*applyJoinArgs)
851-
if a.OnCond != nil {
852-
ob.Expr("pred", a.OnCond, appendColumns(a.Left.Columns(), a.RightColumns...))
853-
}
847+
ob.Expr("pred", a.OnCond, appendColumns(a.Left.Columns(), a.RightColumns...))
854848

855849
case lookupJoinOp:
856850
a := n.args.(*lookupJoinArgs)

0 commit comments

Comments
 (0)