
Commit dc2191b

Committed by craig[bot], with alyshanjahani-crl and jbowens
149540: metric: Report aggregate metric when no child metrics are present r=dhartunian a=alyshanjahani-crl

SQLMetrics are PrometheusIterable; they are the first and only instance of metrics that dynamically add and remove childset metrics. This commit fixes a bug where the includeAggregate flag being false (cluster setting disabled) resulted in these SQLMetrics not being reported. When a PrometheusIterable has no child metrics, it should report the aggregate regardless of the cluster setting.

Fixes: #149481

Release note (bug fix): When child metrics are enabled, include_aggregate is disabled, and the sql.metric.application_name/database_name settings are disabled, a handful of `sql` metrics were not being reported.

149712: storage: aggressively separate values in range ID keyspace r=sumeerbhola a=jbowens

Adapt Cockroach's implementation of pebble.SpanPolicyFunc to mark the range ID keyspace with ValueStorageLatencyTolerant. This setting causes Pebble to separate values into blob files as long as they're large enough that a pointer to an external value is likely to be smaller than the value itself. Separating values improves write amplification by avoiding rewriting values during some compactions. We target the range ID keyspace because it contains the raft log, which is written during all user writes but rarely read.

Tangentially related to #16624.

Epic: none

Release note (performance improvement): Reduces write amplification by storing raft log values in separate blob files. This reduces write bandwidth, especially on stores with many replicas, which in turn can increase throughput and reduce latency. This behavior is active as long as the storage.value_separation.enabled cluster setting is enabled.

Co-authored-by: Alyshan Jahani <[email protected]>
Co-authored-by: Jackson Owens <[email protected]>
3 parents 875e04a + 66514d6 + 8d88aaf commit dc2191b
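
To make the first change concrete, here is a minimal sketch of how the new scrape option is consumed by a caller. It is not part of the commit; it mirrors the usage exercised in pkg/util/metric/aggmetric/agg_metric_test.go below, and the import paths are inferred from the file paths touched by this commit. A SQL counter with no children is scraped with aggregate metrics disabled; with the bug-fix option enabled, the aggregate is still reported.

package main

import (
    "fmt"

    "github.com/cockroachdb/cockroach/pkg/util/metric"
    "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
)

func main() {
    r := metric.NewRegistry()
    // A SQLMetric-style counter; no child (database/application) metrics
    // have been added to it yet.
    counter := aggmetric.NewSQLCounter(metric.Metadata{Name: "counter"})
    r.AddMetric(counter)

    pe := metric.MakePrometheusExporter()
    // Aggregate reporting is disabled, but the counter has no children, so
    // with the bug-fix option on the aggregate is still emitted.
    pe.ScrapeRegistry(r,
        metric.WithIncludeChildMetrics(true),
        metric.WithIncludeAggregateMetrics(false),
        metric.WithReinitialisableBugFixEnabled(true),
    )
    families, err := pe.Gather()
    if err != nil {
        panic(err)
    }
    fmt.Println(len(families)) // expected: 1 family ("counter") with a single metric
}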

File tree (5 files changed: +238 -51 lines)

  pkg/server/status/recorder.go
  pkg/storage/pebble.go
  pkg/storage/pebble_test.go
  pkg/util/metric/aggmetric/agg_metric_test.go
  pkg/util/metric/prometheus_exporter.go

pkg/server/status/recorder.go

Lines changed: 12 additions & 0 deletions
@@ -98,6 +98,16 @@ var includeAggregateMetricsEnabled = settings.RegisterBoolSetting(
     true,
     settings.WithPublic)
 
+// bugfix149481Enabled is a (temporary) cluster setting that fixes
+// https://github.com/cockroachdb/cockroach/issues/149481. It is true (enabled) by default on master,
+// and false on backports.
+var bugfix149481Enabled = settings.RegisterBoolSetting(
+    settings.ApplicationLevel, "server.child_metrics.bugfix_missing_sql_metrics_149481.enabled",
+    "fixes bug where certain SQL metrics are not being reported, see: "+
+        "https://github.com/cockroachdb/cockroach/issues/149481",
+    true,
+    settings.WithVisibility(settings.Reserved))
+
 // MetricsRecorder is used to periodically record the information in a number of
 // metric registries.
 //
@@ -348,10 +358,12 @@ func (mr *MetricsRecorder) ScrapeIntoPrometheusWithStaticLabels(
 
     includeChildMetrics := ChildMetricsEnabled.Get(&mr.settings.SV)
     includeAggregateMetrics := includeAggregateMetricsEnabled.Get(&mr.settings.SV)
+    reinitialisableBugFixEnabled := bugfix149481Enabled.Get(&mr.settings.SV)
     scrapeOptions := []metric.ScrapeOption{
         metric.WithIncludeChildMetrics(includeChildMetrics),
         metric.WithIncludeAggregateMetrics(includeAggregateMetrics),
         metric.WithUseStaticLabels(useStaticLabels),
+        metric.WithReinitialisableBugFixEnabled(reinitialisableBugFixEnabled),
     }
     if mr.mu.nodeRegistry == nil {
         // We haven't yet processed initialization information; output nothing.

pkg/storage/pebble.go

Lines changed: 50 additions & 22 deletions
@@ -549,28 +549,7 @@ func DefaultPebbleOptions() *pebble.Options {
     opts.TargetByteDeletionRate = 128 << 20 // 128 MB
     opts.Experimental.ShortAttributeExtractor = shortAttributeExtractorForValues
 
-    lockTableStartKey := EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix})
-    lockTableEndKey := EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix.PrefixEnd()})
-    localEndKey := EncodeMVCCKey(MVCCKey{Key: keys.LocalPrefix.PrefixEnd()})
-    opts.Experimental.SpanPolicyFunc = func(startKey []byte) (policy pebble.SpanPolicy, endKey []byte, _ error) {
-        if !bytes.HasPrefix(startKey, keys.LocalPrefix) {
-            return pebble.SpanPolicy{}, nil, nil
-        }
-        // Prefer fast compression for all local keys, since they shouldn't take up
-        // a significant part of the space.
-        policy.PreferFastCompression = true
-
-        // We also disable value separation for lock keys.
-        if cockroachkvs.Compare(startKey, lockTableEndKey) >= 0 {
-            return policy, localEndKey, nil
-        }
-        if cockroachkvs.Compare(startKey, lockTableStartKey) < 0 {
-            return policy, lockTableStartKey, nil
-        }
-        policy.DisableValueSeparationBySuffix = true
-        policy.ValueStoragePolicy = pebble.ValueStorageLowReadLatency
-        return policy, lockTableEndKey, nil
-    }
+    opts.Experimental.SpanPolicyFunc = spanPolicyFunc
     opts.Experimental.UserKeyCategories = userKeyCategories
 
     opts.Levels[0] = pebble.LevelOptions{
@@ -607,6 +586,55 @@ func DefaultPebbleOptions() *pebble.Options {
     return opts
 }
 
+var (
+    spanPolicyLocalRangeIDEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()})
+    spanPolicyLockTableStartKey  = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix})
+    spanPolicyLockTableEndKey    = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix.PrefixEnd()})
+    spanPolicyLocalEndKey        = EncodeMVCCKey(MVCCKey{Key: keys.LocalPrefix.PrefixEnd()})
+)
+
+// spanPolicyFunc is a pebble.SpanPolicyFunc that applies special policies for
+// the CockroachDB keyspace.
+func spanPolicyFunc(startKey []byte) (policy pebble.SpanPolicy, endKey []byte, _ error) {
+    // There's no special policy for non-local keys.
+    if !bytes.HasPrefix(startKey, keys.LocalPrefix) {
+        return pebble.SpanPolicy{}, nil, nil
+    }
+    // Prefer fast compression for all local keys, since they shouldn't take up
+    // a significant part of the space.
+    policy.PreferFastCompression = true
+
+    // The first section of the local keyspace is the Range-ID keyspace. It
+    // extends from the beginning of the keyspace to the Range Local keys. The
+    // Range-ID keyspace includes the raft log, which is rarely read and
+    // receives ~half the writes.
+    if cockroachkvs.Compare(startKey, spanPolicyLocalRangeIDEndKey) < 0 {
+        if !bytes.HasPrefix(startKey, keys.LocalRangeIDPrefix) {
+            return pebble.SpanPolicy{}, nil, errors.AssertionFailedf("startKey %s is not a Range-ID key", startKey)
+        }
+        policy.ValueStoragePolicy = pebble.ValueStorageLatencyTolerant
+        return policy, spanPolicyLocalRangeIDEndKey, nil
+    }
+
+    // We also disable value separation for lock keys.
+    if cockroachkvs.Compare(startKey, spanPolicyLockTableEndKey) >= 0 {
+        // Not a lock key, so use default value separation within sstable (by
+        // suffix) and into blob files.
+        // NB: there won't actually be a suffix in these local keys.
+        return policy, spanPolicyLocalEndKey, nil
+    }
+    if cockroachkvs.Compare(startKey, spanPolicyLockTableStartKey) < 0 {
+        // Not a lock key, so use default value separation within sstable (by
+        // suffix) and into blob files.
+        // NB: there won't actually be a suffix in these local keys.
+        return policy, spanPolicyLockTableStartKey, nil
+    }
+    // Lock key. Disable value separation.
+    policy.DisableValueSeparationBySuffix = true
+    policy.ValueStoragePolicy = pebble.ValueStorageLowReadLatency
+    return policy, spanPolicyLockTableEndKey, nil
+}
+
 func shortAttributeExtractorForValues(
     key []byte, keyPrefixLen int, value []byte,
 ) (pebble.ShortAttribute, error) {
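
For readers unfamiliar with the span policy hook, the following is a minimal, self-contained sketch (not from this commit) of how a pebble.SpanPolicyFunc can be wired into pebble.Options. The "raftlog/" prefix and the end-key computation are illustrative assumptions; only the Experimental.SpanPolicyFunc field, the SpanPolicy fields, and the ValueStorageLatencyTolerant constant are taken from the diff above.

package main

import (
    "bytes"

    "github.com/cockroachdb/pebble"
)

// latencyTolerantPrefix is a hypothetical prefix standing in for the
// Range-ID keyspace: values under it are rarely read, so Pebble may
// separate them into blob files more aggressively.
var latencyTolerantPrefix = []byte("raftlog/")

func exampleOptions() *pebble.Options {
    opts := &pebble.Options{}
    opts.Experimental.SpanPolicyFunc = func(startKey []byte) (pebble.SpanPolicy, []byte, error) {
        if bytes.HasPrefix(startKey, latencyTolerantPrefix) {
            // The returned end key bounds the span the policy applies to;
            // here it is the upper bound of the prefix (an assumption based
            // on the pattern in the diff above).
            end := append(append([]byte(nil), latencyTolerantPrefix...), 0xff)
            return pebble.SpanPolicy{
                PreferFastCompression: true,
                ValueStoragePolicy:    pebble.ValueStorageLatencyTolerant,
            }, end, nil
        }
        // No special policy for any other keys.
        return pebble.SpanPolicy{}, nil, nil
    }
    return opts
}

func main() {
    _ = exampleOptions()
}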

pkg/storage/pebble_test.go

Lines changed: 63 additions & 0 deletions
@@ -1762,3 +1762,66 @@ func TestPebbleCompactCancellation(t *testing.T) {
     bfs.WaitForBlockAndUnblock()
     wg.Wait()
 }
+
+func TestPebbleSpanPolicyFunc(t *testing.T) {
+    defer leaktest.AfterTest(t)()
+    defer log.Scope(t).Close(t)
+
+    type testCase struct {
+        startKey   roachpb.Key
+        wantPolicy pebble.SpanPolicy
+        wantEndKey []byte
+    }
+    cases := []testCase{
+        {
+            startKey: keys.RaftHardStateKey(1),
+            wantPolicy: pebble.SpanPolicy{
+                PreferFastCompression: true,
+                ValueStoragePolicy:    pebble.ValueStorageLatencyTolerant,
+            },
+            wantEndKey: spanPolicyLocalRangeIDEndKey,
+        },
+        {
+            startKey: keys.RaftLogKey(9, 2),
+            wantPolicy: pebble.SpanPolicy{
+                PreferFastCompression: true,
+                ValueStoragePolicy:    pebble.ValueStorageLatencyTolerant,
+            },
+            wantEndKey: spanPolicyLocalRangeIDEndKey,
+        },
+        {
+            startKey: keys.RangeDescriptorKey(roachpb.RKey("a")),
+            wantPolicy: pebble.SpanPolicy{
+                PreferFastCompression: true,
+            },
+            wantEndKey: spanPolicyLockTableStartKey,
+        },
+        {
+            startKey: func() roachpb.Key {
+                k, _ := keys.LockTableSingleKey(roachpb.Key("a"), nil)
+                return k
+            }(),
+            wantPolicy: pebble.SpanPolicy{
+                PreferFastCompression:          true,
+                DisableValueSeparationBySuffix: true,
+                ValueStoragePolicy:             pebble.ValueStorageLowReadLatency,
+            },
+            wantEndKey: spanPolicyLockTableEndKey,
+        },
+        {
+            startKey:   keys.SystemSQLCodec.IndexPrefix(1, 2),
+            wantPolicy: pebble.SpanPolicy{},
+            wantEndKey: nil,
+        },
+    }
+
+    for _, tc := range cases {
+        t.Run(fmt.Sprintf("%x", tc.startKey), func(t *testing.T) {
+            ek := EngineKey{Key: tc.startKey}.Encode()
+            policy, endKey, err := spanPolicyFunc(ek)
+            require.NoError(t, err)
+            require.Equal(t, tc.wantPolicy, policy)
+            require.Equal(t, tc.wantEndKey, endKey)
+        })
+    }
+}

pkg/util/metric/aggmetric/agg_metric_test.go

Lines changed: 73 additions & 26 deletions
@@ -33,40 +33,87 @@ func TestExcludeAggregateMetrics(t *testing.T) {
     counter.AddChild("xyz")
     r.AddMetric(counter)
 
-    // Don't include child metrics, so only report the aggregate.
-    // Flipping the includeAggregateMetrics flag should have no effect.
-    testutils.RunTrueAndFalse(t, "includeChildMetrics=false,includeAggregateMetrics", func(t *testing.T, includeAggregateMetrics bool) {
+    testutils.RunTrueAndFalse(t, "reinitialisableBugFixEnabled", func(t *testing.T, includeBugFix bool) {
+        // Don't include child metrics, so only report the aggregate.
+        // Flipping the includeAggregateMetrics flag should have no effect.
+        testutils.RunTrueAndFalse(t, "includeChildMetrics=false,includeAggregateMetrics", func(t *testing.T, includeAggregateMetrics bool) {
+            pe := metric.MakePrometheusExporter()
+            pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(false), metric.WithIncludeAggregateMetrics(includeAggregateMetrics), metric.WithReinitialisableBugFixEnabled(includeBugFix))
+            families, err := pe.Gather()
+            require.NoError(t, err)
+            require.Equal(t, 1, len(families))
+            require.Equal(t, "counter", families[0].GetName())
+            require.Equal(t, 1, len(families[0].GetMetric()))
+            require.Equal(t, 0, len(families[0].GetMetric()[0].GetLabel()))
+        })
+
+        testutils.RunTrueAndFalse(t, "includeChildMetrics=true,includeAggregateMetrics", func(t *testing.T, includeAggregateMetrics bool) {
+            pe := metric.MakePrometheusExporter()
+            pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(true), metric.WithIncludeAggregateMetrics(includeAggregateMetrics), metric.WithReinitialisableBugFixEnabled(includeBugFix))
+            families, err := pe.Gather()
+            require.NoError(t, err)
+            require.Equal(t, 1, len(families))
+            require.Equal(t, "counter", families[0].GetName())
+            var labelPair *prometheusgo.LabelPair
+            if includeAggregateMetrics {
+                require.Equal(t, 2, len(families[0].GetMetric()))
+                require.Equal(t, 0, len(families[0].GetMetric()[0].GetLabel()))
+                require.Equal(t, 1, len(families[0].GetMetric()[1].GetLabel()))
+                labelPair = families[0].GetMetric()[1].GetLabel()[0]
+            } else {
+                require.Equal(t, 1, len(families[0].GetMetric()))
+                require.Equal(t, 1, len(families[0].GetMetric()[0].GetLabel()))
+                labelPair = families[0].GetMetric()[0].GetLabel()[0]
+            }
+            require.Equal(t, "child_label", *labelPair.Name)
+            require.Equal(t, "xyz", *labelPair.Value)
+        })
+    })
+
+    t.Run("reinitialisable metrics", func(t *testing.T) {
+        r := metric.NewRegistry()
+        counter := NewSQLCounter(metric.Metadata{Name: "counter"})
+        r.AddMetric(counter)
+
+        // Aggregate disabled, but no children, so the aggregate is still reported.
         pe := metric.MakePrometheusExporter()
-        pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(false), metric.WithIncludeAggregateMetrics(includeAggregateMetrics))
+        pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(true), metric.WithIncludeAggregateMetrics(false), metric.WithReinitialisableBugFixEnabled(true))
         families, err := pe.Gather()
         require.NoError(t, err)
         require.Equal(t, 1, len(families))
-        require.Equal(t, "counter", families[0].GetName())
         require.Equal(t, 1, len(families[0].GetMetric()))
-        require.Equal(t, 0, len(families[0].GetMetric()[0].GetLabel()))
-    })
-
-    testutils.RunTrueAndFalse(t, "includeChildMetrics=true,includeAggregateMetrics", func(t *testing.T, includeAggregateMetrics bool) {
-        pe := metric.MakePrometheusExporter()
-        pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(true), metric.WithIncludeAggregateMetrics(includeAggregateMetrics))
-        families, err := pe.Gather()
+        require.Equal(t, 0, len(families[0].GetMetric()[0].GetLabel())) // The aggregate has no labels.
+
+        // Add children (app and db label). Now only the childset metric is reported.
+        r.ReinitialiseChildMetrics(true, true)
+        counter.Inc(1, "db_foo", "app_foo")
+        pe = metric.MakePrometheusExporter()
+        pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(true), metric.WithIncludeAggregateMetrics(false), metric.WithReinitialisableBugFixEnabled(true))
+        families, err = pe.Gather()
         require.NoError(t, err)
         require.Equal(t, 1, len(families))
-        require.Equal(t, "counter", families[0].GetName())
-        var labelPair *prometheusgo.LabelPair
-        if includeAggregateMetrics {
-            require.Equal(t, 2, len(families[0].GetMetric()))
-            require.Equal(t, 0, len(families[0].GetMetric()[0].GetLabel()))
-            require.Equal(t, 1, len(families[0].GetMetric()[1].GetLabel()))
-            labelPair = families[0].GetMetric()[1].GetLabel()[0]
-        } else {
-            require.Equal(t, 1, len(families[0].GetMetric()))
-            require.Equal(t, 1, len(families[0].GetMetric()[0].GetLabel()))
-            labelPair = families[0].GetMetric()[0].GetLabel()[0]
-        }
-        require.Equal(t, "child_label", *labelPair.Name)
-        require.Equal(t, "xyz", *labelPair.Value)
+        require.Equal(t, 1, len(families[0].GetMetric()))
+        // Childset metric labels.
+        labelPair := families[0].GetMetric()[0].GetLabel()[0]
+        require.Equal(t, "database", *labelPair.Name)
+        labelPair = families[0].GetMetric()[0].GetLabel()[1]
+        require.Equal(t, "application_name", *labelPair.Name)
+
+        // Enable aggregate. Now reporting two metrics (the aggregate and the childset).
+        pe = metric.MakePrometheusExporter()
+        pe.ScrapeRegistry(r, metric.WithIncludeChildMetrics(true), metric.WithIncludeAggregateMetrics(true), metric.WithReinitialisableBugFixEnabled(true))
+        families, err = pe.Gather()
+        require.NoError(t, err)
+        require.Equal(t, 1, len(families))
+        require.Equal(t, 2, len(families[0].GetMetric()))
+        require.Equal(t, 0, len(families[0].GetMetric()[0].GetLabel())) // The aggregate has no labels.
+        // Childset metric labels.
+        labelPair = families[0].GetMetric()[1].GetLabel()[0]
+        require.Equal(t, "database", *labelPair.Name)
+        labelPair = families[0].GetMetric()[1].GetLabel()[1]
+        require.Equal(t, "application_name", *labelPair.Name)
     })
+
 }
 
 func TestAggMetric(t *testing.T) {

pkg/util/metric/prometheus_exporter.go

Lines changed: 40 additions & 3 deletions
@@ -78,9 +78,10 @@ func (pm *PrometheusExporter) findOrCreateFamily(
 }
 
 type scrapeOptions struct {
-    includeChildMetrics     bool
-    includeAggregateMetrics bool
-    useStaticLabels         bool
+    includeChildMetrics          bool
+    includeAggregateMetrics      bool
+    useStaticLabels              bool
+    reinitialisableBugFixEnabled bool
 }
 
 // ScrapeOption is a function that modifies scrapeOptions
@@ -107,6 +108,13 @@ func WithUseStaticLabels(use bool) ScrapeOption {
     }
 }
 
+// WithReinitialisableBugFixEnabled returns an option to set whether the reinitialisable bug fix is enabled
+func WithReinitialisableBugFixEnabled(enabled bool) ScrapeOption {
+    return func(o *scrapeOptions) {
+        o.reinitialisableBugFixEnabled = enabled
+    }
+}
+
 // applyScrapeOptions creates a new scrapeOptions with the given options applied
 func applyScrapeOptions(options ...ScrapeOption) *scrapeOptions {
     opts := &scrapeOptions{
@@ -125,6 +133,7 @@ func applyScrapeOptions(options ...ScrapeOption) *scrapeOptions {
 func (pm *PrometheusExporter) ScrapeRegistry(registry *Registry, options ...ScrapeOption) {
     o := applyScrapeOptions(options...)
     labels := registry.GetLabels()
+
     f := func(name string, v interface{}) {
         switch prom := v.(type) {
         case PrometheusVector:
@@ -138,6 +147,34 @@ func (pm *PrometheusExporter) ScrapeRegistry(registry *Registry, options ...ScrapeOption) {
         }
 
     case PrometheusExportable:
+        if _, ok := v.(PrometheusReinitialisable); ok && o.reinitialisableBugFixEnabled {
+            m := prom.ToPrometheusMetric()
+            // Set registry and metric labels.
+            m.Label = append(labels, prom.GetLabels(o.useStaticLabels)...)
+            family := pm.findOrCreateFamily(prom, o)
+
+            if o.includeAggregateMetrics {
+                family.Metric = append(family.Metric, m)
+            }
+
+            promIter, ok := v.(PrometheusIterable)
+            numChildren := 0
+            if ok && o.includeChildMetrics {
+                promIter.Each(m.Label, func(metric *prometheusgo.Metric) {
+                    family.Metric = append(family.Metric, metric)
+                    numChildren += 1
+                })
+            }
+
+            // PrometheusReinitialisable metrics (like SQLMetric) dynamically
+            // add child metrics. If no child metrics are present we want to ensure
+            // we report the aggregate regardless of the respective cluster setting.
+            if numChildren == 0 && !o.includeAggregateMetrics {
+                family.Metric = append(family.Metric, m)
+            }
+            return
+        }
+
         m := prom.ToPrometheusMetric()
         // Set registry and metric labels.
         m.Label = append(labels, prom.GetLabels(o.useStaticLabels)...)