Skip to content

Commit d15a362

Browse files
committed
pkg/server: plumb LogMetricRegistry to server MetricsRecorder
This patch adds the necessary plumbing to inject the metrics used by the logging package into the MetricsRecorders used by servers in CockroachDB. With this plumbing complete, we're ready to begin using metrics within the log package. Release note: none
1 parent c271f90 commit d15a362

File tree

5 files changed

+68
-14
lines changed

5 files changed

+68
-14
lines changed

pkg/server/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ go_library(
305305
"//pkg/util/log",
306306
"//pkg/util/log/eventpb",
307307
"//pkg/util/log/logcrash",
308+
"//pkg/util/log/logmetrics",
308309
"//pkg/util/log/logpb",
309310
"//pkg/util/log/severity",
310311
"//pkg/util/metric",

pkg/server/server.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ import (
107107
"github.com/cockroachdb/cockroach/pkg/util/goschedstats"
108108
"github.com/cockroachdb/cockroach/pkg/util/hlc"
109109
"github.com/cockroachdb/cockroach/pkg/util/log"
110+
"github.com/cockroachdb/cockroach/pkg/util/log/logmetrics"
110111
"github.com/cockroachdb/cockroach/pkg/util/metric"
111112
"github.com/cockroachdb/cockroach/pkg/util/mon"
112113
"github.com/cockroachdb/cockroach/pkg/util/netutil"
@@ -1839,9 +1840,16 @@ func (s *Server) PreStart(ctx context.Context) error {
18391840
})
18401841
})
18411842

1843+
// Init a log metrics registry.
1844+
logRegistry := logmetrics.NewRegistry()
1845+
if logRegistry == nil {
1846+
panic(errors.AssertionFailedf("nil log metrics registry at server startup"))
1847+
}
1848+
18421849
// We can now add the node registry.
18431850
s.recorder.AddNode(
18441851
s.registry,
1852+
logRegistry,
18451853
s.node.Descriptor,
18461854
s.node.startedAt,
18471855
s.cfg.AdvertiseAddr,

pkg/server/status/recorder.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,12 @@ type MetricsRecorder struct {
133133
// nodeRegistry contains, as subregistries, the multiple component-specific
134134
// registries which are recorded as "node level" metrics.
135135
nodeRegistry *metric.Registry
136-
desc roachpb.NodeDescriptor
137-
startedAt int64
136+
// logRegistry contains the global metrics registry used by the logging
137+
// package. NB: The underlying metrics are global, but each server gets
138+
// its own separate registry to avoid things such as colliding labels.
139+
logRegistry *metric.Registry
140+
desc roachpb.NodeDescriptor
141+
startedAt int64
138142

139143
// storeRegistries contains a registry for each store on the node. These
140144
// are not stored as subregistries, but rather are treated as wholly
@@ -193,6 +197,7 @@ func (mr *MetricsRecorder) AddTenantRegistry(tenantID roachpb.TenantID, rec *met
193197
// tenant is initialized.
194198
mr.mu.Do(func() {
195199
mr.mu.nodeRegistry.AddLabel("tenant", catconstants.SystemTenantName)
200+
mr.mu.logRegistry.AddLabel("tenant", catconstants.SystemTenantName)
196201
})
197202
}
198203
mr.mu.tenantRegistries[tenantID] = rec
@@ -207,16 +212,17 @@ func (mr *MetricsRecorder) RemoveTenantRegistry(tenantID roachpb.TenantID) {
207212
}
208213

209214
// AddNode adds the Registry from an initialized node, along with its descriptor
210-
// and start time.
215+
// and start time. It also adds the logging registry.
211216
func (mr *MetricsRecorder) AddNode(
212-
reg *metric.Registry,
217+
nodeReg, logReg *metric.Registry,
213218
desc roachpb.NodeDescriptor,
214219
startedAt int64,
215220
advertiseAddr, httpAddr, sqlAddr string,
216221
) {
217222
mr.mu.Lock()
218223
defer mr.mu.Unlock()
219-
mr.mu.nodeRegistry = reg
224+
mr.mu.nodeRegistry = nodeReg
225+
mr.mu.logRegistry = logReg
220226
mr.mu.desc = desc
221227
mr.mu.startedAt = startedAt
222228

@@ -233,20 +239,20 @@ func (mr *MetricsRecorder) AddNode(
233239
metadata.AddLabel(sqlAddrLabelKey, sqlAddr)
234240
nodeIDGauge := metric.NewGauge(metadata)
235241
nodeIDGauge.Update(int64(desc.NodeID))
236-
reg.AddMetric(nodeIDGauge)
242+
nodeReg.AddMetric(nodeIDGauge)
237243

238244
if !disableNodeAndTenantLabels {
239245
nodeIDInt := int(desc.NodeID)
240246
if nodeIDInt != 0 {
241-
reg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID)))
242-
// We assume that all stores have been added to the registry
243-
// prior to calling `AddNode`.
247+
nodeReg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID)))
248+
logReg.AddLabel("node_id", strconv.Itoa(int(desc.NodeID)))
244249
for _, s := range mr.mu.storeRegistries {
245250
s.AddLabel("node_id", strconv.Itoa(int(desc.NodeID)))
246251
}
247252
}
248253
if mr.tenantNameContainer != nil && mr.tenantNameContainer.String() != catconstants.SystemTenantName {
249-
reg.AddLabel("tenant", mr.tenantNameContainer)
254+
nodeReg.AddLabel("tenant", mr.tenantNameContainer)
255+
logReg.AddLabel("tenant", mr.tenantNameContainer)
250256
}
251257
}
252258
}
@@ -279,7 +285,8 @@ func (mr *MetricsRecorder) MarshalJSON() ([]byte, error) {
279285
return []byte("{}"), nil
280286
}
281287
topLevel := map[string]interface{}{
282-
fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry,
288+
fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry,
289+
fmt.Sprintf("node.%d.log", mr.mu.desc.NodeID): mr.mu.logRegistry,
283290
}
284291
// Add collection of stores to top level. JSON requires that keys be strings,
285292
// so we must convert the store ID to a string.
@@ -304,6 +311,7 @@ func (mr *MetricsRecorder) ScrapeIntoPrometheus(pm *metric.PrometheusExporter) {
304311
}
305312
includeChildMetrics := ChildMetricsEnabled.Get(&mr.settings.SV)
306313
pm.ScrapeRegistry(mr.mu.nodeRegistry, includeChildMetrics)
314+
pm.ScrapeRegistry(mr.mu.logRegistry, includeChildMetrics)
307315
for _, reg := range mr.mu.storeRegistries {
308316
pm.ScrapeRegistry(reg, includeChildMetrics)
309317
}
@@ -363,6 +371,9 @@ func (mr *MetricsRecorder) GetTimeSeriesData() []tspb.TimeSeriesData {
363371
timestampNanos: now.UnixNano(),
364372
}
365373
recorder.record(&data)
374+
// Now record the log metrics.
375+
recorder.registry = mr.mu.logRegistry
376+
recorder.record(&data)
366377

367378
// Record time series from node-level registries for secondary tenants.
368379
for tenantID, r := range mr.mu.tenantRegistries {
@@ -422,6 +433,7 @@ func (mr *MetricsRecorder) GetMetricsMetadata() map[string]metric.Metadata {
422433
metrics := make(map[string]metric.Metadata)
423434

424435
mr.mu.nodeRegistry.WriteMetricsMetadata(metrics)
436+
mr.mu.logRegistry.WriteMetricsMetadata(metrics)
425437

426438
// Get a random storeID.
427439
var sID roachpb.StoreID
@@ -511,6 +523,9 @@ func (mr *MetricsRecorder) GenerateNodeStatus(ctx context.Context) *statuspb.Nod
511523
eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) {
512524
nodeStat.Metrics[name] = val
513525
})
526+
eachRecordableValue(mr.mu.logRegistry, func(name string, val float64) {
527+
nodeStat.Metrics[name] = val
528+
})
514529

515530
// Generate status summaries for stores.
516531
for storeID, r := range mr.mu.storeRegistries {

pkg/server/status/recorder_test.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ func TestMetricsRecorderLabels(t *testing.T) {
118118
manual,
119119
st,
120120
)
121-
recorder.AddNode(reg1, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432")
121+
logReg := metric.NewRegistry()
122+
recorder.AddNode(reg1, logReg, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432")
122123

123124
nodeDescTenant := roachpb.NodeDescriptor{
124125
NodeID: roachpb.NodeID(7),
@@ -137,7 +138,7 @@ func TestMetricsRecorderLabels(t *testing.T) {
137138
manual,
138139
stTenant,
139140
)
140-
recorderTenant.AddNode(regTenant, nodeDescTenant, 50, "foo:26257", "foo:26258", "foo:5432")
141+
recorderTenant.AddNode(regTenant, logReg, nodeDescTenant, 50, "foo:26257", "foo:26258", "foo:5432")
141142

142143
// ========================================
143144
// Verify that the recorder exports metrics for tenants as text.
@@ -151,6 +152,10 @@ func TestMetricsRecorderLabels(t *testing.T) {
151152
regTenant.AddMetric(g2)
152153
g2.Update(456)
153154

155+
c1 := metric.NewCounter(metric.Metadata{Name: "some_log_metric"})
156+
logReg.AddMetric(c1)
157+
c1.Inc(2)
158+
154159
recorder.AddTenantRegistry(tenantID, regTenant)
155160

156161
buf := bytes.NewBuffer([]byte{})
@@ -211,6 +216,16 @@ func TestMetricsRecorderLabels(t *testing.T) {
211216
},
212217
},
213218
},
219+
{
220+
Name: "cr.node.some_log_metric",
221+
Source: "7",
222+
Datapoints: []tspb.TimeSeriesDatapoint{
223+
{
224+
TimestampNanos: manual.Now().UnixNano(),
225+
Value: float64(2),
226+
},
227+
},
228+
},
214229
// App tenant metrics
215230
{
216231
Name: "cr.node.node-id",
@@ -435,7 +450,8 @@ func TestMetricsRecorder(t *testing.T) {
435450
recorder := NewMetricsRecorder(roachpb.SystemTenantID, roachpb.NewTenantNameContainer(""), nil, nil, manual, st)
436451
recorder.AddStore(store1)
437452
recorder.AddStore(store2)
438-
recorder.AddNode(reg1, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432")
453+
logReg := metric.NewRegistry()
454+
recorder.AddNode(reg1, logReg, nodeDesc, 50, "foo:26257", "foo:26258", "foo:5432")
439455

440456
// Ensure the metric system's view of time does not advance during this test
441457
// as the test expects time to not advance too far which would age the actual
@@ -467,6 +483,12 @@ func TestMetricsRecorder(t *testing.T) {
467483
source: 1,
468484
isNode: true,
469485
},
486+
{
487+
reg: logReg,
488+
prefix: "log.",
489+
source: 1,
490+
isNode: true,
491+
},
470492
{
471493
reg: store1.registry,
472494
prefix: "",

pkg/server/tenant.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ import (
6666
"github.com/cockroachdb/cockroach/pkg/util/admission"
6767
"github.com/cockroachdb/cockroach/pkg/util/hlc"
6868
"github.com/cockroachdb/cockroach/pkg/util/log"
69+
"github.com/cockroachdb/cockroach/pkg/util/log/logmetrics"
6970
"github.com/cockroachdb/cockroach/pkg/util/metric"
7071
"github.com/cockroachdb/cockroach/pkg/util/netutil"
7172
"github.com/cockroachdb/cockroach/pkg/util/schedulerlatency"
@@ -642,9 +643,16 @@ func (s *SQLServerWrapper) PreStart(ctx context.Context) error {
642643
})
643644
})
644645

646+
// Init a log metrics registry.
647+
logRegistry := logmetrics.NewRegistry()
648+
if logRegistry == nil {
649+
panic(errors.AssertionFailedf("nil log metrics registry at server startup"))
650+
}
651+
645652
// We can now add the node registry.
646653
s.recorder.AddNode(
647654
s.registry,
655+
logRegistry,
648656
roachpb.NodeDescriptor{
649657
NodeID: s.rpcContext.NodeID.Get(),
650658
},

0 commit comments

Comments
 (0)