Skip to content

Commit 4a3695f

Browse files
committed
pkg/util/log: introduce LogMetrics interface
Logging is a critical subsystem within CRDB, but as things are today, we have very little observability into logging itself. For starters, we have no metrics in the logging package at all! This makes it difficult to observe things within the log package. For example, if a fluent-server log sink fails to connect to FluentBit, how can we tell? We get some STDOUT message, but that's about it. It's time to get some metrics into the log package. Doing so is a bit of a tricky dance, because pretty much every package in CRDB imports the logging package, meaning you almost always get a circular dependency when trying to make use of any library within pkg/util/log. Therefore, we must inject metrics functionality into the logging package. This patch provides a means of doing so. Within the log package, we add a new interface called `LogMetrics` with functions that enable its users to modify metrics. The implementation of the interface can live elsewhere, where circular dependencies aren't such a pain. We can then inject the implementation into the log package. This patch also provides a basic implementation to be used. Future patches will plumb the implementation into the log package and actually modify the supported metrics. Release note: none
1 parent 7f011c7 commit 4a3695f

File tree

7 files changed

+266
-0
lines changed

7 files changed

+266
-0
lines changed

pkg/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ ALL_TESTS = [
648648
"//pkg/util/log/eventpb:eventpb_test",
649649
"//pkg/util/log/logconfig:logconfig_test",
650650
"//pkg/util/log/logcrash:logcrash_test",
651+
"//pkg/util/log/logmetrics:logmetrics_test",
651652
"//pkg/util/log/testshout:testshout_test",
652653
"//pkg/util/log:log_test",
653654
"//pkg/util/metric/aggmetric:aggmetric_test",
@@ -2296,6 +2297,8 @@ GO_TARGETS = [
22962297
"//pkg/util/log/logcrash:logcrash",
22972298
"//pkg/util/log/logcrash:logcrash_test",
22982299
"//pkg/util/log/logflags:logflags",
2300+
"//pkg/util/log/logmetrics:logmetrics",
2301+
"//pkg/util/log/logmetrics:logmetrics_test",
22992302
"//pkg/util/log/logpb:logpb",
23002303
"//pkg/util/log/logtestutils:logtestutils",
23012304
"//pkg/util/log/logutil:logutil",

pkg/util/log/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ go_library(
3434
"log_decoder.go",
3535
"log_entry.go",
3636
"log_flush.go",
37+
"metric.go",
3738
"redact.go",
3839
"registry.go",
3940
"sinks.go",

pkg/util/log/clog.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ type loggingT struct {
109109

110110
allSinkInfos sinkInfoRegistry
111111
allLoggers loggerRegistry
112+
metrics LogMetrics
113+
}
114+
115+
// SetLogMetrics injects an initialized implementation of
116+
// the LogMetrics interface into the logging package. The
117+
// implementation must be injected to avoid a dependency
118+
// cycle.
119+
//
120+
// Should be called within the init() function of the
121+
// implementing package to avoid the possibility of a nil
122+
// LogMetrics during server startups.
123+
func SetLogMetrics(m LogMetrics) {
124+
logging.metrics = m
112125
}
113126

114127
func init() {
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
2+
3+
go_library(
4+
name = "logmetrics",
5+
srcs = ["metrics.go"],
6+
importpath = "github.com/cockroachdb/cockroach/pkg/util/log/logmetrics",
7+
visibility = ["//visibility:public"],
8+
deps = [
9+
"//pkg/util/log",
10+
"//pkg/util/metric",
11+
"//pkg/util/syncutil",
12+
"@com_github_cockroachdb_errors//:errors",
13+
],
14+
)
15+
16+
go_test(
17+
name = "logmetrics_test",
18+
srcs = ["metrics_test.go"],
19+
args = ["-test.timeout=295s"],
20+
embed = [":logmetrics"],
21+
deps = [
22+
"//pkg/util/leaktest",
23+
"//pkg/util/log",
24+
"//pkg/util/metric",
25+
"@com_github_stretchr_testify//require",
26+
],
27+
)

pkg/util/log/logmetrics/metrics.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright 2023 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the file licenses/BSL.txt.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0, included in the file
9+
// licenses/APL.txt.
10+
11+
package logmetrics
12+
13+
import (
14+
"github.com/cockroachdb/cockroach/pkg/util/log"
15+
"github.com/cockroachdb/cockroach/pkg/util/metric"
16+
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
17+
"github.com/cockroachdb/errors"
18+
)
19+
20+
var (
21+
// logMetricsReg is a singleton instance of the LogMetricsRegistry.
22+
logMetricsReg = newLogMetricsRegistry()
23+
FluentSinkConnErrors = metric.Metadata{
24+
Name: string(log.FluentSinkConnectionError),
25+
Help: "Number of connection errors experienced by fluent-server logging sinks",
26+
Measurement: "fluent-server log sink connection errors",
27+
Unit: metric.Unit_COUNT,
28+
}
29+
)
30+
31+
// logMetricsStruct is a struct used to contain all metrics
32+
// tracked by the LogMetricsRegistry. This container is necessary
33+
// to register all the metrics with the Registry internal to the
34+
// LogMetricsRegistry.
35+
type logMetricsStruct struct {
36+
FluentSinkConnErrors *metric.Counter
37+
}
38+
39+
// LogMetricsRegistry is a log.LogMetrics implementation used in the
40+
// logging package to give it access to metrics without introducing a
41+
// circular dependency.
42+
//
43+
// All metrics meant to be available to the logging package must be
44+
// registered at the time of initialization.
45+
//
46+
// LogMetricsRegistry is thread-safe.
47+
type LogMetricsRegistry struct {
48+
mu struct {
49+
syncutil.Mutex
50+
// metricsStruct holds the same metrics as the below structures, but
51+
// provides an easy way to inject them into metric.Registry's on demand
52+
// in NewRegistry().
53+
metricsStruct logMetricsStruct
54+
counters map[log.MetricName]*metric.Counter
55+
}
56+
}
57+
58+
var _ log.LogMetrics = (*LogMetricsRegistry)(nil)
59+
60+
func newLogMetricsRegistry() *LogMetricsRegistry {
61+
registry := &LogMetricsRegistry{}
62+
registry.registerCounters()
63+
return registry
64+
}
65+
66+
func (l *LogMetricsRegistry) registerCounters() {
67+
l.mu.Lock()
68+
defer l.mu.Unlock()
69+
l.mu.counters = make(map[log.MetricName]*metric.Counter)
70+
// Create the metrics struct for us to add to registries as they're
71+
// requested.
72+
l.mu.metricsStruct = logMetricsStruct{
73+
FluentSinkConnErrors: metric.NewCounter(FluentSinkConnErrors),
74+
}
75+
// Be sure to also add the metrics to our internal store, for
76+
// recall in functions such as IncrementCounter.
77+
l.mu.counters[log.MetricName(FluentSinkConnErrors.Name)] = l.mu.metricsStruct.FluentSinkConnErrors
78+
}
79+
80+
// NewRegistry initializes and returns a new metric.Registry, populated with metrics
81+
// tracked by the LogMetricsRegistry. While the metrics tracked by the logmetrics package
82+
// are global, they may be shared by multiple servers, test servers, etc. Therefore, we
83+
// need the means to label the metrics separately depending on the server, tenant, etc.
84+
// serving them. For this reason, we provide the ability to track the same log metrics
85+
// across multiple registries.
86+
func NewRegistry() *metric.Registry {
87+
if logMetricsReg == nil {
88+
panic(errors.AssertionFailedf("LogMetricsRegistry was not initialized"))
89+
}
90+
reg := metric.NewRegistry()
91+
logMetricsReg.mu.Lock()
92+
defer logMetricsReg.mu.Unlock()
93+
reg.AddMetricStruct(logMetricsReg.mu.metricsStruct)
94+
return reg
95+
}
96+
97+
// IncrementCounter increments thegi Counter held by the given alias. If a log.MetricName
98+
// is provided as an argument, but is not registered with the LogMetricsRegistry, this function
99+
// panics.
100+
func (l *LogMetricsRegistry) IncrementCounter(metric log.MetricName, amount int64) {
101+
l.mu.Lock()
102+
defer l.mu.Unlock()
103+
counter, ok := l.mu.counters[metric]
104+
if !ok {
105+
panic(errors.AssertionFailedf("MetricName not registered in LogMetricsRegistry: %q", string(metric)))
106+
}
107+
counter.Inc(amount)
108+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright 2023 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the file licenses/BSL.txt.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0, included in the file
9+
// licenses/APL.txt.
10+
11+
package logmetrics
12+
13+
import (
14+
"testing"
15+
16+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
17+
"github.com/cockroachdb/cockroach/pkg/util/log"
18+
"github.com/cockroachdb/cockroach/pkg/util/metric"
19+
"github.com/stretchr/testify/require"
20+
)
21+
22+
func TestIncrementCounter(t *testing.T) {
23+
defer leaktest.AfterTest(t)()
24+
defer log.Scope(t).Close(t)
25+
26+
t.Run("panics when log.MetricName not registered", func(t *testing.T) {
27+
l := &LogMetricsRegistry{}
28+
l.mu.counters = map[log.MetricName]*metric.Counter{}
29+
require.PanicsWithErrorf(t,
30+
`MetricName not registered in LogMetricsRegistry: "unregistered"`,
31+
func() {
32+
l.IncrementCounter("unregistered", 1)
33+
}, "expected IncrementCounter to panic for unregistered metric")
34+
})
35+
36+
t.Run("increments counter", func(t *testing.T) {
37+
l := newLogMetricsRegistry()
38+
func() {
39+
l.mu.Lock()
40+
defer l.mu.Unlock()
41+
require.Zero(t, l.mu.metricsStruct.FluentSinkConnErrors.Count())
42+
}()
43+
l.IncrementCounter(log.FluentSinkConnectionError, 1)
44+
l.IncrementCounter(log.FluentSinkConnectionError, 2)
45+
func() {
46+
l.mu.Lock()
47+
defer l.mu.Unlock()
48+
require.Equal(t, int64(3), l.mu.metricsStruct.FluentSinkConnErrors.Count())
49+
}()
50+
})
51+
}
52+
53+
func TestNewRegistry(t *testing.T) {
54+
defer leaktest.AfterTest(t)()
55+
defer log.Scope(t).Close(t)
56+
57+
t.Run("panics when logMetricsReg is nil", func(t *testing.T) {
58+
logMetricsReg = nil
59+
require.PanicsWithErrorf(t,
60+
"LogMetricsRegistry was not initialized",
61+
func() {
62+
_ = NewRegistry()
63+
}, "expected NewRegistry() to panic with nil logMetricsReg package-level var")
64+
})
65+
}
66+
67+
type fakeLogMetrics struct{}
68+
69+
func (*fakeLogMetrics) IncrementCounter(_ log.MetricName, _ int64) {}
70+
71+
var _ log.LogMetrics = (*fakeLogMetrics)(nil)

pkg/util/log/metric.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// Copyright 2023 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the file licenses/BSL.txt.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0, included in the file
9+
// licenses/APL.txt.
10+
11+
package log
12+
13+
// LogMetrics enables the registration and recording of metrics
14+
// within the log package.
15+
//
16+
// Because the log package is imported by nearly every package
17+
// within CRDB, it's difficult to add new dependencies to the
18+
// log package without introducing a circular dependency.
19+
//
20+
// The LogMetrics interface provides us with a way to still
21+
// make use of the metrics library within the log package via
22+
// dependency injection, allowing the implementation to live
23+
// elsewhere (e.g. the metrics package).
24+
type LogMetrics interface {
25+
// IncrementCounter increments the Counter metric associated with the
26+
// provided MetricName by the given amount, assuming the
27+
// metric has been registered.
28+
//
29+
// The LogMetrics implementation must have metadata defined
30+
// for the given MetricName within its own scope. See
31+
// pkg/util/log/logmetrics for details.
32+
IncrementCounter(metric MetricName, amount int64)
33+
}
34+
35+
// MetricName represents the name of a metric registered &
36+
// used within the log package, available to use in the LogMetrics
37+
// interface.
38+
type MetricName string
39+
40+
// FluentSinkConnectionError is the MetricName for the metric
41+
// used to count fluent-server log sink connection errors. Please
42+
// refer to its metric metadata for more details (hint: see usages).
43+
const FluentSinkConnectionError MetricName = "fluent.sink.conn.errors"

0 commit comments

Comments
 (0)