Skip to content

Commit 4a5e215

Browse files
Fix: Ensure metrics are properly exported with OpenTelemetry
The issue was that metrics were being created during package init(), before the meter provider was set up with the Prometheus exporter.

Changes:
1. Modified provider.go to use lazy initialization and allow SetupMeterProvider() to be called explicitly after all readers are added.
2. Changed problemmetrics to use lazy initialization via an interface, deferring metric creation until first use.
3. Reordered main initialization: exporters are now set up first, then SetupMeterProvider() is called, then problem daemons are initialized.

This ensures that when metrics are created, the meter provider already has all the configured readers/exporters attached.

Co-authored-by: MartinForReal <[email protected]>
1 parent e277f75 commit 4a5e215

File tree

3 files changed

+75
-44
lines changed

3 files changed

+75
-44
lines changed

cmd/nodeproblemdetector/node_problem_detector.go

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"k8s.io/node-problem-detector/pkg/problemdaemon"
3131
"k8s.io/node-problem-detector/pkg/problemdetector"
3232
"k8s.io/node-problem-detector/pkg/types"
33+
"k8s.io/node-problem-detector/pkg/util/metrics"
3334
"k8s.io/node-problem-detector/pkg/version"
3435
)
3536

@@ -43,13 +44,8 @@ func npdMain(ctx context.Context, npdo *options.NodeProblemDetectorOptions) erro
4344
npdo.SetConfigFromDeprecatedOptionsOrDie()
4445
npdo.ValidOrDie()
4546

46-
// Initialize problem daemons.
47-
problemDaemons := problemdaemon.NewProblemDaemons(npdo.MonitorConfigPaths)
48-
if len(problemDaemons) == 0 {
49-
klog.Fatalf("No problem daemon is configured")
50-
}
51-
52-
// Initialize exporters.
47+
// Initialize exporters first so that metric readers are registered before
48+
// the meter provider is set up.
5349
defaultExporters := []types.Exporter{}
5450
if ke := k8sexporter.NewExporterOrDie(ctx, npdo); ke != nil {
5551
defaultExporters = append(defaultExporters, ke)
@@ -70,6 +66,16 @@ func npdMain(ctx context.Context, npdo *options.NodeProblemDetectorOptions) erro
7066
klog.Fatalf("No exporter is successfully setup")
7167
}
7268

69+
// Set up the meter provider after all exporters have registered their readers.
70+
// This ensures metrics are properly exported to all configured backends.
71+
metrics.SetupMeterProvider()
72+
73+
// Initialize problem daemons after meter provider is set up.
74+
problemDaemons := problemdaemon.NewProblemDaemons(npdo.MonitorConfigPaths)
75+
if len(problemDaemons) == 0 {
76+
klog.Fatalf("No problem daemon is configured")
77+
}
78+
7379
// Initialize NPD core.
7480
p := problemdetector.NewProblemDetector(problemDaemons, npdExporters)
7581
return p.Run(ctx)

pkg/problemmetrics/problem_metrics.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,34 @@ import (
2929
// GlobalProblemMetricsManager is a singleton of ProblemMetricsManager,
3030
// which should be used to manage all problem-converted metrics across all
3131
// problem daemons.
32-
var GlobalProblemMetricsManager *ProblemMetricsManager
32+
var GlobalProblemMetricsManager ProblemMetricsManagerInterface = &lazyProblemMetricsManager{}
3333

34-
func init() {
35-
GlobalProblemMetricsManager = NewProblemMetricsManagerOrDie()
34+
// ProblemMetricsManagerInterface defines the interface for problem metrics management.
35+
type ProblemMetricsManagerInterface interface {
36+
IncrementProblemCounter(reason string, count int64) error
37+
SetProblemGauge(problemType string, reason string, value bool) error
38+
}
39+
40+
// lazyProblemMetricsManager wraps ProblemMetricsManager with lazy initialization.
41+
type lazyProblemMetricsManager struct {
42+
once sync.Once
43+
manager *ProblemMetricsManager
44+
}
45+
46+
func (l *lazyProblemMetricsManager) init() {
47+
l.once.Do(func() {
48+
l.manager = NewProblemMetricsManagerOrDie()
49+
})
50+
}
51+
52+
func (l *lazyProblemMetricsManager) IncrementProblemCounter(reason string, count int64) error {
53+
l.init()
54+
return l.manager.IncrementProblemCounter(reason, count)
55+
}
56+
57+
func (l *lazyProblemMetricsManager) SetProblemGauge(problemType string, reason string, value bool) error {
58+
l.init()
59+
return l.manager.SetProblemGauge(problemType, reason, value)
3660
}
3761

3862
// ProblemMetricsManager manages problem-converted metrics.

pkg/util/metrics/provider.go

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,11 @@ import (
2626
)
2727

2828
var (
29-
meterProvider *sdkmetric.MeterProvider
30-
meterProviderOnce sync.Once
31-
meter metric.Meter
32-
meterOnce sync.Once
33-
readers []sdkmetric.Reader
34-
resources []*resource.Resource
35-
readersMutex sync.Mutex
29+
meterProvider *sdkmetric.MeterProvider
30+
readers []sdkmetric.Reader
31+
resources []*resource.Resource
32+
readersMutex sync.Mutex
33+
initialized bool
3634
)
3735

3836
// AddReader adds a metric reader to be used when setting up the meter provider.
@@ -54,47 +52,50 @@ func AddReaderWithResource(reader sdkmetric.Reader, res *resource.Resource) {
5452

5553
// SetupMeterProvider initializes the global meter provider with all registered readers.
5654
// This should be called after all readers have been added.
55+
// Can be called multiple times safely - only the first call takes effect.
5756
func SetupMeterProvider() {
58-
meterProviderOnce.Do(func() {
59-
readersMutex.Lock()
60-
defer readersMutex.Unlock()
57+
readersMutex.Lock()
58+
defer readersMutex.Unlock()
6159

62-
opts := make([]sdkmetric.Option, 0, len(readers)+1)
63-
for _, reader := range readers {
64-
opts = append(opts, sdkmetric.WithReader(reader))
65-
}
60+
if initialized {
61+
return
62+
}
63+
initialized = true
6664

67-
// Merge all resources if any
68-
if len(resources) > 0 {
69-
merged := resources[0]
70-
for i := 1; i < len(resources); i++ {
71-
var err error
72-
merged, err = resource.Merge(merged, resources[i])
73-
if err != nil {
74-
// If merge fails, continue with what we have
75-
continue
76-
}
65+
opts := make([]sdkmetric.Option, 0, len(readers)+1)
66+
for _, reader := range readers {
67+
opts = append(opts, sdkmetric.WithReader(reader))
68+
}
69+
70+
// Merge all resources if any
71+
if len(resources) > 0 {
72+
merged := resources[0]
73+
for i := 1; i < len(resources); i++ {
74+
var err error
75+
merged, err = resource.Merge(merged, resources[i])
76+
if err != nil {
77+
// If merge fails, continue with what we have
78+
continue
7779
}
78-
opts = append(opts, sdkmetric.WithResource(merged))
7980
}
81+
opts = append(opts, sdkmetric.WithResource(merged))
82+
}
8083

81-
meterProvider = sdkmetric.NewMeterProvider(opts...)
82-
otel.SetMeterProvider(meterProvider)
83-
})
84+
meterProvider = sdkmetric.NewMeterProvider(opts...)
85+
otel.SetMeterProvider(meterProvider)
8486
}
8587

8688
// GetMeter returns the global meter for creating metrics.
89+
// Note: Metrics created before SetupMeterProvider() is called will use a no-op meter
90+
// and won't be exported. Always call SetupMeterProvider() after adding all readers.
8791
func GetMeter() metric.Meter {
88-
meterOnce.Do(func() {
89-
// Ensure meter provider is set up
90-
SetupMeterProvider()
91-
meter = otel.Meter("k8s.io/node-problem-detector")
92-
})
93-
return meter
92+
return otel.Meter("k8s.io/node-problem-detector")
9493
}
9594

9695
// ShutdownMeterProvider gracefully shuts down the meter provider.
9796
func ShutdownMeterProvider() error {
97+
readersMutex.Lock()
98+
defer readersMutex.Unlock()
9899
if meterProvider != nil {
99100
return meterProvider.Shutdown(context.Background())
100101
}

0 commit comments

Comments
 (0)