Skip to content

Commit 0ac5d74

Browse files
committed
fix: clear aggregatedInflightEventMetric after flushing
1 parent 110d283 commit 0ac5d74

File tree

3 files changed

+13
-7
lines changed

3 files changed

+13
-7
lines changed

pkg/scheduler/metrics/metric_recorder.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ func (r *PendingPodsRecorder) Clear() {
8080
r.recorder.Set(float64(0))
8181
}
8282

83-
// histgramVecMetric is the data structure passed in the buffer channel between the main framework thread
83+
// histogramVecMetric is the data structure passed in the buffer channel between the main framework thread
8484
// and the metricsRecorder goroutine.
85-
type histgramVecMetric struct {
85+
type histogramVecMetric struct {
8686
metric *metrics.HistogramVec
8787
labelValues []string
8888
value float64
@@ -102,7 +102,7 @@ type gaugeVecMetricKey struct {
102102
// MetricAsyncRecorder records metric in a separate goroutine to avoid overhead in the critical path.
103103
type MetricAsyncRecorder struct {
104104
// bufferCh is a channel that serves as a metrics buffer before the metricsRecorder goroutine reports it.
105-
bufferCh chan *histgramVecMetric
105+
bufferCh chan *histogramVecMetric
106106
// if bufferSize is reached, incoming metrics will be discarded.
107107
bufferSize int
108108
// how often the recorder runs to flush the metrics.
@@ -125,7 +125,7 @@ type MetricAsyncRecorder struct {
125125

126126
func NewMetricsAsyncRecorder(bufferSize int, interval time.Duration, stopCh <-chan struct{}) *MetricAsyncRecorder {
127127
recorder := &MetricAsyncRecorder{
128-
bufferCh: make(chan *histgramVecMetric, bufferSize),
128+
bufferCh: make(chan *histogramVecMetric, bufferSize),
129129
bufferSize: bufferSize,
130130
interval: interval,
131131
stopCh: stopCh,
@@ -156,8 +156,9 @@ func (r *MetricAsyncRecorder) ObserveQueueingHintDurationAsync(pluginName, event
156156
func (r *MetricAsyncRecorder) ObserveInFlightEventsAsync(eventLabel string, valueToAdd float64) {
157157
r.aggregatedInflightEventMetric[gaugeVecMetricKey{metricName: InFlightEvents.Name, labelValue: eventLabel}] += int(valueToAdd)
158158

159-
// Only flush the metric to the channal if the interval is reached.
159+
// Only flush the metric to the channel if the interval is reached.
160160
// The values are flushed to Prometheus in the run() function, which runs once per interval.
161+
// Note: the flush is done here rather than in FlushMetrics because doing it there would require a lock around the map, which we want to avoid.
161162
if time.Since(r.aggregatedInflightEventMetricLastFlushTime) > r.interval {
162163
for key, value := range r.aggregatedInflightEventMetric {
163164
newMetric := &gaugeVecMetric{
@@ -171,11 +172,13 @@ func (r *MetricAsyncRecorder) ObserveInFlightEventsAsync(eventLabel string, valu
171172
}
172173
}
173174
r.aggregatedInflightEventMetricLastFlushTime = time.Now()
175+
// reset
176+
r.aggregatedInflightEventMetric = make(map[gaugeVecMetricKey]int)
174177
}
175178
}
176179

177180
func (r *MetricAsyncRecorder) observeMetricAsync(m *metrics.HistogramVec, value float64, labelsValues ...string) {
178-
newMetric := &histgramVecMetric{
181+
newMetric := &histogramVecMetric{
179182
metric: m,
180183
labelValues: labelsValues,
181184
value: value,

pkg/scheduler/metrics/metric_recorder_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ func TestInFlightEventAsync(t *testing.T) {
131131

132132
// It adds -4 and flushes the metric to the channel.
133133
r.ObserveInFlightEventsAsync(podAddLabel, -4)
134+
if len(r.aggregatedInflightEventMetric) != 0 {
135+
t.Errorf("aggregatedInflightEventMetric should be cleared, but got: %v", r.aggregatedInflightEventMetric)
136+
}
134137

135138
got := []gaugeVecMetric{}
136139
for {

pkg/scheduler/metrics/metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ var (
149149
&metrics.GaugeOpts{
150150
Subsystem: SchedulerSubsystem,
151151
Name: "inflight_events",
152-
Help: "Number of events recorded in the scheduling queue.",
152+
Help: "Number of events currently tracked in the scheduling queue.",
153153
StabilityLevel: metrics.ALPHA,
154154
}, []string{"event"})
155155
Goroutines = metrics.NewGaugeVec(

0 commit comments

Comments
 (0)