Skip to content

Commit 90dad61

Browse files
fix(metrics): reset backup gauges after schedule deactivation (#176)
1 parent 8accf0d commit 90dad61

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

internal/metrics/metrics.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,18 @@ func (s *MetricsRegistryImpl) ResetScheduleCounters(schedule *types.BackupSchedu
359359
schedule.ID,
360360
scheduleNameLabel,
361361
)
362+
363+
s.backupsFailedCount.DeleteLabelValues(
364+
schedule.ContainerID,
365+
schedule.DatabaseName,
366+
scheduleNameLabel,
367+
)
368+
369+
s.backupsSucceededCount.DeleteLabelValues(
370+
schedule.ContainerID,
371+
schedule.DatabaseName,
372+
scheduleNameLabel,
373+
)
362374
}
363375

364376
func InitializeMetricsRegistry(

internal/metrics/metrics_test.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ import (
1010
"sync"
1111
"testing"
1212
"time"
13+
"ydbcp/internal/types"
1314

1415
"ydbcp/internal/config"
1516

17+
"github.com/prometheus/client_model/go"
1618
"github.com/stretchr/testify/assert"
1719
)
1820

@@ -70,3 +72,99 @@ func TestMetricsCount(t *testing.T) {
7072
cancel()
7173
wg.Wait()
7274
}
75+
76+
func TestResetScheduleMetrics(t *testing.T) {
77+
var wg sync.WaitGroup
78+
ctx, cancel := context.WithCancel(context.Background())
79+
defer cancel()
80+
81+
cfg := &config.MetricsServerConfig{
82+
BindPort: 8080,
83+
BindAddress: "127.0.0.1",
84+
}
85+
InitializeMetricsRegistry(ctx, &wg, cfg, clockwork.NewFakeClock())
86+
s := GlobalMetricsRegistry.(*MetricsRegistryImpl)
87+
88+
scheduleName := "test_schedule"
89+
schedule := types.BackupSchedule{
90+
ID: "123",
91+
ContainerID: "test_container",
92+
DatabaseName: "test_db",
93+
Name: &scheduleName,
94+
}
95+
96+
// set schedule metrics
97+
s.scheduleLastBackupTimestamp.WithLabelValues(
98+
schedule.ContainerID,
99+
schedule.DatabaseName,
100+
schedule.ID,
101+
scheduleName,
102+
).Set(float64(time.Now().Unix()))
103+
104+
s.backupsSucceededCount.WithLabelValues(schedule.ContainerID, schedule.DatabaseName, scheduleName).Set(1)
105+
s.backupsFailedCount.WithLabelValues(schedule.ContainerID, schedule.DatabaseName, scheduleName).Set(0)
106+
107+
getMetricValue := func(metricFamilies []*io_prometheus_client.MetricFamily, familyName string) (float64, bool) {
108+
for _, mf := range metricFamilies {
109+
if mf.GetName() == familyName {
110+
var val float64
111+
for _, m := range mf.Metric {
112+
if m.Counter != nil {
113+
val += m.Counter.GetValue()
114+
}
115+
116+
if m.Gauge != nil {
117+
val += m.Gauge.GetValue()
118+
}
119+
}
120+
return val, true
121+
}
122+
}
123+
return 0, false
124+
}
125+
126+
// check metrics before ResetScheduleCounters
127+
metricFamilies, err := GlobalMetricsRegistry.GetReg().Gather()
128+
assert.Equal(t, nil, err)
129+
130+
{ // check schedules_last_backup_timestamp
131+
_, found := getMetricValue(metricFamilies, "schedules_last_backup_timestamp")
132+
assert.Equal(t, true, found)
133+
}
134+
135+
{ // check backups_succeeded_count (should be 1)
136+
val, found := getMetricValue(metricFamilies, "backups_succeeded_count")
137+
assert.Equal(t, true, found)
138+
assert.Equal(t, 1, int(val))
139+
}
140+
141+
{ // check backups_failed_count (should be 0)
142+
val, found := getMetricValue(metricFamilies, "backups_failed_count")
143+
assert.Equal(t, true, found)
144+
assert.Equal(t, 0, int(val))
145+
}
146+
147+
GlobalMetricsRegistry.ResetScheduleCounters(&schedule)
148+
149+
// check metrics after ResetScheduleCounters
150+
metricFamilies, err = GlobalMetricsRegistry.GetReg().Gather()
151+
assert.Equal(t, nil, err)
152+
153+
{ // check schedules_last_backup_timestamp (should be deleted)
154+
_, found := getMetricValue(metricFamilies, "schedules_last_backup_timestamp")
155+
assert.Equal(t, false, found)
156+
}
157+
158+
{ // check backups_succeeded_count (should be deleted)
159+
_, found := getMetricValue(metricFamilies, "backups_succeeded_count")
160+
assert.Equal(t, false, found)
161+
}
162+
163+
{ // check backups_failed_count (should be deleted)
164+
_, found := getMetricValue(metricFamilies, "backups_failed_count")
165+
assert.Equal(t, false, found)
166+
}
167+
168+
cancel()
169+
wg.Wait()
170+
}

0 commit comments

Comments
 (0)