@@ -109,6 +109,7 @@ func (s *MetricsRegistryImpl) IncOperationsStartedCounter(operation types.Operat
109109 operation .GetContainerID (),
110110 operation .GetDatabaseName (),
111111 operation .GetType ().String (),
112+ operation .GetTypeDescription (),
112113 label ,
113114 ).Inc ()
114115}
@@ -131,6 +132,7 @@ func (s *MetricsRegistryImpl) ReportOperationInflight(operation types.Operation)
131132 operation .GetContainerID (),
132133 operation .GetDatabaseName (),
133134 operation .GetType ().String (),
135+ operation .GetTypeDescription (),
134136 operation .GetState ().String (),
135137 label ,
136138 ).Inc ()
@@ -141,6 +143,7 @@ func (s *MetricsRegistryImpl) ReportOperationInflight(operation types.Operation)
141143 operation .GetContainerID (),
142144 operation .GetDatabaseName (),
143145 operation .GetType ().String (),
146+ operation .GetTypeDescription (),
144147 operation .GetState ().String (),
145148 ).Observe (duration .Seconds ())
146149 }
@@ -154,6 +157,7 @@ func (s *MetricsRegistryImpl) ReportOperationMetrics(operation types.Operation)
154157 operation .GetContainerID (),
155158 operation .GetDatabaseName (),
156159 operation .GetType ().String (),
160+ operation .GetTypeDescription (),
157161 operation .GetState ().String (),
158162 ).Observe (duration .Seconds ())
159163 }
@@ -167,7 +171,12 @@ func (s *MetricsRegistryImpl) ReportOperationMetrics(operation types.Operation)
167171 }
168172
169173 s .operationsFinished .WithLabelValues (
170- operation .GetContainerID (), operation .GetDatabaseName (), operation .GetType ().String (), operation .GetState ().String (), label ,
174+ operation .GetContainerID (),
175+ operation .GetDatabaseName (),
176+ operation .GetType ().String (),
177+ operation .GetTypeDescription (),
178+ operation .GetState ().String (),
179+ label ,
171180 ).Inc ()
172181
173182 }
@@ -203,25 +212,62 @@ func (s *MetricsRegistryImpl) IncCompletedBackupsCount(containerId string, datab
203212}
204213
205214func (s * MetricsRegistryImpl ) IncScheduleCounters (schedule * types.BackupSchedule , err error ) {
215+ var scheduleNameLabel string
216+ if schedule .Name != nil {
217+ scheduleNameLabel = * schedule .Name
218+ } else {
219+ scheduleNameLabel = ""
220+ }
221+
206222 if err != nil {
207- s .scheduleActionFailedCount .WithLabelValues (schedule .ContainerID , schedule .DatabaseName , schedule .ID ).Inc ()
223+ s .scheduleActionFailedCount .WithLabelValues (
224+ schedule .ContainerID ,
225+ schedule .DatabaseName ,
226+ schedule .ID ,
227+ scheduleNameLabel ,
228+ ).Inc ()
208229 } else {
209- s .scheduleActionSucceededCount .WithLabelValues (schedule .ContainerID , schedule .DatabaseName , schedule .ID ).Inc ()
230+ s .scheduleActionSucceededCount .WithLabelValues (
231+ schedule .ContainerID ,
232+ schedule .DatabaseName ,
233+ schedule .ID ,
234+ scheduleNameLabel ,
235+ ).Inc ()
210236 }
211237 if schedule .RecoveryPoint != nil {
212- s .scheduleLastBackupTimestamp .WithLabelValues (schedule .ContainerID , schedule .DatabaseName , schedule .ID ).Set (float64 (schedule .RecoveryPoint .Unix ()))
238+ s .scheduleLastBackupTimestamp .WithLabelValues (
239+ schedule .ContainerID ,
240+ schedule .DatabaseName ,
241+ schedule .ID ,
242+ scheduleNameLabel ,
243+ ).Set (float64 (schedule .RecoveryPoint .Unix ()))
213244 } else if schedule .Audit != nil && schedule .Audit .CreatedAt != nil {
214245 // Report schedule creation time as last backup time if no backups were made
215- s .scheduleLastBackupTimestamp .WithLabelValues (schedule .ContainerID , schedule .DatabaseName , schedule .ID ).Set (float64 (schedule .Audit .CreatedAt .AsTime ().Unix ()))
246+ s .scheduleLastBackupTimestamp .WithLabelValues (
247+ schedule .ContainerID ,
248+ schedule .DatabaseName ,
249+ schedule .ID ,
250+ scheduleNameLabel ,
251+ ).Set (float64 (schedule .Audit .CreatedAt .AsTime ().Unix ()))
216252 }
217253 info := schedule .GetBackupInfo (s .clock )
218254 if info != nil {
219- s .scheduleRPOMarginRatio .WithLabelValues (schedule .ContainerID , schedule .DatabaseName , schedule .ID ).Set (info .LastBackupRpoMarginRatio )
255+ s .scheduleRPOMarginRatio .WithLabelValues (
256+ schedule .ContainerID ,
257+ schedule .DatabaseName ,
258+ schedule .ID ,
259+ scheduleNameLabel ,
260+ ).Set (info .LastBackupRpoMarginRatio )
220261 } else if schedule .Audit != nil && schedule .Audit .CreatedAt != nil && schedule .ScheduleSettings .RecoveryPointObjective != nil {
221262 // Report fake LastBackupRpoMarginRatio based on schedule creation time if no backups were made
222263 fakeRpoMargin := s .clock .Since (schedule .Audit .CreatedAt .AsTime ())
223264 fakeLastBackupRpoMarginRatio := fakeRpoMargin .Seconds () / float64 (schedule .ScheduleSettings .RecoveryPointObjective .Seconds )
224- s .scheduleRPOMarginRatio .WithLabelValues (schedule .ContainerID , schedule .DatabaseName , schedule .ID ).Set (fakeLastBackupRpoMarginRatio )
265+ s .scheduleRPOMarginRatio .WithLabelValues (
266+ schedule .ContainerID ,
267+ schedule .DatabaseName ,
268+ schedule .ID ,
269+ scheduleNameLabel ,
270+ ).Set (fakeLastBackupRpoMarginRatio )
225271 }
226272}
227273
@@ -265,32 +311,32 @@ func newMetricsRegistry(ctx context.Context, wg *sync.WaitGroup, cfg *config.Met
265311 Name : "duration_seconds" ,
266312 Help : "Duration of completed operations in seconds" ,
267313 Buckets : prometheus .ExponentialBuckets (10 , 2 , 8 ),
268- }, []string {"container_id" , "database" , "type" , "status" })
314+ }, []string {"container_id" , "database" , "type" , "type_description" , " status" })
269315
270316 s .inflightOperationsDuration = promauto .With (s .reg ).NewHistogramVec (prometheus.HistogramOpts {
271317 Subsystem : "operations" ,
272318 Name : "inflight_duration_seconds" ,
273319 Help : "Duration of running operations in seconds" ,
274320 Buckets : prometheus .ExponentialBuckets (10 , 2 , 8 ),
275- }, []string {"container_id" , "database" , "type" , "state" })
321+ }, []string {"container_id" , "database" , "type" , "type_description" , " state" })
276322
277323 s .operationsStarted = promauto .With (s .reg ).NewCounterVec (prometheus.CounterOpts {
278324 Subsystem : "operations" ,
279325 Name : "started_counter" ,
280326 Help : "Total count of started operations" ,
281- }, []string {"container_id" , "database" , "type" , "schedule_id" })
327+ }, []string {"container_id" , "database" , "type" , "type_description" , " schedule_id" })
282328
283329 s .operationsFinished = promauto .With (s .reg ).NewCounterVec (prometheus.CounterOpts {
284330 Subsystem : "operations" ,
285331 Name : "finished_counter" ,
286332 Help : "Total count of finished operations" ,
287- }, []string {"container_id" , "database" , "type" , "status" , "schedule_id" })
333+ }, []string {"container_id" , "database" , "type" , "type_description" , " status" , "schedule_id" })
288334
289335 s .operationsInflight = promauto .With (s .reg ).NewGaugeVec (prometheus.GaugeOpts {
290336 Subsystem : "operations" ,
291337 Name : "inflight" ,
292338 Help : "Total count of active operations" ,
293- }, []string {"container_id" , "database" , "type" , "status" , "schedule_id" })
339+ }, []string {"container_id" , "database" , "type" , "type_description" , " status" , "schedule_id" })
294340
295341 s .handlerRunsCount = promauto .With (s .reg ).NewCounterVec (prometheus.CounterOpts {
296342 Subsystem : "operation_processor" ,
@@ -326,25 +372,25 @@ func newMetricsRegistry(ctx context.Context, wg *sync.WaitGroup, cfg *config.Met
326372 Subsystem : "schedules" ,
327373 Name : "failed_count" ,
328374 Help : "Total count of failed scheduled backup runs" ,
329- }, []string {"container_id" , "database" , "schedule_id" })
375+ }, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
330376
331377 s .scheduleActionSucceededCount = promauto .With (s .reg ).NewCounterVec (prometheus.CounterOpts {
332378 Subsystem : "schedules" ,
333379 Name : "succeeded_count" ,
334380 Help : "Total count of successful scheduled backup runs" ,
335- }, []string {"container_id" , "database" , "schedule_id" })
381+ }, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
336382
337383 s .scheduleLastBackupTimestamp = promauto .With (s .reg ).NewGaugeVec (prometheus.GaugeOpts {
338384 Subsystem : "schedules" ,
339385 Name : "last_backup_timestamp" ,
340386 Help : "Timestamp of last successful backup for this schedule" ,
341- }, []string {"container_id" , "database" , "schedule_id" })
387+ }, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
342388
343389 s .scheduleRPOMarginRatio = promauto .With (s .reg ).NewGaugeVec (prometheus.GaugeOpts {
344390 Subsystem : "schedules" ,
345391 Name : "rpo_margin_ratio" ,
346392 Help : "if RPO is set for schedule, calculates a ratio to which RPO is satisfied" ,
347- }, []string {"container_id" , "database" , "schedule_id" })
393+ }, []string {"container_id" , "database" , "schedule_id" , "schedule_name" })
348394
349395 mux := http .NewServeMux ()
350396 mux .Handle ("/metrics" , promhttp .HandlerFor (s .reg , promhttp.HandlerOpts {Registry : s .reg }))
0 commit comments