@@ -175,6 +175,18 @@ func (p *pbmCollector) pbmAgentMetrics(ctx context.Context, pbmClient *sdk.Clien
175175}
176176
177177func (p * pbmCollector ) pbmBackupsMetrics (ctx context.Context , pbmClient * sdk.Client , l * slog.Logger ) []prometheus.Metric {
178+ currentNode , err := util .MyRole (ctx , p .base .client )
179+ if err != nil {
180+ l .Error ("failed to get current node info" , "error" , err .Error ())
181+ return nil
182+ }
183+
184+ clusterStatus , err := cli .ClusterStatus (ctx , pbmClient , cli .RSConfGetter (p .mongoURI ))
185+ if err != nil {
186+ l .Error ("failed to get cluster status" , "error" , err .Error ())
187+ return nil
188+ }
189+
178190 backupsList , err := pbmClient .GetAllBackups (ctx )
179191 if err != nil {
180192 l .Error ("failed to get PBM backup list" , "error" , err .Error ())
@@ -184,42 +196,55 @@ func (p *pbmCollector) pbmBackupsMetrics(ctx context.Context, pbmClient *sdk.Cli
184196 metrics := make ([]prometheus.Metric , 0 , len (backupsList ))
185197
186198 for _ , backup := range backupsList {
187- metrics = append (metrics , createPBMMetric ("backup_size_bytes" ,
188- "Size of PBM backup" ,
189- float64 (backup .Size ), map [string ]string {
190- "opid" : backup .OPID ,
191- "status" : string (backup .Status ),
192- "name" : backup .Name ,
193- }),
194- )
195-
196- // Add backup_last_transition_ts metric
197- metrics = append (metrics , createPBMMetric ("backup_last_transition_ts" ,
198- "Last transition timestamp of PBM backup (seconds since epoch)" ,
199- float64 (backup .LastTransitionTS ), map [string ]string {
200- "opid" : backup .OPID ,
201- "status" : string (backup .Status ),
202- "name" : backup .Name ,
203- }),
204- )
205-
206- var endTime int64
207- switch pbmAgentStatus (backup .Status ) {
208- case statusDone , statusCancelled , statusError , statusDown :
209- endTime = backup .LastTransitionTS
210- default :
211- endTime = time .Now ().Unix ()
199+ // For each backup, iterate through all nodes in the cluster
200+ for replsetName , nodes := range clusterStatus {
201+ for _ , node := range nodes {
202+ // Determine role
203+ role := string (node .Role )
204+
205+ // Determine if this is the current node
206+ self := "0"
207+ if node .Host == currentNode .Me {
208+ self = "1"
209+ }
210+
211+ baseLabels := map [string ]string {
212+ "opid" : backup .OPID ,
213+ "status" : string (backup .Status ),
214+ "name" : backup .Name ,
215+ "type" : string (backup .Type ),
216+ "host" : node .Host ,
217+ "replica_set" : replsetName ,
218+ "role" : role ,
219+ "self" : self ,
220+ }
221+
222+ metrics = append (metrics , createPBMMetric ("backup_size_bytes" ,
223+ "Size of PBM backup" ,
224+ float64 (backup .Size ), baseLabels ),
225+ )
226+
227+ // Add backup_last_transition_ts metric
228+ metrics = append (metrics , createPBMMetric ("backup_last_transition_ts" ,
229+ "Last transition timestamp of PBM backup (seconds since epoch)" ,
230+ float64 (backup .LastTransitionTS ), baseLabels ),
231+ )
232+
233+ var endTime int64
234+ switch pbmAgentStatus (backup .Status ) {
235+ case statusDone , statusCancelled , statusError , statusDown :
236+ endTime = backup .LastTransitionTS
237+ default :
238+ endTime = time .Now ().Unix ()
239+ }
240+
241+ duration := time .Unix (endTime - backup .StartTS , 0 ).Unix ()
242+ metrics = append (metrics , createPBMMetric ("backup_duration_seconds" ,
243+ "Duration of PBM backup" ,
244+ float64 (duration ), baseLabels ),
245+ )
246+ }
212247 }
213-
214- duration := time .Unix (endTime - backup .StartTS , 0 ).Unix ()
215- metrics = append (metrics , createPBMMetric ("backup_duration_seconds" ,
216- "Duration of PBM backup" ,
217- float64 (duration ), map [string ]string {
218- "opid" : backup .OPID ,
219- "status" : string (backup .Status ),
220- "name" : backup .Name ,
221- }),
222- )
223248 }
224249 return metrics
225250}
0 commit comments