@@ -26,6 +26,7 @@ import (
2626 "github.com/prometheus/client_golang/prometheus"
2727 "go.mongodb.org/mongo-driver/mongo"
2828
29+ "github.com/percona/mongodb_exporter/internal/proto"
2930 "github.com/percona/mongodb_exporter/internal/util"
3031)
3132
@@ -113,8 +114,14 @@ func (p *pbmCollector) collect(ch chan<- prometheus.Metric) {
113114 "PBM PITR backups are enabled for the cluster" ,
114115 float64 (pitrEnabledMetric ), nil ))
115116
116- metrics = append (metrics , p .pbmBackupsMetrics (p .ctx , pbmClient , logger )... )
117- metrics = append (metrics , p .pbmAgentMetrics (p .ctx , pbmClient , logger )... )
117+ // Get current node info once for both agent and backup metrics
118+ currentNode , err := util .MyRole (p .ctx , p .base .client )
119+ if err != nil {
120+ logger .Error ("failed to get current node info" , "error" , err .Error ())
121+ } else {
122+ metrics = append (metrics , p .pbmBackupsMetrics (p .ctx , pbmClient , logger , currentNode )... )
123+ metrics = append (metrics , p .pbmAgentMetrics (p .ctx , pbmClient , logger , currentNode )... )
124+ }
118125 }
119126
120127 metrics = append (metrics , createPBMMetric ("cluster_backup_configured" ,
@@ -126,13 +133,7 @@ func (p *pbmCollector) collect(ch chan<- prometheus.Metric) {
126133 }
127134}
128135
129- func (p * pbmCollector ) pbmAgentMetrics (ctx context.Context , pbmClient * sdk.Client , l * slog.Logger ) []prometheus.Metric {
130- currentNode , err := util .MyRole (ctx , p .base .client )
131- if err != nil {
132- l .Error ("failed to get current node info" , "error" , err .Error ())
133- return nil
134- }
135-
136+ func (p * pbmCollector ) pbmAgentMetrics (ctx context.Context , pbmClient * sdk.Client , l * slog.Logger , currentNode * proto.HelloResponse ) []prometheus.Metric {
136137 clusterStatus , err := cli .ClusterStatus (ctx , pbmClient , cli .RSConfGetter (p .mongoURI ))
137138 if err != nil {
138139 l .Error ("failed to get cluster status" , "error" , err .Error ())
@@ -174,7 +175,7 @@ func (p *pbmCollector) pbmAgentMetrics(ctx context.Context, pbmClient *sdk.Clien
174175 return metrics
175176}
176177
177- func (p * pbmCollector ) pbmBackupsMetrics (ctx context.Context , pbmClient * sdk.Client , l * slog.Logger ) []prometheus.Metric {
178+ func (p * pbmCollector ) pbmBackupsMetrics (ctx context.Context , pbmClient * sdk.Client , l * slog.Logger , currentNode * proto. HelloResponse ) []prometheus.Metric {
178179 backupsList , err := pbmClient .GetAllBackups (ctx )
179180 if err != nil {
180181 l .Error ("failed to get PBM backup list" , "error" , err .Error ())
@@ -184,45 +185,49 @@ func (p *pbmCollector) pbmBackupsMetrics(ctx context.Context, pbmClient *sdk.Cli
184185 metrics := make ([]prometheus.Metric , 0 , len (backupsList ))
185186
186187 for _ , backup := range backupsList {
187- metrics = append (metrics , createPBMMetric ("backup_size_bytes" ,
188- "Size of PBM backup" ,
189- float64 (backup .Size ), map [string ]string {
190- "opid" : backup .OPID ,
191- "status" : string (backup .Status ),
192- "name" : backup .Name ,
193- "type" : string (backup .Type ),
194- }),
195- )
196-
197- // Add backup_last_transition_ts metric
198- metrics = append (metrics , createPBMMetric ("backup_last_transition_ts" ,
199- "Last transition timestamp of PBM backup (seconds since epoch)" ,
200- float64 (backup .LastTransitionTS ), map [string ]string {
201- "opid" : backup .OPID ,
202- "status" : string (backup .Status ),
203- "name" : backup .Name ,
204- "type" : string (backup .Type ),
205- }),
206- )
207-
208- var endTime int64
209- switch pbmAgentStatus (backup .Status ) {
210- case statusDone , statusCancelled , statusError , statusDown :
211- endTime = backup .LastTransitionTS
212- default :
213- endTime = time .Now ().Unix ()
214- }
188+ // Iterate through replsets in the backup metadata
189+ for _ , replset := range backup .Replsets {
190+ // Determine if this is the current node
191+ self := "0"
192+ if replset .Node == currentNode .Me {
193+ self = "1"
194+ }
215195
216- duration := time .Unix (endTime - backup .StartTS , 0 ).Unix ()
217- metrics = append (metrics , createPBMMetric ("backup_duration_seconds" ,
218- "Duration of PBM backup" ,
219- float64 (duration ), map [string ]string {
220- "opid" : backup .OPID ,
221- "status" : string (backup .Status ),
222- "name" : backup .Name ,
223- "type" : string (backup .Type ),
224- }),
225- )
196+ labels := map [string ]string {
197+ "opid" : backup .OPID ,
198+ "status" : string (backup .Status ),
199+ "name" : backup .Name ,
200+ "host" : replset .Node ,
201+ "replica_set" : replset .Name ,
202+ "self" : self ,
203+ "type" : string (backup .Type ),
204+ }
205+
206+ metrics = append (metrics , createPBMMetric ("backup_size_bytes" ,
207+ "Size of PBM backup" ,
208+ float64 (backup .Size ), labels ),
209+ )
210+
211+ // Add backup_last_transition_ts metric
212+ metrics = append (metrics , createPBMMetric ("backup_last_transition_ts" ,
213+ "Last transition timestamp of PBM backup (seconds since epoch)" ,
214+ float64 (backup .LastTransitionTS ), labels ),
215+ )
216+
217+ var endTime int64
218+ switch pbmAgentStatus (backup .Status ) {
219+ case statusDone , statusCancelled , statusError , statusDown :
220+ endTime = backup .LastTransitionTS
221+ default :
222+ endTime = time .Now ().Unix ()
223+ }
224+
225+ duration := time .Unix (endTime - backup .StartTS , 0 ).Unix ()
226+ metrics = append (metrics , createPBMMetric ("backup_duration_seconds" ,
227+ "Duration of PBM backup" ,
228+ float64 (duration ), labels ),
229+ )
230+ }
226231 }
227232 return metrics
228233}
0 commit comments