Skip to content

Commit 77ca094

Browse files
committed
PMM-14404 - Enhance PBM backup metrics collection by including cluster node information
- Added retrieval of current node role and cluster status.
- Updated metrics to include node-specific details such as role and host for each backup.
- Improved error handling for node info and cluster status retrieval.
1 parent 10ebe25 commit 77ca094

File tree

1 file changed

+60
-35
lines changed

1 file changed

+60
-35
lines changed

exporter/pbm_collector.go

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,18 @@ func (p *pbmCollector) pbmAgentMetrics(ctx context.Context, pbmClient *sdk.Clien
175175
}
176176

177177
func (p *pbmCollector) pbmBackupsMetrics(ctx context.Context, pbmClient *sdk.Client, l *slog.Logger) []prometheus.Metric {
178+
currentNode, err := util.MyRole(ctx, p.base.client)
179+
if err != nil {
180+
l.Error("failed to get current node info", "error", err.Error())
181+
return nil
182+
}
183+
184+
clusterStatus, err := cli.ClusterStatus(ctx, pbmClient, cli.RSConfGetter(p.mongoURI))
185+
if err != nil {
186+
l.Error("failed to get cluster status", "error", err.Error())
187+
return nil
188+
}
189+
178190
backupsList, err := pbmClient.GetAllBackups(ctx)
179191
if err != nil {
180192
l.Error("failed to get PBM backup list", "error", err.Error())
@@ -184,42 +196,55 @@ func (p *pbmCollector) pbmBackupsMetrics(ctx context.Context, pbmClient *sdk.Cli
184196
metrics := make([]prometheus.Metric, 0, len(backupsList))
185197

186198
for _, backup := range backupsList {
187-
metrics = append(metrics, createPBMMetric("backup_size_bytes",
188-
"Size of PBM backup",
189-
float64(backup.Size), map[string]string{
190-
"opid": backup.OPID,
191-
"status": string(backup.Status),
192-
"name": backup.Name,
193-
}),
194-
)
195-
196-
// Add backup_last_transition_ts metric
197-
metrics = append(metrics, createPBMMetric("backup_last_transition_ts",
198-
"Last transition timestamp of PBM backup (seconds since epoch)",
199-
float64(backup.LastTransitionTS), map[string]string{
200-
"opid": backup.OPID,
201-
"status": string(backup.Status),
202-
"name": backup.Name,
203-
}),
204-
)
205-
206-
var endTime int64
207-
switch pbmAgentStatus(backup.Status) {
208-
case statusDone, statusCancelled, statusError, statusDown:
209-
endTime = backup.LastTransitionTS
210-
default:
211-
endTime = time.Now().Unix()
199+
// For each backup, iterate through all nodes in the cluster
200+
for replsetName, nodes := range clusterStatus {
201+
for _, node := range nodes {
202+
// Determine role
203+
role := string(node.Role)
204+
205+
// Determine if this is the current node
206+
self := "0"
207+
if node.Host == currentNode.Me {
208+
self = "1"
209+
}
210+
211+
baseLabels := map[string]string{
212+
"opid": backup.OPID,
213+
"status": string(backup.Status),
214+
"name": backup.Name,
215+
"type": string(backup.Type),
216+
"host": node.Host,
217+
"replica_set": replsetName,
218+
"role": role,
219+
"self": self,
220+
}
221+
222+
metrics = append(metrics, createPBMMetric("backup_size_bytes",
223+
"Size of PBM backup",
224+
float64(backup.Size), baseLabels),
225+
)
226+
227+
// Add backup_last_transition_ts metric
228+
metrics = append(metrics, createPBMMetric("backup_last_transition_ts",
229+
"Last transition timestamp of PBM backup (seconds since epoch)",
230+
float64(backup.LastTransitionTS), baseLabels),
231+
)
232+
233+
var endTime int64
234+
switch pbmAgentStatus(backup.Status) {
235+
case statusDone, statusCancelled, statusError, statusDown:
236+
endTime = backup.LastTransitionTS
237+
default:
238+
endTime = time.Now().Unix()
239+
}
240+
241+
duration := time.Unix(endTime-backup.StartTS, 0).Unix()
242+
metrics = append(metrics, createPBMMetric("backup_duration_seconds",
243+
"Duration of PBM backup",
244+
float64(duration), baseLabels),
245+
)
246+
}
212247
}
213-
214-
duration := time.Unix(endTime-backup.StartTS, 0).Unix()
215-
metrics = append(metrics, createPBMMetric("backup_duration_seconds",
216-
"Duration of PBM backup",
217-
float64(duration), map[string]string{
218-
"opid": backup.OPID,
219-
"status": string(backup.Status),
220-
"name": backup.Name,
221-
}),
222-
)
223248
}
224249
return metrics
225250
}

0 commit comments

Comments
 (0)