Skip to content

Commit ac219e6

Browse files
authored
fix: enhance metrics with labels (#485)
1 parent cc035ac commit ac219e6

File tree

4 files changed

+55
-54
lines changed

4 files changed

+55
-54
lines changed

cmd/doco-cd/http_handler.go

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,8 @@ type handlerData struct {
3636
log *logger.Logger // Logger for logging messages
3737
}
3838

39-
func onError(w http.ResponseWriter, log *slog.Logger, errMsg string, details any, jobID string, statusCode int) {
40-
prometheus.WebhookErrorsTotal.Inc()
39+
func onError(repoName string, w http.ResponseWriter, log *slog.Logger, errMsg string, details any, jobID string, statusCode int) {
40+
prometheus.WebhookErrorsTotal.WithLabelValues(repoName).Inc()
4141
log.Error(errMsg)
4242
JSONError(w,
4343
errMsg,
@@ -82,7 +82,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
8282
jobLog.Debug("authenticating to private repository")
8383

8484
if appConfig.GitAccessToken == "" {
85-
onError(w, jobLog, "missing access token for private repository", "", jobID, http.StatusInternalServerError)
85+
onError(repoName, w, jobLog, "missing access token for private repository", "", jobID, http.StatusInternalServerError)
8686

8787
return
8888
}
@@ -95,21 +95,21 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
9595

9696
// Validate payload.FullName to prevent directory traversal
9797
if strings.Contains(payload.FullName, "..") {
98-
onError(w, jobLog.With(slog.String("repository", payload.FullName)), "invalid repository name", "", jobID, http.StatusBadRequest)
98+
onError(repoName, w, jobLog.With(slog.String("repository", payload.FullName)), "invalid repository name", "", jobID, http.StatusBadRequest)
9999

100100
return
101101
}
102102

103103
internalRepoPath, err := utils.VerifyAndSanitizePath(filepath.Join(dataMountPoint.Destination, repoName), dataMountPoint.Destination) // Path inside the container
104104
if err != nil {
105-
onError(w, jobLog.With(logger.ErrAttr(err)), "failed to verify and sanitize internal filesystem path", err.Error(), jobID, http.StatusBadRequest)
105+
onError(repoName, w, jobLog.With(logger.ErrAttr(err)), "failed to verify and sanitize internal filesystem path", err.Error(), jobID, http.StatusBadRequest)
106106

107107
return
108108
}
109109

110110
externalRepoPath, err := utils.VerifyAndSanitizePath(filepath.Join(dataMountPoint.Destination, repoName), dataMountPoint.Destination) // Path on the host
111111
if err != nil {
112-
onError(w, jobLog.With(logger.ErrAttr(err)), "failed to verify and sanitize external filesystem path", err.Error(), jobID, http.StatusBadRequest)
112+
onError(repoName, w, jobLog.With(logger.ErrAttr(err)), "failed to verify and sanitize external filesystem path", err.Error(), jobID, http.StatusBadRequest)
113113

114114
return
115115
}
@@ -123,12 +123,12 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
123123

124124
_, err = git.UpdateRepository(internalRepoPath, payload.Ref, appConfig.SkipTLSVerification, appConfig.HttpProxy)
125125
if err != nil {
126-
onError(w, jobLog.With(logger.ErrAttr(err)), "failed to checkout repository", err.Error(), jobID, http.StatusInternalServerError)
126+
onError(repoName, w, jobLog.With(logger.ErrAttr(err)), "failed to checkout repository", err.Error(), jobID, http.StatusInternalServerError)
127127

128128
return
129129
}
130130
} else {
131-
onError(w, jobLog.With(logger.ErrAttr(err)), "failed to clone repository", err.Error(), jobID, http.StatusInternalServerError)
131+
onError(repoName, w, jobLog.With(logger.ErrAttr(err)), "failed to clone repository", err.Error(), jobID, http.StatusInternalServerError)
132132

133133
return
134134
}
@@ -144,7 +144,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
144144
if errors.Is(err, config.ErrDeprecatedConfig) {
145145
jobLog.Warn(err.Error())
146146
} else {
147-
onError(w, jobLog.With(logger.ErrAttr(err)), "failed to get deploy configuration", err.Error(), jobID, http.StatusInternalServerError)
147+
onError(repoName, w, jobLog.With(logger.ErrAttr(err)), "failed to get deploy configuration", err.Error(), jobID, http.StatusInternalServerError)
148148

149149
return
150150
}
@@ -160,14 +160,14 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
160160

161161
internalRepoPath, err = utils.VerifyAndSanitizePath(filepath.Join(dataMountPoint.Destination, repoName), dataMountPoint.Destination) // Path inside the container
162162
if err != nil {
163-
onError(w, subJobLog.With(logger.ErrAttr(err)), "invalid repository name", err.Error(), jobID, http.StatusBadRequest)
163+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "invalid repository name", err.Error(), jobID, http.StatusBadRequest)
164164

165165
return
166166
}
167167

168168
externalRepoPath, err = utils.VerifyAndSanitizePath(filepath.Join(dataMountPoint.Source, repoName), dataMountPoint.Source) // Path on the host
169169
if err != nil {
170-
onError(w, subJobLog.With(logger.ErrAttr(err)), "invalid repository name", err.Error(), jobID, http.StatusBadRequest)
170+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "invalid repository name", err.Error(), jobID, http.StatusBadRequest)
171171

172172
return
173173
}
@@ -190,7 +190,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
190190
// Try to clone the remote repository
191191
_, err = git.CloneRepository(internalRepoPath, cloneUrl, deployConfig.Reference, appConfig.SkipTLSVerification, appConfig.HttpProxy)
192192
if err != nil && !errors.Is(err, git.ErrRepositoryAlreadyExists) {
193-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to clone remote repository", err.Error(), jobID, http.StatusInternalServerError)
193+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to clone remote repository", err.Error(), jobID, http.StatusInternalServerError)
194194

195195
return
196196
}
@@ -202,14 +202,14 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
202202

203203
repo, err := git.UpdateRepository(internalRepoPath, deployConfig.Reference, appConfig.SkipTLSVerification, appConfig.HttpProxy)
204204
if err != nil {
205-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to checkout repository", err.Error(), jobID, http.StatusInternalServerError)
205+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to checkout repository", err.Error(), jobID, http.StatusInternalServerError)
206206

207207
return
208208
}
209209

210210
latestCommit, err := git.GetLatestCommit(repo, deployConfig.Reference)
211211
if err != nil {
212-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to get latest commit", err.Error(), jobID, http.StatusInternalServerError)
212+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to get latest commit", err.Error(), jobID, http.StatusInternalServerError)
213213

214214
return
215215
}
@@ -220,7 +220,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
220220
// Check if doco-cd manages the project before destroying the stack
221221
containers, err := docker.GetLabeledContainers(ctx, dockerClient, api.ProjectLabel, deployConfig.Name)
222222
if err != nil {
223-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to retrieve containers", err.Error(), jobID, http.StatusInternalServerError)
223+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to retrieve containers", err.Error(), jobID, http.StatusInternalServerError)
224224

225225
return
226226
}
@@ -249,22 +249,22 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
249249
}
250250

251251
if !managed {
252-
onError(w, subJobLog, fmt.Errorf("%w: %s: aborting destruction", ErrNotManagedByDocoCD, deployConfig.Name).Error(),
252+
onError(repoName, w, subJobLog, fmt.Errorf("%w: %s: aborting destruction", ErrNotManagedByDocoCD, deployConfig.Name).Error(),
253253
"", jobID, http.StatusInternalServerError)
254254

255255
return
256256
}
257257

258258
if !correctRepo {
259-
onError(w, subJobLog, fmt.Errorf("%w: %s: aborting destruction", ErrDeploymentConflict, deployConfig.Name).Error(),
259+
onError(repoName, w, subJobLog, fmt.Errorf("%w: %s: aborting destruction", ErrDeploymentConflict, deployConfig.Name).Error(),
260260
map[string]string{"stack": deployConfig.Name}, jobID, http.StatusInternalServerError)
261261

262262
return
263263
}
264264

265265
err = docker.DestroyStack(subJobLog, &ctx, &dockerCli, deployConfig)
266266
if err != nil {
267-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to destroy stack", err.Error(), jobID, http.StatusInternalServerError)
267+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to destroy stack", err.Error(), jobID, http.StatusInternalServerError)
268268

269269
return
270270
}
@@ -277,7 +277,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
277277

278278
subDirs, err := os.ReadDir(parentDir)
279279
if err != nil {
280-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to read parent directory", err.Error(), jobID, http.StatusInternalServerError)
280+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to read parent directory", err.Error(), jobID, http.StatusInternalServerError)
281281

282282
return
283283
}
@@ -289,15 +289,15 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
289289
// Remove only the repository directory
290290
err = os.RemoveAll(internalRepoPath)
291291
if err != nil {
292-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to remove deployment directory", err.Error(), jobID, http.StatusInternalServerError)
292+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to remove deployment directory", err.Error(), jobID, http.StatusInternalServerError)
293293

294294
return
295295
}
296296
} else {
297297
// Remove the parent directory if it has only one subdirectory
298298
err = os.RemoveAll(parentDir)
299299
if err != nil {
300-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to remove deployment directory", err.Error(), jobID, http.StatusInternalServerError)
300+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to remove deployment directory", err.Error(), jobID, http.StatusInternalServerError)
301301

302302
return
303303
}
@@ -309,7 +309,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
309309
// Skip deployment if another project with the same name already exists
310310
containers, err := docker.GetLabeledContainers(ctx, dockerClient, api.ProjectLabel, deployConfig.Name)
311311
if err != nil {
312-
onError(w, subJobLog.With(logger.ErrAttr(err)), "failed to retrieve containers", err.Error(), jobID, http.StatusInternalServerError)
312+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "failed to retrieve containers", err.Error(), jobID, http.StatusInternalServerError)
313313

314314
return
315315
}
@@ -330,7 +330,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
330330
}
331331

332332
if !correctRepo {
333-
onError(w, subJobLog, fmt.Errorf("%w: %s: skipping deployment", ErrDeploymentConflict, deployConfig.Name).Error(),
333+
onError(repoName, w, subJobLog, fmt.Errorf("%w: %s: skipping deployment", ErrDeploymentConflict, deployConfig.Name).Error(),
334334
map[string]string{"stack": deployConfig.Name}, jobID, http.StatusInternalServerError)
335335

336336
return
@@ -339,7 +339,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
339339
if deployedCommit != "" {
340340
changed, err := git.CompareCommitsInSubdir(repo, plumbing.NewHash(deployedCommit), plumbing.NewHash(latestCommit), deployConfig.WorkingDirectory)
341341
if err != nil {
342-
onError(w, subJobLog, fmt.Errorf("failed to compare commits in subdirectory: %w", err).Error(),
342+
onError(repoName, w, subJobLog, fmt.Errorf("failed to compare commits in subdirectory: %w", err).Error(),
343343
map[string]string{"stack": deployConfig.Name}, jobID, http.StatusInternalServerError)
344344

345345
return
@@ -362,7 +362,7 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
362362

363363
err = docker.DeployStack(subJobLog, internalRepoPath, externalRepoPath, &ctx, &dockerCli, &payload, deployConfig, latestCommit, Version, false)
364364
if err != nil {
365-
onError(w, subJobLog.With(logger.ErrAttr(err)), "deployment failed", err.Error(), jobID, http.StatusInternalServerError)
365+
onError(repoName, w, subJobLog.With(logger.ErrAttr(err)), "deployment failed", err.Error(), jobID, http.StatusInternalServerError)
366366

367367
return
368368
}
@@ -374,8 +374,8 @@ func HandleEvent(ctx context.Context, jobLog *slog.Logger, w http.ResponseWriter
374374
jobLog.Info(msg, slog.String("elapsed_time", elapsedTime.Truncate(time.Millisecond).String()))
375375
JSONResponse(w, msg, jobID, http.StatusCreated)
376376

377-
prometheus.WebhookRequestsTotal.Inc()
378-
prometheus.WebhookDuration.Observe(elapsedTime.Seconds())
377+
prometheus.WebhookRequestsTotal.WithLabelValues(repoName).Inc()
378+
prometheus.WebhookDuration.WithLabelValues(repoName).Observe(elapsedTime.Seconds())
379379
}
380380

381381
func (h *handlerData) WebhookHandler(w http.ResponseWriter, r *http.Request) {
@@ -417,7 +417,7 @@ func (h *handlerData) WebhookHandler(w http.ResponseWriter, r *http.Request) {
417417
statusCode = http.StatusInternalServerError
418418
}
419419

420-
onError(w, jobLog.With(slog.String("ip", r.RemoteAddr), logger.ErrAttr(err)), errMsg, err.Error(), jobID, statusCode)
420+
onError(getRepoName(payload.CloneURL), w, jobLog.With(slog.String("ip", r.RemoteAddr), logger.ErrAttr(err)), errMsg, err.Error(), jobID, statusCode)
421421

422422
return
423423
}
@@ -428,7 +428,7 @@ func (h *handlerData) WebhookHandler(w http.ResponseWriter, r *http.Request) {
428428
func (h *handlerData) HealthCheckHandler(w http.ResponseWriter, _ *http.Request) {
429429
err := docker.VerifySocketConnection()
430430
if err != nil {
431-
onError(w, h.log.With(logger.ErrAttr(err)), docker.ErrDockerSocketConnectionFailed.Error(), err.Error(), "", http.StatusServiceUnavailable)
431+
onError("healthcheck", w, h.log.With(logger.ErrAttr(err)), docker.ErrDockerSocketConnectionFailed.Error(), err.Error(), "", http.StatusServiceUnavailable)
432432

433433
return
434434
}

cmd/doco-cd/poll_handler.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,12 @@ func (h *handlerData) PollHandler(pollJob *config.PollJob) {
6767

6868
for {
6969
if pollJob.LastRun == 0 || time.Now().Unix() >= pollJob.NextRun {
70-
logger.Debug("Running poll for repository")
70+
repoName := getRepoName(string(pollJob.Config.CloneUrl))
71+
logger.Debug("Running poll for repository", slog.String("repoName", repoName))
7172

7273
err := RunPoll(context.Background(), pollJob.Config, h.appConfig, h.dataMountPoint, h.dockerCli, h.dockerClient, logger)
7374
if err != nil {
74-
prometheus.PollErrors.Inc()
75+
prometheus.PollErrors.WithLabelValues(repoName).Inc()
7576
}
7677

7778
pollJob.NextRun = time.Now().Unix() + int64(pollJob.Config.Interval)
@@ -427,8 +428,8 @@ func RunPoll(ctx context.Context, pollConfig config.PollConfig, appConfig *confi
427428
elapsedTime := time.Since(startTime)
428429
jobLog.Info("job completed successfully", slog.String("elapsed_time", elapsedTime.Truncate(time.Millisecond).String()), slog.String("next_run", nextRun))
429430

430-
prometheus.PollTotal.Inc()
431-
prometheus.PollDuration.Observe(elapsedTime.Seconds())
431+
prometheus.PollTotal.WithLabelValues(repoName).Inc()
432+
prometheus.PollDuration.WithLabelValues(repoName).Observe(elapsedTime.Seconds())
432433

433434
return nil
434435
}

internal/docker/compose.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ func DeployStack(
478478

479479
err = DeployCompose(*ctx, *dockerCli, project, deployConfig, *payload, externalWorkingDir, latestCommit, appVersion, forceDeploy)
480480
if err != nil {
481-
prometheus.DeploymentErrorsTotal.Inc()
481+
prometheus.DeploymentErrorsTotal.WithLabelValues(deployConfig.Name).Inc()
482482

483483
errMsg := "failed to deploy stack"
484484
stackLog.Error(errMsg,
@@ -488,8 +488,8 @@ func DeployStack(
488488
return fmt.Errorf("%s: %w", errMsg, err)
489489
}
490490

491-
prometheus.DeploymentsTotal.Inc()
492-
prometheus.DeploymentDuration.Observe(time.Since(startTime).Seconds())
491+
prometheus.DeploymentsTotal.WithLabelValues(deployConfig.Name).Inc()
492+
prometheus.DeploymentDuration.WithLabelValues(deployConfig.Name).Observe(time.Since(startTime).Seconds())
493493

494494
return nil
495495
}

internal/prometheus/metrics.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,54 +22,54 @@ var (
2222
},
2323
[]string{"version", "log_level", "start_time"},
2424
)
25-
PollTotal = prometheus.NewCounter(prometheus.CounterOpts{
25+
PollTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
2626
Namespace: MetricsNamespace,
2727
Name: "polls_total",
2828
Help: "Number of successful polls",
29-
})
30-
PollErrors = prometheus.NewCounter(prometheus.CounterOpts{
29+
}, []string{"repository"})
30+
PollErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
3131
Namespace: MetricsNamespace,
3232
Name: "poll_errors_total",
3333
Help: "Failed polling attempts",
34-
})
35-
PollDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
34+
}, []string{"repository"})
35+
PollDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
3636
Namespace: MetricsNamespace,
3737
Name: "poll_duration_seconds",
3838
Help: "Duration of polling operations in seconds",
3939
Buckets: prometheus.DefBuckets,
40-
})
41-
WebhookRequestsTotal = prometheus.NewCounter(prometheus.CounterOpts{
40+
}, []string{"repository"})
41+
WebhookRequestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
4242
Namespace: MetricsNamespace,
4343
Name: "webhook_requests_total",
4444
Help: "Total number of webhook requests received",
45-
})
46-
WebhookErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
45+
}, []string{"repository"})
46+
WebhookErrorsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
4747
Namespace: MetricsNamespace,
4848
Name: "webhook_errors_total",
4949
Help: "Total number of errors in webhook processing",
50-
})
51-
WebhookDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
50+
}, []string{"repository"})
51+
WebhookDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
5252
Namespace: MetricsNamespace,
5353
Name: "webhook_duration_seconds",
5454
Help: "Duration of webhook processing in seconds",
5555
Buckets: prometheus.DefBuckets,
56-
})
57-
DeploymentsTotal = prometheus.NewCounter(prometheus.CounterOpts{
56+
}, []string{"repository"})
57+
DeploymentsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
5858
Namespace: MetricsNamespace,
5959
Name: "deployments_total",
6060
Help: "Total number of deployments processed",
61-
})
62-
DeploymentErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
61+
}, []string{"repository"})
62+
DeploymentErrorsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
6363
Namespace: MetricsNamespace,
6464
Name: "deployment_errors_total",
6565
Help: "Total number of errors during deployments",
66-
})
67-
DeploymentDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
66+
}, []string{"repository"})
67+
DeploymentDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
6868
Namespace: MetricsNamespace,
6969
Name: "deployment_duration_seconds",
7070
Help: "Duration of deployment operations in seconds",
7171
Buckets: prometheus.DefBuckets,
72-
})
72+
}, []string{"repository"})
7373
)
7474

7575
func init() {

0 commit comments

Comments
 (0)