Skip to content

Commit 0694e5d

Browse files
test: add test coverage for metrics (#1910)
* test: add test coverage for metrics
  - add unit and integration metric check in resourcegroup
  - fill in pipeline_error and parser_duration in Prometheus e2e
* gemini: address comments
  - reduce redundant invoke
  - refactor TestReconcile for readability
  - use waitForResourceGroupStatus to wait until rg status is updated instead of using sleep
* cleanup: refactor update_metrics_test for readability
* refactor: TestReconcile_Metrics to test with real resource creation
1 parent 8c80c6b commit 0694e5d

File tree

3 files changed

+655
-14
lines changed

3 files changed

+655
-14
lines changed

e2e/nomostest/prometheus_metrics.go

Lines changed: 103 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"github.com/GoogleContainerTools/config-sync/pkg/api/configsync"
2525
"github.com/GoogleContainerTools/config-sync/pkg/core"
2626
"github.com/GoogleContainerTools/config-sync/pkg/kinds"
27-
"github.com/GoogleContainerTools/config-sync/pkg/metrics"
2827
ocmetrics "github.com/GoogleContainerTools/config-sync/pkg/metrics"
2928
"github.com/GoogleContainerTools/config-sync/pkg/util/log"
3029
prometheusapi "github.com/prometheus/client_golang/api"
@@ -184,14 +183,16 @@ func ValidateStandardMetricsForSync(nt *NT, syncKind testmetrics.SyncKind, syncL
184183
ReconcilerSyncSuccess(nt, syncLabels, commitHash),
185184
ReconcilerSourceMetrics(nt, syncLabels, commitHash, count),
186185
ReconcilerSyncMetrics(nt, syncLabels, commitHash),
186+
ReconcilerParserMetrics(nt, syncLabels, commitHash),
187187
ReconcilerOperationsMetrics(nt, syncLabels, ops...),
188-
ReconcilerErrorMetrics(nt, syncLabels, commitHash, summary.Errors))
188+
ReconcilerErrorMetrics(nt, syncLabels, commitHash, summary.Errors),
189+
ReconcilerPipelineMetrics(nt, syncLabels, summary.Errors))
189190
}
190191

191192
// ReconcilerManagerMetrics returns a MetricsPredicate that validates the
192193
// ReconcileDurationView metric.
193194
func ReconcilerManagerMetrics(nt *NT) MetricsPredicate {
194-
nt.Logger.Debugf("[METRICS] Expecting reconciler-manager reconciling status: %s", metrics.StatusSuccess)
195+
nt.Logger.Debugf("[METRICS] Expecting reconciler-manager reconciling status: %s", ocmetrics.StatusSuccess)
195196
return func(ctx context.Context, v1api prometheusv1.API) error {
196197
metricName := ocmetrics.ReconcileDurationView.Name
197198
// ReconcileDurationView is a distribution. Query count to aggregate.
@@ -219,15 +220,32 @@ func ReconcilerSourceMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commit
219220
// ReconcilerSyncMetrics returns a MetricsPredicate that validates the
220221
// LastApplyTimestampView, ApplyDurationView, and LastSyncTimestampView metrics.
// Each of the three checks is made for the given syncLabels and commitHash with
// status ocmetrics.StatusSuccess. All three checks always run; their errors are
// combined with multierr instead of returning on the first failure.
221222
func ReconcilerSyncMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commitHash string) MetricsPredicate {
222-
nt.Logger.Debugf("[METRICS] Expecting last apply & sync status (commit: %s): %s", commitHash, metrics.StatusSuccess)
223+
nt.Logger.Debugf("[METRICS] Expecting last apply & sync status (commit: %s): %s", commitHash, ocmetrics.StatusSuccess)
223224
return func(ctx context.Context, v1api prometheusv1.API) error {
224225
var err error
225226
err = multierr.Append(err, metricLastApplyTimestampHasStatus(ctx, nt, v1api,
226-
syncLabels, commitHash, metrics.StatusSuccess))
227+
syncLabels, commitHash, ocmetrics.StatusSuccess))
227228
err = multierr.Append(err, metricApplyDurationViewHasStatus(ctx, nt, v1api,
228-
syncLabels, commitHash, metrics.StatusSuccess))
229+
syncLabels, commitHash, ocmetrics.StatusSuccess))
229230
err = multierr.Append(err, metricLastSyncTimestampHasStatus(ctx, nt, v1api,
230-
syncLabels, commitHash, metrics.StatusSuccess))
231+
syncLabels, commitHash, ocmetrics.StatusSuccess))
232+
return err
233+
}
234+
}
235+
236+
// ReconcilerParserMetrics returns a MetricsPredicate that validates the
237+
// ParserDurationView metric for all parser sources (read, parse, update).
// Every source is checked with status ocmetrics.StatusSuccess, and the errors
// from the three checks are combined with multierr.
//
// NOTE: commitHash is only written to the debug log. It is not passed to
// metricParserDurationViewHasStatusAndSource, so the queries are not filtered
// by commit.
238+
func ReconcilerParserMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commitHash string) MetricsPredicate {
239+
nt.Logger.Debugf("[METRICS] Expecting parser duration status (commit: %s): %s", commitHash, ocmetrics.StatusSuccess)
240+
return func(ctx context.Context, v1api prometheusv1.API) error {
241+
var err error
242+
// Check parser duration for all three sources (read, parse, update).
// Keep going after a failure so every missing source is reported at once.
243+
err = multierr.Append(err, metricParserDurationViewHasStatusAndSource(ctx, nt, v1api,
244+
syncLabels, "read", ocmetrics.StatusSuccess))
245+
err = multierr.Append(err, metricParserDurationViewHasStatusAndSource(ctx, nt, v1api,
246+
syncLabels, "parse", ocmetrics.StatusSuccess))
247+
err = multierr.Append(err, metricParserDurationViewHasStatusAndSource(ctx, nt, v1api,
248+
syncLabels, "update", ocmetrics.StatusSuccess))
231249
return err
232250
}
233251
}
@@ -262,6 +280,26 @@ func reconcilerOperationMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, op
262280
}
263281
}
264282

283+
// ReconcilerPipelineMetrics returns a MetricsPredicate that validates the
284+
// PipelineErrorView metrics for source, rendering, and sync components.
// The expected value for each component is taken from summary.Source,
// summary.Rendering, and summary.Sync respectively, and handed to
// metricPipelineErrorViewHasValue. The per-component predicates are built when
// this function is called; the returned predicate runs all of them and combines
// their errors with multierr.
285+
func ReconcilerPipelineMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, summary testmetrics.ErrorSummary) MetricsPredicate {
286+
nt.Logger.Debugf("[METRICS] Expecting pipeline error metrics: source=%d, rendering=%d, sync=%d", summary.Source, summary.Rendering, summary.Sync)
287+
288+
var predicates []MetricsPredicate
289+
// One pipeline error predicate per component.
290+
predicates = append(predicates, metricPipelineErrorViewHasValue(nt, syncLabels, "source", summary.Source))
291+
predicates = append(predicates, metricPipelineErrorViewHasValue(nt, syncLabels, "rendering", summary.Rendering))
292+
predicates = append(predicates, metricPipelineErrorViewHasValue(nt, syncLabels, "sync", summary.Sync))
293+
294+
return func(ctx context.Context, v1api prometheusv1.API) error {
295+
var err error
296+
for _, predicate := range predicates {
297+
err = multierr.Append(err, predicate(ctx, v1api))
298+
}
299+
return err
300+
}
301+
}
302+
265303
// ReconcilerErrorMetrics returns a MetricsPredicate that validates the
266304
// following metrics:
267305
// - ResourceFightsView
@@ -293,23 +331,40 @@ func ReconcilerErrorMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commitH
293331
// ReconcilerSyncSuccess returns a MetricsPredicate that validates that the
294332
// latest commit synced successfully for the specified reconciler and commit.
// It delegates to metricLastSyncTimestampHasStatus with the given syncLabels,
// commitHash, and status ocmetrics.StatusSuccess.
295333
func ReconcilerSyncSuccess(nt *NT, syncLabels prometheusmodel.LabelSet, commitHash string) MetricsPredicate {
296-
nt.Logger.Debugf("[METRICS] Expecting last sync status (commit: %s): %s", commitHash, metrics.StatusSuccess)
334+
nt.Logger.Debugf("[METRICS] Expecting last sync status (commit: %s): %s", commitHash, ocmetrics.StatusSuccess)
297335
return func(ctx context.Context, v1api prometheusv1.API) error {
298336
return metricLastSyncTimestampHasStatus(ctx, nt, v1api,
299-
syncLabels, commitHash, metrics.StatusSuccess)
337+
syncLabels, commitHash, ocmetrics.StatusSuccess)
300338
}
301339
}
302340

303341
// ReconcilerSyncError returns a MetricsPredicate that validates that the
304342
// latest commit sync errored for the specified reconciler and commit.
// It delegates to metricLastSyncTimestampHasStatus with the given syncLabels,
// commitHash, and status ocmetrics.StatusError.
305343
func ReconcilerSyncError(nt *NT, syncLabels prometheusmodel.LabelSet, commitHash string) MetricsPredicate {
306-
nt.Logger.Debugf("[METRICS] Expecting last sync status (commit: %s): %s", commitHash, metrics.StatusError)
344+
nt.Logger.Debugf("[METRICS] Expecting last sync status (commit: %s): %s", commitHash, ocmetrics.StatusError)
307345
return func(ctx context.Context, v1api prometheusv1.API) error {
308346
return metricLastSyncTimestampHasStatus(ctx, nt, v1api,
309-
syncLabels, commitHash, metrics.StatusError)
347+
syncLabels, commitHash, ocmetrics.StatusError)
310348
}
311349
}
312350

351+
// ReconcilerParserDuration returns a MetricsPredicate that validates the
352+
// ParserDurationView metric for the specified reconciler, source, and status.
// NOTE: commitHash is only included in the debug log; it is not passed to
// metricParserDurationViewHasStatusAndSource and does not filter the query.
353+
func ReconcilerParserDuration(nt *NT, syncLabels prometheusmodel.LabelSet, commitHash, source, status string) MetricsPredicate {
354+
nt.Logger.Debugf("[METRICS] Expecting parser duration (commit: %s, source: %s, status: %s)", commitHash, source, status)
355+
return func(ctx context.Context, v1api prometheusv1.API) error {
356+
return metricParserDurationViewHasStatusAndSource(ctx, nt, v1api,
357+
syncLabels, source, status)
358+
}
359+
}
360+
361+
// ReconcilerPipelineError returns a MetricsPredicate that validates the
362+
// PipelineErrorView metric for the specified reconciler and component.
// It logs the expectation and returns the predicate built by
// metricPipelineErrorViewHasValue, which decides how value is compared.
363+
func ReconcilerPipelineError(nt *NT, syncLabels prometheusmodel.LabelSet, component string, value int) MetricsPredicate {
364+
nt.Logger.Debugf("[METRICS] Expecting pipeline error (component: %s, value: %d)", component, value)
365+
return metricPipelineErrorViewHasValue(nt, syncLabels, component, value)
366+
}
367+
313368
// metricReconcilerErrorsHasValue returns a MetricsPredicate that validates that
314369
// the latest pod for the specified reconciler has emitted a reconciler error
315370
// metric with the specified component and quantity value.
@@ -398,6 +453,29 @@ func metricInternalErrorsHasValueAtLeast(nt *NT, syncLabels prometheusmodel.Labe
398453
}
399454
}
400455

456+
// metricPipelineErrorViewHasValue returns a MetricsPredicate that validates that
457+
// the pipeline error metric (prometheusConfigSyncMetricPrefix followed by
458+
// ocmetrics.PipelineErrorName) exists for the given component and syncLabels.
459+
// If the expected value is zero, the metric being not found is also acceptable.
// Any non-zero expected value is checked against a metric value of exactly 1,
// not against value itself.
// NOTE(review): presumably the metric is a 0/1 error flag rather than an error
// count; confirm against the metric definition.
460+
// Expected components: "source", "rendering", or "sync".
461+
func metricPipelineErrorViewHasValue(nt *NT, syncLabels prometheusmodel.LabelSet, componentName string, value int) MetricsPredicate {
462+
return func(ctx context.Context, v1api prometheusv1.API) error {
463+
metricName := ocmetrics.PipelineErrorName
464+
metricName = fmt.Sprintf("%s%s", prometheusConfigSyncMetricPrefix, metricName)
465+
labels := prometheusmodel.LabelSet{
466+
prometheusmodel.LabelName(ocmetrics.KeyComponent.Name()): prometheusmodel.LabelValue(componentName),
467+
}.Merge(syncLabels)
468+
query := fmt.Sprintf("%s%s", metricName, labels)
469+
470+
if value == 0 {
471+
// When there's no error, the metric may not be recorded.
472+
// So tolerate missing metrics when expecting a zero value.
473+
return metricExistsWithValueOrDoesNotExist(ctx, nt, v1api, query, 0)
474+
}
475+
return metricExistsWithValue(ctx, nt, v1api, query, 1)
476+
}
477+
}
478+
401479
func metricLastSyncTimestampHasStatus(ctx context.Context, nt *NT, v1api prometheusv1.API, syncLabels prometheusmodel.LabelSet, commitHash, status string) error {
402480
metricName := ocmetrics.LastSyncTimestampView.Name
403481
metricName = fmt.Sprintf("%s%s", prometheusConfigSyncMetricPrefix, metricName)
@@ -437,6 +515,19 @@ func metricApplyDurationViewHasStatus(ctx context.Context, nt *NT, v1api prometh
437515
return metricExists(ctx, nt, v1api, query)
438516
}
439517

518+
// metricParserDurationViewHasStatusAndSource builds a query for the parser
// duration metric's distribution count series, filtered by the given parser
// source and status labels merged with syncLabels, and returns the result of
// metricExists for that query.
func metricParserDurationViewHasStatusAndSource(ctx context.Context, nt *NT, v1api prometheusv1.API, syncLabels prometheusmodel.LabelSet, source, status string) error {
519+
metricName := ocmetrics.ParserDurationName
520+
// ParserDurationView is a distribution. Query count to aggregate.
521+
metricName = fmt.Sprintf("%s%s%s", prometheusConfigSyncMetricPrefix, metricName, prometheusDistributionCountSuffix)
522+
labels := prometheusmodel.LabelSet{
523+
prometheusmodel.LabelName(ocmetrics.KeyParserSource.Name()): prometheusmodel.LabelValue(source),
524+
prometheusmodel.LabelName(ocmetrics.KeyStatus.Name()): prometheusmodel.LabelValue(status),
525+
}.Merge(syncLabels)
526+
query := fmt.Sprintf("%s%s", metricName, labels)
527+
528+
return metricExists(ctx, nt, v1api, query)
529+
}
530+
440531
func metricDeclaredResourcesViewHasValue(ctx context.Context, nt *NT, v1api prometheusv1.API, syncLabels prometheusmodel.LabelSet, commitHash string, numResources int) error {
441532
metricName := ocmetrics.DeclaredResourcesView.Name
442533
metricName = fmt.Sprintf("%s%s", prometheusConfigSyncMetricPrefix, metricName)
@@ -502,6 +593,7 @@ func metricQueryNow(ctx context.Context, nt *NT, v1api prometheusv1.API, query s
502593
if len(warnings) > 0 {
503594
nt.T.Logf("prometheus warnings: %v", warnings)
504595
}
596+
505597
return response, nil
506598
}
507599

0 commit comments

Comments
 (0)