diff --git a/.github/workflows/test-gator.yaml b/.github/workflows/test-gator.yaml index 7de809cccd0..a79e9756515 100644 --- a/.github/workflows/test-gator.yaml +++ b/.github/workflows/test-gator.yaml @@ -30,10 +30,6 @@ jobs: name: "Test Gator" runs-on: ubuntu-22.04 timeout-minutes: 5 - strategy: - fail-fast: false - matrix: - KUBERNETES_VERSION: ["1.31.6", "1.32.3", "1.33.2"] # Latest available versions of Kubernetes at - https://hub.docker.com/r/kindest/node/tags steps: - name: Harden Runner uses: step-security/harden-runner@e3f713f2d8f53843e71c69a996d56f51aa9adfb9 # v2.14.1 @@ -49,11 +45,115 @@ jobs: go-version: "1.25" check-latest: true - - name: Download e2e dependencies - run: | - mkdir -p $GITHUB_WORKSPACE/bin - echo "$GITHUB_WORKSPACE/bin" >> $GITHUB_PATH - make e2e-dependencies KUBERNETES_VERSION=${{ matrix.KUBERNETES_VERSION }} - - name: gator test run: make test-gator-containerized + + gator_bench_test: + name: "Gator Bench E2E" + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: + - name: Harden Runner + uses: step-security/harden-runner@20cf305ff2072d973412fa9b1e3a4f227bda3c76 # v2.14.0 + with: + egress-policy: audit + + - name: Check out code into the Go module directory + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version: "1.25" + check-latest: true + + - name: Build gator + run: make gator + + - name: Test basic Rego policy benchmark + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 50 \ + --engine rego \ + --output table + + - name: Test CEL policy benchmark + run: | + ./bin/gator bench \ + --filename test/gator/bench/cel/ \ + --iterations 50 \ + --engine cel \ + --output table + + - name: Test dual-engine policy benchmark + run: | + ./bin/gator bench \ + --filename test/gator/bench/both/ \ + --iterations 50 \ + --engine all \ + --output table + + - name: Test memory profiling + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 20 \ + --engine rego \ + --memory \ + --output table + + - name: Test concurrent execution + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 100 \ + --engine rego \ + --concurrency 4 \ + --output table + + - name: Test JSON output + run: | + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 20 \ + --engine rego \ + --output json | jq . 
+ + - name: Test baseline save and compare + run: | + # Save baseline + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --save /tmp/baseline.json + + # Compare against baseline - using high min-threshold since we're testing + # functionality not actual performance values in CI + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --compare /tmp/baseline.json \ + --threshold 50 \ + --min-threshold 100ms + + - name: Test min-threshold + run: | + # Save baseline + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --save /tmp/baseline-min.json + + # Compare with strict threshold (0.1%) but loose min-threshold (1s) + # This ensures the flag prevents failure from small variations + ./bin/gator bench \ + --filename test/gator/bench/basic/ \ + --iterations 30 \ + --engine rego \ + --compare /tmp/baseline-min.json \ + --threshold 0.1 \ + --min-threshold 1s diff --git a/.github/workflows/workflow.yaml b/.github/workflows/workflow.yaml index a8d0f9cfb7d..3b4db9da244 100644 --- a/.github/workflows/workflow.yaml +++ b/.github/workflows/workflow.yaml @@ -271,6 +271,6 @@ jobs: uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 if: ${{ always() }} with: - name: logs-${{ matrix.KUBERNETES_VERSION }} + name: logs-ownerreferences-admission-plugin path: | - logs-*.json \ No newline at end of file + logs-*.json diff --git a/cmd/gator/bench/bench.go b/cmd/gator/bench/bench.go new file mode 100644 index 00000000000..1cfe3d5628a --- /dev/null +++ b/cmd/gator/bench/bench.go @@ -0,0 +1,259 @@ +package bench + +import ( + "fmt" + "os" + "strings" + "time" + + cmdutils "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/util" + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator/bench" + "github.com/spf13/cobra" +) + +const ( + examples = `# Benchmark policies with default settings (1000 iterations, cel engine) +gator bench --filename="policies/" + +# Benchmark with both Rego and CEL engines +gator bench --filename="policies/" --engine=all + +# Benchmark with custom iterations and warmup +gator bench --filename="policies/" --iterations=500 --warmup=50 + +# Benchmark with concurrent load (simulates real webhook traffic) +gator bench --filename="policies/" --concurrency=10 + +# Output results as JSON +gator bench --filename="policies/" --output=json + +# Benchmark policies from multiple sources +gator bench --filename="templates/" --filename="constraints/" --filename="resources/" + +# Benchmark from OCI image +gator bench --image="ghcr.io/example/policies:latest" + +# Benchmark with memory profiling +gator bench --filename="policies/" --memory + +# Save benchmark results as baseline +gator bench --filename="policies/" --save=baseline.json + +# Compare against baseline (fail only if BOTH >10% regression AND >1ms absolute increase) +# This prevents false positives for fast policies where small absolute changes appear as large percentages +gator bench --filename="policies/" --compare=baseline.json --threshold=10 --min-threshold=1ms` +) + +// Cmd is the cobra command for the bench subcommand. +var Cmd = &cobra.Command{ + Use: "bench", + Short: "Benchmark policy evaluation performance", + Long: `Benchmark evaluates the performance of Gatekeeper policies by running +constraint evaluation against test resources and measuring latency metrics. 
+ +This command loads ConstraintTemplates, Constraints, and Kubernetes resources +from the specified files or directories, then repeatedly evaluates the resources +against the constraints to gather performance statistics. + +Supports both Rego and CEL policy engines for comparison.`, + Example: examples, + Run: run, + Args: cobra.NoArgs, +} + +var ( + flagFilenames []string + flagImages []string + flagTempDir string + flagEngine string + flagIterations int + flagWarmup int + flagConcurrency int + flagOutput string + flagStats bool + flagMemory bool + flagSave string + flagCompare string + flagThreshold float64 + flagMinThreshold time.Duration +) + +const ( + flagNameFilename = "filename" + flagNameImage = "image" + flagNameTempDir = "tempdir" + flagNameEngine = "engine" + flagNameIterations = "iterations" + flagNameWarmup = "warmup" + flagNameConcurrency = "concurrency" + flagNameOutput = "output" + flagNameStats = "stats" + flagNameMemory = "memory" + flagNameSave = "save" + flagNameCompare = "compare" + flagNameThreshold = "threshold" + flagNameMinThreshold = "min-threshold" +) + +func init() { + Cmd.Flags().StringArrayVarP(&flagFilenames, flagNameFilename, "f", []string{}, + "a file or directory containing ConstraintTemplates, Constraints, and resources to benchmark. Can be specified multiple times.") + Cmd.Flags().StringArrayVarP(&flagImages, flagNameImage, "i", []string{}, + "a URL to an OCI image containing policies. Can be specified multiple times.") + Cmd.Flags().StringVarP(&flagTempDir, flagNameTempDir, "d", "", + "temporary directory to download and unpack images to.") + Cmd.Flags().StringVarP(&flagEngine, flagNameEngine, "e", string(bench.EngineCEL), + fmt.Sprintf("policy engine to benchmark. One of: %s|%s|%s", bench.EngineRego, bench.EngineCEL, bench.EngineAll)) + Cmd.Flags().IntVarP(&flagIterations, flagNameIterations, "n", 1000, + "number of benchmark iterations to run. Use at least 1000 for meaningful P99 metrics.") + Cmd.Flags().IntVar(&flagWarmup, flagNameWarmup, 10, + "number of warmup iterations before measurement.") + Cmd.Flags().IntVarP(&flagConcurrency, flagNameConcurrency, "c", 1, + "number of concurrent goroutines for reviews. Higher values simulate realistic webhook load.") + Cmd.Flags().StringVarP(&flagOutput, flagNameOutput, "o", "table", + "output format. One of: table|json|yaml") + Cmd.Flags().BoolVar(&flagStats, flagNameStats, false, + "gather detailed statistics from the constraint framework.") + Cmd.Flags().BoolVar(&flagMemory, flagNameMemory, false, + "enable memory profiling to track allocations per review.") + Cmd.Flags().StringVar(&flagSave, flagNameSave, "", + "save benchmark results to this file for future comparison (supports .json and .yaml).") + Cmd.Flags().StringVar(&flagCompare, flagNameCompare, "", + "compare results against a baseline file (supports .json and .yaml).") + Cmd.Flags().Float64Var(&flagThreshold, flagNameThreshold, 10.0, + "regression threshold percentage for comparison. Exit code 1 if exceeded.") + Cmd.Flags().DurationVar(&flagMinThreshold, flagNameMinThreshold, 0, + "minimum absolute latency difference to consider a regression (e.g., 1ms). 
Prevents false positives on fast policies where small absolute changes appear as large percentages.") +} + +func run(_ *cobra.Command, _ []string) { + // Validate engine flag + engine, err := parseEngine(flagEngine) + if err != nil { + cmdutils.ErrFatalf("invalid engine: %v", err) + } + + // Validate output format + outputFormat, err := bench.ParseOutputFormat(flagOutput) + if err != nil { + cmdutils.ErrFatalf("invalid output format: %v", err) + } + + // Validate inputs + if len(flagFilenames) == 0 && len(flagImages) == 0 { + cmdutils.ErrFatalf("at least one --filename or --image must be specified") + } + + if flagIterations <= 0 { + cmdutils.ErrFatalf("iterations must be positive") + } + + if flagWarmup < 0 { + cmdutils.ErrFatalf("warmup must be non-negative") + } + + if flagThreshold < 0 { + cmdutils.ErrFatalf("threshold must be non-negative") + } + + if flagMinThreshold < 0 { + cmdutils.ErrFatalf("min-threshold must be non-negative") + } + + if flagConcurrency < 1 { + cmdutils.ErrFatalf("concurrency must be at least 1") + } + + // Warn if warmup exceeds iterations (likely user error) + if flagWarmup > flagIterations { + fmt.Fprintf(os.Stderr, "Warning: warmup (%d) exceeds iterations (%d). Consider reducing warmup.\n\n", flagWarmup, flagIterations) + } + + // Validate baseline file exists before running expensive benchmark + if flagCompare != "" { + if _, err := os.Stat(flagCompare); os.IsNotExist(err) { + cmdutils.ErrFatalf("baseline file does not exist: %s", flagCompare) + } else if err != nil { + cmdutils.ErrFatalf("cannot access baseline file: %v", err) + } + } + + // Run benchmark + opts := &bench.Opts{ + Filenames: flagFilenames, + Images: flagImages, + TempDir: flagTempDir, + Engine: engine, + Iterations: flagIterations, + Warmup: flagWarmup, + Concurrency: flagConcurrency, + GatherStats: flagStats, + Memory: flagMemory, + Save: flagSave, + Baseline: flagCompare, + Threshold: flagThreshold, + MinThreshold: flagMinThreshold, + Writer: os.Stderr, + } + + results, err := bench.Run(opts) + if err != nil { + cmdutils.ErrFatalf("benchmark failed: %v", err) + } + + // Format and print results + output, err := bench.FormatResults(results, outputFormat) + if err != nil { + cmdutils.ErrFatalf("formatting results: %v", err) + } + + fmt.Print(output) + + // Save results if requested + if flagSave != "" { + if err := bench.SaveResults(results, flagSave); err != nil { + cmdutils.ErrFatalf("saving results: %v", err) + } + fmt.Fprintf(os.Stderr, "\nResults saved to: %s\n", flagSave) + } + + // Compare against baseline if requested + exitCode := 0 + if flagCompare != "" { + baseline, err := bench.LoadBaseline(flagCompare) + if err != nil { + cmdutils.ErrFatalf("loading baseline: %v", err) + } + + comparisons := bench.Compare(baseline, results, flagThreshold, flagMinThreshold) + if len(comparisons) == 0 { + fmt.Fprintf(os.Stderr, "\nWarning: No matching engines found for comparison\n") + } else { + fmt.Println() + fmt.Print(bench.FormatComparison(comparisons, flagThreshold)) + + // Check if any comparison failed + for _, comp := range comparisons { + if !comp.Passed { + exitCode = 1 + break + } + } + } + } + + os.Exit(exitCode) +} + +func parseEngine(s string) (bench.Engine, error) { + switch strings.ToLower(s) { + case string(bench.EngineRego): + return bench.EngineRego, nil + case string(bench.EngineCEL): + return bench.EngineCEL, nil + case string(bench.EngineAll): + return bench.EngineAll, nil + default: + return "", fmt.Errorf("invalid engine %q (valid: %s, %s, %s)", s, bench.EngineRego, 
bench.EngineCEL, bench.EngineAll) + } +} diff --git a/cmd/gator/gator.go b/cmd/gator/gator.go index cd0c57e363e..3c5af9e8542 100644 --- a/cmd/gator/gator.go +++ b/cmd/gator/gator.go @@ -3,6 +3,7 @@ package main import ( "os" + "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/bench" "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/expand" "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/sync" "github.com/open-policy-agent/gatekeeper/v3/cmd/gator/test" @@ -17,6 +18,7 @@ var commands = []*cobra.Command{ test.Cmd, expand.Cmd, sync.Cmd, + bench.Cmd, k8sVersion.WithFont("alligator2"), } diff --git a/go.mod b/go.mod index 443151e8892..dd48ad45d0b 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,7 @@ require ( golang.org/x/time v0.14.0 google.golang.org/grpc v1.77.0 google.golang.org/protobuf v1.36.11 + gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.35.0 k8s.io/apiextensions-apiserver v0.35.0 k8s.io/apimachinery v0.35.0 @@ -163,7 +164,6 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20251022142026-3a174f9686a8 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/component-base v0.35.0 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect diff --git a/pkg/gator/bench/bench.go b/pkg/gator/bench/bench.go new file mode 100644 index 00000000000..35c2b1cb510 --- /dev/null +++ b/pkg/gator/bench/bench.go @@ -0,0 +1,513 @@ +package bench + +import ( + "context" + "errors" + "fmt" + "runtime" + "sync" + "sync/atomic" + "time" + + "github.com/open-policy-agent/frameworks/constraint/pkg/apis" + constraintclient "github.com/open-policy-agent/frameworks/constraint/pkg/client" + "github.com/open-policy-agent/frameworks/constraint/pkg/client/drivers/rego" + clienterrors "github.com/open-policy-agent/frameworks/constraint/pkg/client/errors" + "github.com/open-policy-agent/frameworks/constraint/pkg/client/reviews" + "github.com/open-policy-agent/frameworks/constraint/pkg/instrumentation" + "github.com/open-policy-agent/gatekeeper/v3/pkg/drivers/k8scel" + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator/reader" + mutationtypes "github.com/open-policy-agent/gatekeeper/v3/pkg/mutation/types" + "github.com/open-policy-agent/gatekeeper/v3/pkg/target" + "github.com/open-policy-agent/gatekeeper/v3/pkg/util" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + k8sruntime "k8s.io/apimachinery/pkg/runtime" +) + +const ( + // MinIterationsForP99 is the minimum number of iterations recommended for + // statistically meaningful P99 metrics. + MinIterationsForP99 = 1000 +) + +var scheme *k8sruntime.Scheme + +func init() { + scheme = k8sruntime.NewScheme() + if err := apis.AddToScheme(scheme); err != nil { + panic(err) + } +} + +// Run executes the benchmark with the given options and returns results +// for each engine tested. +func Run(opts *Opts) ([]Results, error) { + // Warn if iterations are too low for meaningful P99 statistics + if opts.Iterations < MinIterationsForP99 && opts.Writer != nil { + fmt.Fprintf(opts.Writer, "Warning: %d iterations may not provide statistically meaningful P99 metrics. 
Consider using at least %d iterations.\n\n", + opts.Iterations, MinIterationsForP99) + } + + // Default concurrency to 1 (sequential) + if opts.Concurrency < 1 { + opts.Concurrency = 1 + } + + // Read all resources from files/images + objs, err := reader.ReadSources(opts.Filenames, opts.Images, opts.TempDir) + if err != nil { + return nil, fmt.Errorf("reading sources: %w", err) + } + if len(objs) == 0 { + return nil, fmt.Errorf("no input data identified") + } + + // Categorize objects + var templates []*unstructured.Unstructured + var constraints []*unstructured.Unstructured + var reviewObjs []*unstructured.Unstructured + + for _, obj := range objs { + switch { + case reader.IsTemplate(obj): + templates = append(templates, obj) + case reader.IsConstraint(obj): + constraints = append(constraints, obj) + default: + // Everything else is a potential review object + reviewObjs = append(reviewObjs, obj) + } + } + + if len(templates) == 0 { + return nil, fmt.Errorf("no ConstraintTemplates found in input") + } + if len(constraints) == 0 { + return nil, fmt.Errorf("no Constraints found in input") + } + if len(reviewObjs) == 0 { + return nil, fmt.Errorf("no objects to review found in input") + } + + var results []Results + var warnings []string + + // Determine which engines to benchmark + engines := []Engine{opts.Engine} + if opts.Engine == EngineAll { + engines = []Engine{EngineRego, EngineCEL} + } + + for _, engine := range engines { + result, err := runBenchmark(engine, templates, constraints, reviewObjs, opts) + if err != nil { + // For "all" engine mode, record warning and continue with other engines + if opts.Engine == EngineAll { + warnings = append(warnings, fmt.Sprintf("%s: %s", engine, err.Error())) + continue + } + return nil, fmt.Errorf("benchmarking %s: %w", engine, err) + } + results = append(results, *result) + } + + // Check if we have any results + if len(results) == 0 { + return nil, fmt.Errorf("no engines could process the templates: %v", warnings) + } + + // Report engines that were skipped to the configured writer for visibility + if len(warnings) > 0 && len(results) > 0 && opts.Writer != nil { + for _, w := range warnings { + fmt.Fprintf(opts.Writer, "Warning: Engine skipped - %s\n", w) + } + fmt.Fprintln(opts.Writer) + } + + return results, nil +} + +// runBenchmark runs the benchmark for a single engine.
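Since Run and Opts are exported, the CLI wiring earlier in this patch is only one consumer. A minimal sketch of driving the benchmark programmatically, using the field and constant names defined here (the policies/ directory is illustrative):

package main

import (
	"fmt"
	"os"

	"github.com/open-policy-agent/gatekeeper/v3/pkg/gator/bench"
)

func main() {
	// Benchmark both engines over a directory of templates, constraints, and objects.
	results, err := bench.Run(&bench.Opts{
		Filenames:   []string{"policies/"}, // illustrative path
		Engine:      bench.EngineAll,
		Iterations:  1000,
		Warmup:      10,
		Concurrency: 4,
		Writer:      os.Stderr, // warnings (low iterations, skipped engines) land here
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	for _, r := range results {
		fmt.Printf("%s: p99=%s, %.0f reviews/s, %d violations\n",
			r.Engine, r.Latencies.P99, r.ReviewsPerSecond, r.ViolationCount)
	}
}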
+func runBenchmark( + engine Engine, + templates []*unstructured.Unstructured, + constraints []*unstructured.Unstructured, + reviewObjs []*unstructured.Unstructured, + opts *Opts, +) (*Results, error) { + ctx := context.Background() + var setupBreakdown SetupBreakdown + var skippedTemplates []string + var skippedConstraints []string + loadedTemplateKinds := make(map[string]bool) + + // Create the client for this engine + setupStart := time.Now() + clientStart := time.Now() + client, err := makeClient(engine, opts.GatherStats) + if err != nil { + return nil, fmt.Errorf("creating client: %w", err) + } + setupBreakdown.ClientCreation = time.Since(clientStart) + + // Add templates (with skip support for incompatible templates) + templateStart := time.Now() + for _, obj := range templates { + templ, err := reader.ToTemplate(scheme, obj) + if err != nil { + return nil, fmt.Errorf("converting template %q: %w", obj.GetName(), err) + } + _, err = client.AddTemplate(ctx, templ) + if err != nil { + // Check if this is an engine compatibility issue + if errors.Is(err, clienterrors.ErrNoDriver) { + skippedTemplates = append(skippedTemplates, obj.GetName()) + continue + } + return nil, fmt.Errorf("adding template %q: %w", templ.GetName(), err) + } + // Track the constraint kind this template creates + loadedTemplateKinds[templ.Spec.CRD.Spec.Names.Kind] = true + } + setupBreakdown.TemplateCompilation = time.Since(templateStart) + + // Check if all templates were skipped + loadedTemplateCount := len(templates) - len(skippedTemplates) + if loadedTemplateCount == 0 { + return nil, fmt.Errorf("no templates compatible with %s engine (all %d templates skipped)", engine, len(templates)) + } + + // Add constraints (skip those whose template was skipped) + constraintStart := time.Now() + for _, obj := range constraints { + kind := obj.GetKind() + if !loadedTemplateKinds[kind] { + skippedConstraints = append(skippedConstraints, obj.GetName()) + continue + } + if _, err := client.AddConstraint(ctx, obj); err != nil { + return nil, fmt.Errorf("adding constraint %q: %w", obj.GetName(), err) + } + } + setupBreakdown.ConstraintLoading = time.Since(constraintStart) + + // Check if all constraints were skipped + loadedConstraintCount := len(constraints) - len(skippedConstraints) + if loadedConstraintCount == 0 { + return nil, fmt.Errorf("no constraints loaded (all %d constraints skipped due to missing templates)", len(constraints)) + } + + // Add all objects as data (for referential constraints) + // Note: CEL driver doesn't support referential constraints, so skip data loading for CEL + dataStart := time.Now() + var skippedDataObjects []string + referentialDataSupported := engine != EngineCEL + if referentialDataSupported { + for _, obj := range reviewObjs { + _, err := client.AddData(ctx, obj) + if err != nil { + return nil, fmt.Errorf("adding data %q: %w", obj.GetName(), err) + } + } + } + // Note: We don't populate skippedDataObjects for CEL engine because it's expected + // behavior (CEL doesn't support referential data), not an error. The + // ReferentialDataSupported field indicates this engine limitation. 
+ setupBreakdown.DataLoading = time.Since(dataStart) + + setupDuration := time.Since(setupStart) + + // Warmup phase + for i := 0; i < opts.Warmup; i++ { + for _, obj := range reviewObjs { + au := target.AugmentedUnstructured{ + Object: *obj, + Source: mutationtypes.SourceTypeOriginal, + } + if _, err := client.Review(ctx, au, reviews.EnforcementPoint(util.GatorEnforcementPoint)); err != nil { + return nil, fmt.Errorf("warmup review failed: %w", err) + } + } + } + + // Measurement phase + var durations []time.Duration + var totalViolations int64 + + // Memory profiling: capture memory stats before and after + var memStatsBefore, memStatsAfter runtime.MemStats + if opts.Memory { + runtime.GC() // Run GC to get clean baseline + runtime.ReadMemStats(&memStatsBefore) + } + + benchStart := time.Now() + + // Concurrent or sequential execution based on concurrency setting + var statsEntries []*instrumentation.StatsEntry + if opts.Concurrency > 1 { + durations, totalViolations, statsEntries, err = runConcurrentBenchmark(ctx, client, reviewObjs, opts) + if err != nil { + return nil, err + } + } else { + durations, totalViolations, statsEntries, err = runSequentialBenchmark(ctx, client, reviewObjs, opts) + if err != nil { + return nil, err + } + } + + totalDuration := time.Since(benchStart) + + // Capture memory stats after measurement + var memStats *MemoryStats + if opts.Memory { + runtime.ReadMemStats(&memStatsAfter) + totalReviewsForMem := uint64(opts.Iterations) * uint64(len(reviewObjs)) //nolint:gosec // overflow is acceptable for benchmark counts + if totalReviewsForMem > 0 { + totalAllocs := memStatsAfter.Mallocs - memStatsBefore.Mallocs + totalBytes := memStatsAfter.TotalAlloc - memStatsBefore.TotalAlloc + memStats = &MemoryStats{ + TotalAllocs: totalAllocs, + TotalBytes: totalBytes, + AllocsPerReview: totalAllocs / totalReviewsForMem, + BytesPerReview: totalBytes / totalReviewsForMem, + } + } + } + + // Calculate metrics + latencies := calculateLatencies(durations) + totalReviews := opts.Iterations * len(reviewObjs) + throughput := calculateThroughput(totalReviews, totalDuration) + + return &Results{ + Engine: engine, + TemplateCount: loadedTemplateCount, + ConstraintCount: loadedConstraintCount, + ObjectCount: len(reviewObjs), + Iterations: opts.Iterations, + Concurrency: opts.Concurrency, + SetupDuration: setupDuration, + SetupBreakdown: setupBreakdown, + TotalDuration: totalDuration, + Latencies: latencies, + ViolationCount: int(totalViolations), + ReviewsPerSecond: throughput, + MemoryStats: memStats, + StatsEntries: statsEntries, + SkippedTemplates: skippedTemplates, + SkippedConstraints: skippedConstraints, + SkippedDataObjects: skippedDataObjects, + ReferentialDataSupported: referentialDataSupported, + }, nil +} + +// makeClient creates a constraint client configured for the specified engine. 
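calculateLatencies and calculateThroughput are referenced above but defined outside this hunk. A minimal sketch of the computation they plausibly perform, assuming it lives in the same package (imports: sort, time) and uses a plain sort-and-index percentile; the actual implementation may interpolate differently:

func sketchLatencies(durations []time.Duration) Latencies {
	if len(durations) == 0 {
		return Latencies{}
	}
	// Sort a copy so percentiles can be read off by index.
	sorted := append([]time.Duration(nil), durations...)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })

	var sum time.Duration
	for _, d := range sorted {
		sum += d
	}
	pct := func(p float64) time.Duration {
		return sorted[int(float64(len(sorted)-1)*p)]
	}
	return Latencies{
		Min:  sorted[0],
		Max:  sorted[len(sorted)-1],
		Mean: sum / time.Duration(len(sorted)),
		P50:  pct(0.50),
		P95:  pct(0.95),
		P99:  pct(0.99),
	}
}

// Throughput, as reported in Results above, is presumably reviews over wall time:
//	reviewsPerSecond := float64(totalReviews) / totalDuration.Seconds()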
+func makeClient(engine Engine, gatherStats bool) (*constraintclient.Client, error) { + args := []constraintclient.Opt{ + constraintclient.Targets(&target.K8sValidationTarget{}), + constraintclient.EnforcementPoints(util.GatorEnforcementPoint), + } + + switch engine { + case EngineRego: + driver, err := makeRegoDriver(gatherStats) + if err != nil { + return nil, err + } + args = append(args, constraintclient.Driver(driver)) + + case EngineCEL: + driver, err := makeCELDriver(gatherStats) + if err != nil { + return nil, err + } + args = append(args, constraintclient.Driver(driver)) + + default: + return nil, fmt.Errorf("unsupported engine: %s", engine) + } + + return constraintclient.NewClient(args...) +} + +func makeRegoDriver(gatherStats bool) (*rego.Driver, error) { + var args []rego.Arg + if gatherStats { + args = append(args, rego.GatherStats()) + } + return rego.New(args...) +} + +func makeCELDriver(gatherStats bool) (*k8scel.Driver, error) { + var args []k8scel.Arg + if gatherStats { + args = append(args, k8scel.GatherStats()) + } + return k8scel.New(args...) +} + +// runSequentialBenchmark runs the benchmark sequentially (single-threaded). +func runSequentialBenchmark( + ctx context.Context, + client *constraintclient.Client, + reviewObjs []*unstructured.Unstructured, + opts *Opts, +) ([]time.Duration, int64, []*instrumentation.StatsEntry, error) { + var durations []time.Duration + var totalViolations int64 + var statsEntries []*instrumentation.StatsEntry + + for i := 0; i < opts.Iterations; i++ { + for _, obj := range reviewObjs { + au := target.AugmentedUnstructured{ + Object: *obj, + Source: mutationtypes.SourceTypeOriginal, + } + + reviewStart := time.Now() + resp, err := client.Review(ctx, au, reviews.EnforcementPoint(util.GatorEnforcementPoint)) + reviewDuration := time.Since(reviewStart) + + if err != nil { + return nil, 0, nil, fmt.Errorf("review failed for %s/%s: %w", + obj.GetNamespace(), obj.GetName(), err) + } + + durations = append(durations, reviewDuration) + + // Count violations + for _, r := range resp.ByTarget { + totalViolations += int64(len(r.Results)) + } + + // Collect stats only from first iteration to avoid excessive data + if opts.GatherStats && i == 0 { + statsEntries = append(statsEntries, resp.StatsEntries...) + } + } + } + + return durations, totalViolations, statsEntries, nil +} + +// reviewResult holds the result of a single review for concurrent execution. +type reviewResult struct { + duration time.Duration + violations int + statsEntries []*instrumentation.StatsEntry + err error +} + +// runConcurrentBenchmark runs the benchmark with multiple goroutines. 
+func runConcurrentBenchmark( + ctx context.Context, + client *constraintclient.Client, + reviewObjs []*unstructured.Unstructured, + opts *Opts, +) ([]time.Duration, int64, []*instrumentation.StatsEntry, error) { + totalReviews := opts.Iterations * len(reviewObjs) + + // Create a cancellable context for error propagation + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Create work items + type workItem struct { + iteration int + objIndex int + } + workChan := make(chan workItem, totalReviews) + for i := 0; i < opts.Iterations; i++ { + for j := range reviewObjs { + workChan <- workItem{iteration: i, objIndex: j} + } + } + close(workChan) + + // Result collection + resultsChan := make(chan reviewResult, totalReviews) + var wg sync.WaitGroup + var firstErr atomic.Value + + // Launch worker goroutines + for w := 0; w < opts.Concurrency; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for work := range workChan { + // Check if we should stop due to context cancellation + select { + case <-ctx.Done(): + return + default: + } + + obj := reviewObjs[work.objIndex] + au := target.AugmentedUnstructured{ + Object: *obj, + Source: mutationtypes.SourceTypeOriginal, + } + + reviewStart := time.Now() + resp, err := client.Review(ctx, au, reviews.EnforcementPoint(util.GatorEnforcementPoint)) + reviewDuration := time.Since(reviewStart) + + if err != nil { + firstErr.CompareAndSwap(nil, fmt.Errorf("review failed for %s/%s: %w", + obj.GetNamespace(), obj.GetName(), err)) + cancel() // Signal other goroutines to stop + resultsChan <- reviewResult{err: err} + return + } + + violations := 0 + for _, r := range resp.ByTarget { + violations += len(r.Results) + } + + // Collect stats only from first iteration to avoid excessive data + var stats []*instrumentation.StatsEntry + if opts.GatherStats && work.iteration == 0 { + stats = resp.StatsEntries + } + + resultsChan <- reviewResult{ + duration: reviewDuration, + violations: violations, + statsEntries: stats, + } + } + }() + } + + go func() { + wg.Wait() + close(resultsChan) + }() + + var durations []time.Duration + var totalViolations int64 + var statsEntries []*instrumentation.StatsEntry + + for result := range resultsChan { + if result.err != nil { + continue + } + durations = append(durations, result.duration) + totalViolations += int64(result.violations) + if len(result.statsEntries) > 0 { + statsEntries = append(statsEntries, result.statsEntries...) 
+ } + } + + if errVal := firstErr.Load(); errVal != nil { + if err, ok := errVal.(error); ok { + return nil, 0, nil, err + } + return nil, 0, nil, fmt.Errorf("unexpected non-error value stored in firstErr: %T", errVal) + } + + return durations, totalViolations, statsEntries, nil +} diff --git a/pkg/gator/bench/bench_test.go b/pkg/gator/bench/bench_test.go new file mode 100644 index 00000000000..503b5988690 --- /dev/null +++ b/pkg/gator/bench/bench_test.go @@ -0,0 +1,859 @@ +package bench + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestRun_MissingInputs(t *testing.T) { + _, err := Run(&Opts{ + Filenames: []string{}, + Iterations: 10, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing inputs") + } +} + +func TestRun_NoTemplates(t *testing.T) { + // Create a temp file with just an object (no template) + tmpDir := t.TempDir() + objFile := filepath.Join(tmpDir, "object.yaml") + err := os.WriteFile(objFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write test file: %v", err) + } + + _, err = Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 1, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing templates") + } +} + +func TestRun_Integration(t *testing.T) { + // Create temp files with a template, constraint, and object + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("missing required labels: %v", [missing]) + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + labels: ["team"] +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object to review + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +spec: + containers: + - name: test + image: nginx +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run benchmark with Rego engine + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 5, + Warmup: 1, + Engine: EngineRego, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + r := results[0] + if r.Engine != EngineRego { + t.Errorf("expected engine %s, got %s", EngineRego, r.Engine) + } + if r.TemplateCount != 1 { + t.Errorf("expected 1 template, got %d", r.TemplateCount) + } + if r.ConstraintCount 
!= 1 { + t.Errorf("expected 1 constraint, got %d", r.ConstraintCount) + } + if r.ObjectCount != 1 { + t.Errorf("expected 1 object, got %d", r.ObjectCount) + } + if r.Iterations != 5 { + t.Errorf("expected 5 iterations, got %d", r.Iterations) + } + // The pod is missing the required "team" label, so we expect violations + if r.ViolationCount == 0 { + t.Error("expected violations for missing label") + } + if r.ReviewsPerSecond <= 0 { + t.Error("expected positive throughput") + } +} + +func TestRun_AllEngines(t *testing.T) { + // Create temp files with a CEL-compatible template (using VAP code block) + tmpDir := t.TempDir() + + // Write template with both Rego and CEL validation + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("missing required labels: %v", [missing]) + } + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "has(object.metadata.labels) && object.metadata.labels.all(label, label in variables.params.labels)" + message: "missing required labels" +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + parameters: + labels: ["team"] +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with EngineAll + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineAll, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + // Should have results for both engines + if len(results) != 2 { + t.Fatalf("expected 2 results for EngineAll, got %d", len(results)) + } + + // First result should be Rego + if results[0].Engine != EngineRego { + t.Errorf("expected first result to be rego, got %s", results[0].Engine) + } + // Second result should be CEL + if results[1].Engine != EngineCEL { + t.Errorf("expected second result to be cel, got %s", results[1].Engine) + } +} + +func TestRun_NoConstraints(t *testing.T) { + // Create a temp file with template but no constraint + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + 
if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write object (no constraint) + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + _, err = Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 1, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing constraints") + } +} + +func TestRun_NoObjects(t *testing.T) { + // Create a temp file with template and constraint but no objects + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint only + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + _, err = Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 1, + Engine: EngineRego, + }) + if err == nil { + t.Error("expected error for missing objects to review") + } +} + +func TestRun_WithGatherStats(t *testing.T) { + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with GatherStats enabled + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineRego, + GatherStats: true, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } +} + +func TestRun_CELOnly(t *testing.T) { + tmpDir := t.TempDir() + + // Write template with CEL code block + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: 
admission.k8s.gatekeeper.sh + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "true" + message: "always pass" +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with CEL engine only + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineCEL, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].Engine != EngineCEL { + t.Errorf("expected engine cel, got %s", results[0].Engine) + } +} + +func TestRun_SetupBreakdown(t *testing.T) { + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineRego, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + r := results[0] + // Check that setup breakdown fields are populated + if r.SetupBreakdown.ClientCreation == 0 { + t.Error("expected ClientCreation to be non-zero") + } + if r.SetupBreakdown.TemplateCompilation == 0 { + t.Error("expected TemplateCompilation to be non-zero") + } + if r.SetupBreakdown.ConstraintLoading == 0 { + t.Error("expected ConstraintLoading to be non-zero") + } + // DataLoading can be zero if there are no objects to load as data +} + +func TestRun_SkippedTemplates(t *testing.T) { + tmpDir := t.TempDir() + + // Write Rego-only template (incompatible with CEL) + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package 
k8srequiredlabels + violation[{"msg": msg}] { + msg := "test" + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with EngineAll - CEL should fail but Rego should succeed + var buf bytes.Buffer + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineAll, + Writer: &buf, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + // Should have 1 result (only Rego succeeded) + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + if results[0].Engine != EngineRego { + t.Errorf("expected engine rego, got %s", results[0].Engine) + } + + // Check that warning was written + output := buf.String() + if output == "" { + t.Error("expected warning about skipped CEL engine") + } +} + +func TestRun_Concurrent(t *testing.T) { + tmpDir := t.TempDir() + + // Write template + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("missing required labels: %v", [missing]) + } +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + labels: ["team"] +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write multiple objects to review for concurrent testing + for i := 0; i < 3; i++ { + objectFile := filepath.Join(tmpDir, fmt.Sprintf("pod%d.yaml", i)) + err = os.WriteFile(objectFile, []byte(fmt.Sprintf(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod-%d +spec: + containers: + - name: test + image: nginx +`, i)), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + } + + // Run benchmark with concurrency > 1 + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 10, + Warmup: 1, + Engine: EngineRego, + Concurrency: 4, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + + r := results[0] + if r.Engine != EngineRego { + t.Errorf("expected engine %s, 
got %s", EngineRego, r.Engine) + } + if r.Concurrency != 4 { + t.Errorf("expected concurrency 4, got %d", r.Concurrency) + } + if r.TemplateCount != 1 { + t.Errorf("expected 1 template, got %d", r.TemplateCount) + } + if r.ConstraintCount != 1 { + t.Errorf("expected 1 constraint, got %d", r.ConstraintCount) + } + if r.ObjectCount != 3 { + t.Errorf("expected 3 objects, got %d", r.ObjectCount) + } + if r.Iterations != 10 { + t.Errorf("expected 10 iterations, got %d", r.Iterations) + } + // All pods are missing the required "team" label, so we expect violations + if r.ViolationCount == 0 { + t.Error("expected violations for missing labels") + } + if r.ReviewsPerSecond <= 0 { + t.Error("expected positive throughput") + } +} + +func TestRun_CELWithGatherStats(t *testing.T) { + tmpDir := t.TempDir() + + // Write template with CEL code block + templateFile := filepath.Join(tmpDir, "template.yaml") + err := os.WriteFile(templateFile, []byte(` +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + targets: + - target: admission.k8s.gatekeeper.sh + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "true" + message: "always pass" +`), 0o600) + if err != nil { + t.Fatalf("failed to write template file: %v", err) + } + + // Write constraint + constraintFile := filepath.Join(tmpDir, "constraint.yaml") + err = os.WriteFile(constraintFile, []byte(` +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +`), 0o600) + if err != nil { + t.Fatalf("failed to write constraint file: %v", err) + } + + // Write object + objectFile := filepath.Join(tmpDir, "pod.yaml") + err = os.WriteFile(objectFile, []byte(` +apiVersion: v1 +kind: Pod +metadata: + name: test-pod +`), 0o600) + if err != nil { + t.Fatalf("failed to write object file: %v", err) + } + + // Run with CEL engine and GatherStats enabled + results, err := Run(&Opts{ + Filenames: []string{tmpDir}, + Iterations: 2, + Warmup: 0, + Engine: EngineCEL, + GatherStats: true, + }) + if err != nil { + t.Fatalf("Run() error = %v", err) + } + + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].Engine != EngineCEL { + t.Errorf("expected engine cel, got %s", results[0].Engine) + } +} + +func TestMakeClient_UnsupportedEngine(t *testing.T) { + _, err := makeClient(Engine("invalid"), false) + if err == nil { + t.Error("expected error for unsupported engine") + } + if !strings.Contains(err.Error(), "unsupported engine") { + t.Errorf("expected 'unsupported engine' error, got: %v", err) + } +} diff --git a/pkg/gator/bench/compare.go b/pkg/gator/bench/compare.go new file mode 100644 index 00000000000..5dde1ac0d73 --- /dev/null +++ b/pkg/gator/bench/compare.go @@ -0,0 +1,218 @@ +package bench + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator" + "sigs.k8s.io/yaml" +) + +// SaveResults saves benchmark results to a file in JSON or YAML format. +// The format is determined by the file extension (.json or .yaml/.yml). 
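Together with Compare and FormatComparison below, the save/load helpers support the --save/--compare flow wired up in cmd/gator/bench. A sketch of that round trip, using the names defined in this file (the opts value and baseline path are illustrative; error handling elided for brevity):

baseline, _ := bench.Run(opts)
_ = bench.SaveResults(baseline, "baseline.json") // file extension selects JSON vs YAML

// ...in a later run over the same inputs...
current, _ := bench.Run(opts)
loaded, _ := bench.LoadBaseline("baseline.json")

comps := bench.Compare(loaded, current, 10.0, time.Millisecond) // 10% threshold, 1ms floor
fmt.Print(bench.FormatComparison(comps, 10.0))
for _, c := range comps {
	if !c.Passed {
		os.Exit(1) // mirror the CLI's non-zero exit on regression
	}
}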
+func SaveResults(results []Results, path string) error { + ext := filepath.Ext(path) + + var data []byte + var err error + + switch ext { + case gator.ExtYAML, gator.ExtYML: + data, err = yaml.Marshal(results) + default: + // Default to JSON + data, err = json.MarshalIndent(results, "", " ") + } + if err != nil { + return fmt.Errorf("marshaling results: %w", err) + } + + if err := os.WriteFile(path, data, 0o600); err != nil { + return fmt.Errorf("writing results to %s: %w", path, err) + } + + return nil +} + +// LoadBaseline loads baseline results from a file. +// The format is determined by the file extension (.json or .yaml/.yml). +func LoadBaseline(path string) ([]Results, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading baseline from %s: %w", path, err) + } + + ext := filepath.Ext(path) + var results []Results + + switch ext { + case gator.ExtYAML, gator.ExtYML: + err = yaml.Unmarshal(data, &results) + default: + // Default to JSON + err = json.Unmarshal(data, &results) + } + if err != nil { + return nil, fmt.Errorf("unmarshaling baseline: %w", err) + } + + return results, nil +} + +// Compare compares current results against baseline results and returns comparison data. +// The threshold is the percentage change considered a regression (e.g., 10 means 10%). +// The minThreshold is the minimum absolute difference to consider a regression. +// For latency metrics, positive change = regression. For throughput, negative change = regression. +func Compare(baseline, current []Results, threshold float64, minThreshold time.Duration) []ComparisonResult { + var comparisons []ComparisonResult + + // Create a map of baseline results by engine for easy lookup + baselineByEngine := make(map[Engine]*Results) + for i := range baseline { + baselineByEngine[baseline[i].Engine] = &baseline[i] + } + + // Compare each current result against its baseline + for i := range current { + curr := ¤t[i] + base, ok := baselineByEngine[curr.Engine] + if !ok { + // No baseline for this engine, skip comparison + continue + } + + comparison := compareResults(base, curr, threshold, minThreshold) + comparisons = append(comparisons, comparison) + } + + return comparisons +} + +func compareResults(baseline, current *Results, threshold float64, minThreshold time.Duration) ComparisonResult { + var metrics []MetricComparison + var failedMetrics []string + allPassed := true + + // Compare latency metrics (higher is worse, so positive delta = regression) + latencyMetrics := []struct { + name string + baseline float64 + current float64 + }{ + {"P50 Latency", float64(baseline.Latencies.P50), float64(current.Latencies.P50)}, + {"P95 Latency", float64(baseline.Latencies.P95), float64(current.Latencies.P95)}, + {"P99 Latency", float64(baseline.Latencies.P99), float64(current.Latencies.P99)}, + {"Mean Latency", float64(baseline.Latencies.Mean), float64(current.Latencies.Mean)}, + } + + for _, m := range latencyMetrics { + delta := calculateDelta(m.baseline, m.current) + // For latency, check both percentage threshold AND minimum absolute threshold + // If minThreshold is set, ignore regressions smaller than the absolute minimum + absDiff := time.Duration(m.current) - time.Duration(m.baseline) + passed := delta <= threshold || (minThreshold > 0 && absDiff < minThreshold) + if !passed { + allPassed = false + failedMetrics = append(failedMetrics, m.name) + } + metrics = append(metrics, MetricComparison{ + Name: m.name, + Baseline: m.baseline, + Current: m.current, + Delta: delta, + Passed: 
passed, + }) + } + + // Compare throughput (lower is worse, so negative delta = regression) + throughputDelta := calculateDelta(baseline.ReviewsPerSecond, current.ReviewsPerSecond) + // For throughput, we invert the logic: negative delta is a regression + // If minThreshold is set, convert it to a throughput difference threshold + // A latency increase of minThreshold corresponds to a throughput change that we should ignore + throughputPassed := -throughputDelta <= threshold + if !throughputPassed && minThreshold > 0 && baseline.Latencies.Mean > 0 { + // Calculate the absolute throughput difference + absThroughputDiff := baseline.ReviewsPerSecond - current.ReviewsPerSecond + // Convert minThreshold to an equivalent throughput tolerance + // If we tolerate minThreshold latency change, we should tolerate proportional throughput change + // Use baseline throughput to derive a reasonable tolerance from the latency threshold + // throughput ≈ 1/latency, so tolerance should be proportional to baseline throughput + minThroughputDiff := baseline.ReviewsPerSecond * (float64(minThreshold) / float64(baseline.Latencies.Mean)) + if absThroughputDiff < minThroughputDiff { + throughputPassed = true + } + } + if !throughputPassed { + allPassed = false + failedMetrics = append(failedMetrics, "Throughput") + } + metrics = append(metrics, MetricComparison{ + Name: "Throughput", + Baseline: baseline.ReviewsPerSecond, + Current: current.ReviewsPerSecond, + Delta: throughputDelta, + Passed: throughputPassed, + }) + + // Compare memory stats if available + // Note: minThreshold is a time.Duration and applies only to latency/throughput metrics. + // Memory metrics are evaluated strictly against the percentage threshold. + if baseline.MemoryStats != nil && current.MemoryStats != nil { + allocsDelta := calculateDelta( + float64(baseline.MemoryStats.AllocsPerReview), + float64(current.MemoryStats.AllocsPerReview), + ) + allocsPassed := allocsDelta <= threshold + if !allocsPassed { + allPassed = false + failedMetrics = append(failedMetrics, "Allocs/Review") + } + metrics = append(metrics, MetricComparison{ + Name: "Allocs/Review", + Baseline: float64(baseline.MemoryStats.AllocsPerReview), + Current: float64(current.MemoryStats.AllocsPerReview), + Delta: allocsDelta, + Passed: allocsPassed, + }) + + bytesDelta := calculateDelta( + float64(baseline.MemoryStats.BytesPerReview), + float64(current.MemoryStats.BytesPerReview), + ) + bytesPassed := bytesDelta <= threshold + if !bytesPassed { + allPassed = false + failedMetrics = append(failedMetrics, "Bytes/Review") + } + metrics = append(metrics, MetricComparison{ + Name: "Bytes/Review", + Baseline: float64(baseline.MemoryStats.BytesPerReview), + Current: float64(current.MemoryStats.BytesPerReview), + Delta: bytesDelta, + Passed: bytesPassed, + }) + } + + return ComparisonResult{ + BaselineEngine: baseline.Engine, + CurrentEngine: current.Engine, + Metrics: metrics, + Passed: allPassed, + FailedMetrics: failedMetrics, + } +} + +// calculateDelta calculates the percentage change from baseline to current. +// Returns positive value if current > baseline (regression for latency metrics). 
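To make the threshold interaction concrete, here is the gate from compareResults worked through with the same numbers as the "min threshold bypasses percentage regression" test in compare_test.go below:

// baseline P50 = 100µs, current P50 = 120µs, threshold = 10%, minThreshold = 1ms
//
//	delta   = (120µs - 100µs) / 100µs * 100 = 20%   -> exceeds the 10% threshold
//	absDiff = 120µs - 100µs = 20µs                  -> below the 1ms absolute floor
//	passed  = delta <= threshold || (minThreshold > 0 && absDiff < minThreshold)
//	        = false || true = true
//
// With minThreshold = 0 the second clause never applies, so the same 20% swing fails.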
+func calculateDelta(baseline, current float64) float64 { + if baseline == 0 { + if current == 0 { + return 0 + } + return 100 // Infinite increase represented as 100% + } + return ((current - baseline) / baseline) * 100 +} diff --git a/pkg/gator/bench/compare_test.go b/pkg/gator/bench/compare_test.go new file mode 100644 index 00000000000..cf8a71b7725 --- /dev/null +++ b/pkg/gator/bench/compare_test.go @@ -0,0 +1,416 @@ +package bench + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestSaveAndLoadResults(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 5, + ConstraintCount: 10, + ObjectCount: 100, + Iterations: 50, + SetupDuration: time.Second, + TotalDuration: 5 * time.Second, + Latencies: Latencies{ + Min: 100 * time.Microsecond, + Max: 10 * time.Millisecond, + Mean: 1 * time.Millisecond, + P50: 900 * time.Microsecond, + P95: 5 * time.Millisecond, + P99: 8 * time.Millisecond, + }, + ViolationCount: 25, + ReviewsPerSecond: 1000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + TotalAllocs: 25000, + TotalBytes: 512000, + }, + }, + } + + t.Run("JSON format", func(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "baseline.json") + + // Save + err := SaveResults(results, path) + if err != nil { + t.Fatalf("SaveResults failed: %v", err) + } + + // Verify file exists + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Fatalf("file was not created") + } + + // Load + loaded, err := LoadBaseline(path) + if err != nil { + t.Fatalf("LoadBaseline failed: %v", err) + } + + if len(loaded) != 1 { + t.Fatalf("expected 1 result, got %d", len(loaded)) + } + + if loaded[0].Engine != EngineRego { + t.Errorf("Engine = %v, want %v", loaded[0].Engine, EngineRego) + } + if loaded[0].ReviewsPerSecond != 1000 { + t.Errorf("ReviewsPerSecond = %v, want %v", loaded[0].ReviewsPerSecond, 1000) + } + }) + + t.Run("YAML format", func(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "baseline.yaml") + + // Save + err := SaveResults(results, path) + if err != nil { + t.Fatalf("SaveResults failed: %v", err) + } + + // Load + loaded, err := LoadBaseline(path) + if err != nil { + t.Fatalf("LoadBaseline failed: %v", err) + } + + if len(loaded) != 1 { + t.Fatalf("expected 1 result, got %d", len(loaded)) + } + + if loaded[0].Engine != EngineRego { + t.Errorf("Engine = %v, want %v", loaded[0].Engine, EngineRego) + } + }) + + t.Run("YML extension", func(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "baseline.yml") + + // Save + err := SaveResults(results, path) + if err != nil { + t.Fatalf("SaveResults failed: %v", err) + } + + // Load + loaded, err := LoadBaseline(path) + if err != nil { + t.Fatalf("LoadBaseline failed: %v", err) + } + + if len(loaded) != 1 { + t.Fatalf("expected 1 result, got %d", len(loaded)) + } + }) +} + +func TestLoadBaseline_FileNotFound(t *testing.T) { + _, err := LoadBaseline("/nonexistent/path/baseline.json") + if err == nil { + t.Fatal("expected error for non-existent file") + } +} + +func TestCompare(t *testing.T) { + baseline := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 1 * time.Millisecond, + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 1000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + }, + }, + } + + t.Run("no regression", func(t *testing.T) { + current := []Results{ + { + Engine: EngineRego, + 
Latencies: Latencies{ + P50: 1050 * time.Microsecond, // 5% increase + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 950, // 5% decrease + MemoryStats: &MemoryStats{ + AllocsPerReview: 520, // 4% increase + BytesPerReview: 10500, + }, + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisons)) + } + + if !comparisons[0].Passed { + t.Errorf("expected comparison to pass, got failed metrics: %v", comparisons[0].FailedMetrics) + } + }) + + t.Run("latency regression", func(t *testing.T) { + current := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 1500 * time.Microsecond, // 50% increase - regression! + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 1000, + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisons)) + } + + if comparisons[0].Passed { + t.Error("expected comparison to fail due to latency regression") + } + if len(comparisons[0].FailedMetrics) == 0 { + t.Error("expected failed metrics to be populated") + } + }) + + t.Run("throughput regression", func(t *testing.T) { + current := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 1 * time.Millisecond, + P95: 5 * time.Millisecond, + P99: 10 * time.Millisecond, + Mean: 2 * time.Millisecond, + }, + ReviewsPerSecond: 800, // 20% decrease - regression! + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisons)) + } + + if comparisons[0].Passed { + t.Error("expected comparison to fail due to throughput regression") + } + + foundThroughput := false + for _, m := range comparisons[0].FailedMetrics { + if m == "Throughput" { + foundThroughput = true + break + } + } + if !foundThroughput { + t.Error("expected Throughput to be in failed metrics") + } + }) + + t.Run("no matching engine", func(t *testing.T) { + current := []Results{ + { + Engine: EngineCEL, // Different engine + Latencies: Latencies{ + P50: 1 * time.Millisecond, + }, + ReviewsPerSecond: 1000, + }, + } + + comparisons := Compare(baseline, current, 10.0, 0) + if len(comparisons) != 0 { + t.Errorf("expected 0 comparisons for non-matching engine, got %d", len(comparisons)) + } + }) + + t.Run("min threshold bypasses percentage regression", func(t *testing.T) { + // Use a fast baseline where percentage changes are noise + fastBaseline := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 100 * time.Microsecond, + P95: 200 * time.Microsecond, + P99: 300 * time.Microsecond, + Mean: 150 * time.Microsecond, + }, + ReviewsPerSecond: 10000, + }, + } + + current := []Results{ + { + Engine: EngineRego, + Latencies: Latencies{ + P50: 120 * time.Microsecond, // 20% increase but only 20µs + P95: 240 * time.Microsecond, // 20% increase but only 40µs + P99: 360 * time.Microsecond, // 20% increase but only 60µs + Mean: 180 * time.Microsecond, // 20% increase but only 30µs + }, + ReviewsPerSecond: 8000, // 20% decrease + }, + } + + // Without min threshold, this would fail (20% > 10%) + comparisonsWithoutMin := Compare(fastBaseline, current, 10.0, 0) + if len(comparisonsWithoutMin) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisonsWithoutMin)) + } + if comparisonsWithoutMin[0].Passed { + t.Error("expected comparison 
without min-threshold to fail") + } + + // With min threshold of 1s, all changes should pass as the absolute differences + // are well below the min-threshold tolerance + comparisonsWithMin := Compare(fastBaseline, current, 10.0, 1*time.Second) + if len(comparisonsWithMin) != 1 { + t.Fatalf("expected 1 comparison, got %d", len(comparisonsWithMin)) + } + + // With a large min threshold, the comparison should pass since all differences + // are below the min-threshold tolerance (including throughput) + if !comparisonsWithMin[0].Passed { + t.Errorf("expected comparison with large min-threshold (1s) to pass, got failed metrics: %v", comparisonsWithMin[0].FailedMetrics) + } + }) +} + +func TestCalculateDelta(t *testing.T) { + tests := []struct { + name string + baseline float64 + current float64 + want float64 + }{ + { + name: "no change", + baseline: 100, + current: 100, + want: 0, + }, + { + name: "10% increase", + baseline: 100, + current: 110, + want: 10, + }, + { + name: "10% decrease", + baseline: 100, + current: 90, + want: -10, + }, + { + name: "zero baseline with current", + baseline: 0, + current: 100, + want: 100, + }, + { + name: "both zero", + baseline: 0, + current: 0, + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := calculateDelta(tt.baseline, tt.current) + if got != tt.want { + t.Errorf("calculateDelta(%v, %v) = %v, want %v", + tt.baseline, tt.current, got, tt.want) + } + }) + } +} + +func TestFormatComparison(t *testing.T) { + comparisons := []ComparisonResult{ + { + BaselineEngine: EngineRego, + CurrentEngine: EngineRego, + Metrics: []MetricComparison{ + {Name: "P50 Latency", Baseline: 1000000, Current: 1100000, Delta: 10, Passed: true}, + {Name: "Throughput", Baseline: 1000, Current: 900, Delta: -10, Passed: true}, + }, + Passed: true, + FailedMetrics: nil, + }, + } + + output := FormatComparison(comparisons, 10.0) + + // Check that output contains expected strings + if output == "" { + t.Error("expected non-empty output") + } + + expectedStrings := []string{ + "Baseline Comparison", + "REGO", + "P50 Latency", + "Throughput", + "No significant regressions", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("expected output to contain %q", s) + } + } +} + +func TestFormatComparison_WithRegression(t *testing.T) { + comparisons := []ComparisonResult{ + { + BaselineEngine: EngineRego, + CurrentEngine: EngineRego, + Metrics: []MetricComparison{ + {Name: "P50 Latency", Baseline: 1000000, Current: 1500000, Delta: 50, Passed: false}, + }, + Passed: false, + FailedMetrics: []string{"P50 Latency"}, + }, + } + + output := FormatComparison(comparisons, 10.0) + + expectedStrings := []string{ + "REGRESSION", + "Regressions detected", + "P50 Latency", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("expected output to contain %q", s) + } + } +} diff --git a/pkg/gator/bench/metrics.go b/pkg/gator/bench/metrics.go new file mode 100644 index 00000000000..38ae7c6c7a1 --- /dev/null +++ b/pkg/gator/bench/metrics.go @@ -0,0 +1,66 @@ +package bench + +import ( + "sort" + "time" +) + +// calculateLatencies computes latency statistics from a slice of durations. 
+func calculateLatencies(durations []time.Duration) Latencies { + if len(durations) == 0 { + return Latencies{} + } + + // Sort for percentile calculation + sorted := make([]time.Duration, len(durations)) + copy(sorted, durations) + sort.Slice(sorted, func(i, j int) bool { + return sorted[i] < sorted[j] + }) + + var total time.Duration + for _, d := range sorted { + total += d + } + + return Latencies{ + Min: sorted[0], + Max: sorted[len(sorted)-1], + Mean: time.Duration(int64(total) / int64(len(sorted))), + P50: percentile(sorted, 50), + P95: percentile(sorted, 95), + P99: percentile(sorted, 99), + } +} + +// percentile calculates the p-th percentile from a sorted slice of durations. +// The input slice must be sorted in ascending order. +func percentile(sorted []time.Duration, p float64) time.Duration { + if len(sorted) == 0 { + return 0 + } + if len(sorted) == 1 { + return sorted[0] + } + + // Calculate the index using the nearest-rank method + rank := (p / 100.0) * float64(len(sorted)-1) + lower := int(rank) + upper := lower + 1 + + if upper >= len(sorted) { + return sorted[len(sorted)-1] + } + + // Linear interpolation between the two nearest ranks + weight := rank - float64(lower) + return time.Duration(float64(sorted[lower])*(1-weight) + float64(sorted[upper])*weight) +} + +// calculateThroughput computes reviews per second. +func calculateThroughput(reviewCount int, duration time.Duration) float64 { + if duration == 0 { + return 0 + } + return float64(reviewCount) / duration.Seconds() +} diff --git a/pkg/gator/bench/metrics_test.go b/pkg/gator/bench/metrics_test.go new file mode 100644 index 00000000000..4b718e14b0d --- /dev/null +++ b/pkg/gator/bench/metrics_test.go @@ -0,0 +1,187 @@ +package bench + +import ( + "testing" + "time" +) + +func TestCalculateLatencies(t *testing.T) { + tests := []struct { + name string + durations []time.Duration + wantMin time.Duration + wantMax time.Duration + wantMean time.Duration + }{ + { + name: "empty slice", + durations: []time.Duration{}, + wantMin: 0, + wantMax: 0, + wantMean: 0, + }, + { + name: "single duration", + durations: []time.Duration{100 * time.Millisecond}, + wantMin: 100 * time.Millisecond, + wantMax: 100 * time.Millisecond, + wantMean: 100 * time.Millisecond, + }, + { + name: "multiple durations", + durations: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + }, + wantMin: 10 * time.Millisecond, + wantMax: 50 * time.Millisecond, + wantMean: 30 * time.Millisecond, + }, + { + name: "unsorted durations", + durations: []time.Duration{ + 50 * time.Millisecond, + 10 * time.Millisecond, + 30 * time.Millisecond, + 20 * time.Millisecond, + 40 * time.Millisecond, + }, + wantMin: 10 * time.Millisecond, + wantMax: 50 * time.Millisecond, + wantMean: 30 * time.Millisecond, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := calculateLatencies(tt.durations) + + if got.Min != tt.wantMin { + t.Errorf("Min = %v, want %v", got.Min, tt.wantMin) + } + if got.Max != tt.wantMax { + t.Errorf("Max = %v, want %v", got.Max, tt.wantMax) + } + if got.Mean != tt.wantMean { + t.Errorf("Mean = %v, want %v", got.Mean, tt.wantMean) + } + }) + } +} + +func TestPercentile(t *testing.T) { + tests := []struct { + name string + sorted []time.Duration + p float64 + want time.Duration + }{ + { + name: "empty slice", + sorted: []time.Duration{}, + p: 50, + want: 0, + }, + { + name: "single element p50", + sorted: []time.Duration{100 * 
time.Millisecond}, + p: 50, + want: 100 * time.Millisecond, + }, + { + name: "p50 odd count", + sorted: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + }, + p: 50, + want: 30 * time.Millisecond, + }, + { + name: "p99 many elements", + sorted: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + }, + p: 99, + want: 49600 * time.Microsecond, // interpolated + }, + { + name: "p100 returns last element", + sorted: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + }, + p: 100, + want: 30 * time.Millisecond, // upper >= len case + }, + { + name: "two elements p0", + sorted: []time.Duration{10 * time.Millisecond, 20 * time.Millisecond}, + p: 0, + want: 10 * time.Millisecond, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := percentile(tt.sorted, tt.p) + // Allow 1ms tolerance for interpolation + diff := got - tt.want + if diff < 0 { + diff = -diff + } + if diff > time.Millisecond { + t.Errorf("percentile(%v, %v) = %v, want %v", tt.sorted, tt.p, got, tt.want) + } + }) + } +} + +func TestCalculateThroughput(t *testing.T) { + tests := []struct { + name string + reviewCount int + duration time.Duration + want float64 + }{ + { + name: "zero duration", + reviewCount: 100, + duration: 0, + want: 0, + }, + { + name: "1 second duration", + reviewCount: 100, + duration: time.Second, + want: 100, + }, + { + name: "500ms duration", + reviewCount: 50, + duration: 500 * time.Millisecond, + want: 100, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := calculateThroughput(tt.reviewCount, tt.duration) + if got != tt.want { + t.Errorf("calculateThroughput(%v, %v) = %v, want %v", + tt.reviewCount, tt.duration, got, tt.want) + } + }) + } +} diff --git a/pkg/gator/bench/output.go b/pkg/gator/bench/output.go new file mode 100644 index 00000000000..f77cecf31a3 --- /dev/null +++ b/pkg/gator/bench/output.go @@ -0,0 +1,595 @@ +package bench + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "strings" + "text/tabwriter" + "time" + + "gopkg.in/yaml.v3" +) + +// OutputFormat represents the output format for benchmark results. +type OutputFormat string + +const ( + // OutputFormatTable outputs results as a human-readable table. + OutputFormatTable OutputFormat = "table" + // OutputFormatJSON outputs results as JSON. + OutputFormatJSON OutputFormat = "json" + // OutputFormatYAML outputs results as YAML. + OutputFormatYAML OutputFormat = "yaml" +) + +// ParseOutputFormat parses a string into an OutputFormat. +func ParseOutputFormat(s string) (OutputFormat, error) { + switch strings.ToLower(s) { + case "", "table": + return OutputFormatTable, nil + case "json": + return OutputFormatJSON, nil + case "yaml": + return OutputFormatYAML, nil + default: + return "", fmt.Errorf("invalid output format: %q (valid: table, json, yaml)", s) + } +} + +// FormatResults formats benchmark results according to the specified format. +func FormatResults(results []Results, format OutputFormat) (string, error) { + switch format { + case OutputFormatJSON: + return formatJSON(results) + case OutputFormatYAML: + return formatYAML(results) + case OutputFormatTable: + fallthrough + default: + return formatTable(results), nil + } +} + +// FormatComparison formats comparison results for display. 
+func FormatComparison(comparisons []ComparisonResult, threshold float64) string { + var buf bytes.Buffer + + for i, comp := range comparisons { + if i > 0 { + buf.WriteString("\n") + } + writeComparisonResult(&buf, &comp, threshold) + } + + return buf.String() +} + +func writeComparisonResult(w io.Writer, comp *ComparisonResult, threshold float64) { + fmt.Fprintf(w, "=== Baseline Comparison: %s Engine ===\n\n", + strings.ToUpper(string(comp.CurrentEngine))) + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + // Header + fmt.Fprintln(tw, "Metric\tBaseline\tCurrent\tDelta\tStatus") + fmt.Fprintln(tw, "------\t--------\t-------\t-----\t------") + + for _, m := range comp.Metrics { + status := "✓" + if !m.Passed { + status = "✗ REGRESSION" + } + + // Format values based on metric type + var baselineStr, currentStr string + switch { + case strings.Contains(m.Name, "Latency"): + baselineStr = formatDuration(time.Duration(m.Baseline)) + currentStr = formatDuration(time.Duration(m.Current)) + case strings.Contains(m.Name, "Bytes"): + baselineStr = formatBytes(uint64(m.Baseline)) + currentStr = formatBytes(uint64(m.Current)) + case strings.Contains(m.Name, "Throughput"): + baselineStr = fmt.Sprintf("%.2f/sec", m.Baseline) + currentStr = fmt.Sprintf("%.2f/sec", m.Current) + default: + baselineStr = fmt.Sprintf("%.0f", m.Baseline) + currentStr = fmt.Sprintf("%.0f", m.Current) + } + + deltaStr := fmt.Sprintf("%+.1f%%", m.Delta) + fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\n", + m.Name, baselineStr, currentStr, deltaStr, status) + } + tw.Flush() + + fmt.Fprintln(w) + if comp.Passed { + fmt.Fprintf(w, "✓ No significant regressions (threshold: %.1f%%)\n", threshold) + } else { + fmt.Fprintf(w, "✗ Regressions detected in: %s (threshold: %.1f%%)\n", + strings.Join(comp.FailedMetrics, ", "), threshold) + } +} + +func formatJSON(results []Results) (string, error) { + // Convert to JSON-friendly format with string durations + jsonResults := toJSONResults(results) + b, err := json.MarshalIndent(jsonResults, "", " ") + if err != nil { + return "", fmt.Errorf("marshaling JSON: %w", err) + } + return string(b), nil +} + +func formatYAML(results []Results) (string, error) { + // Convert to YAML-friendly format with string durations + yamlResults := toJSONResults(results) + b, err := yaml.Marshal(yamlResults) + if err != nil { + return "", fmt.Errorf("marshaling YAML: %w", err) + } + return string(b), nil +} + +func formatTable(results []Results) string { + var buf bytes.Buffer + + // Write individual result tables + for i := range results { + if i > 0 { + buf.WriteString("\n") + } + writeResultTable(&buf, &results[i]) + } + + // Write comparison table if multiple engines + if len(results) > 1 { + buf.WriteString("\n") + writeComparisonTable(&buf, results) + } + + return buf.String() +} + +func writeResultTable(w io.Writer, r *Results) { + fmt.Fprintf(w, "=== Benchmark Results: %s Engine ===\n\n", strings.ToUpper(string(r.Engine))) + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + // Configuration section + fmt.Fprintln(tw, "Configuration:") + fmt.Fprintf(tw, " Templates:\t%d\n", r.TemplateCount) + fmt.Fprintf(tw, " Constraints:\t%d\n", r.ConstraintCount) + fmt.Fprintf(tw, " Objects:\t%d\n", r.ObjectCount) + fmt.Fprintf(tw, " Iterations:\t%d\n", r.Iterations) + if r.Concurrency > 1 { + fmt.Fprintf(tw, " Concurrency:\t%d\n", r.Concurrency) + } + fmt.Fprintf(tw, " Total Reviews:\t%d\n", r.Iterations*r.ObjectCount) + fmt.Fprintln(tw) + + // Skipped templates/constraints/data warning + if len(r.SkippedTemplates) > 0 || 
len(r.SkippedConstraints) > 0 || len(r.SkippedDataObjects) > 0 { + fmt.Fprintln(tw, "Warnings:") + if len(r.SkippedTemplates) > 0 { + fmt.Fprintf(tw, " Skipped Templates:\t%d (%s)\n", + len(r.SkippedTemplates), strings.Join(r.SkippedTemplates, ", ")) + } + if len(r.SkippedConstraints) > 0 { + fmt.Fprintf(tw, " Skipped Constraints:\t%d (%s)\n", + len(r.SkippedConstraints), strings.Join(r.SkippedConstraints, ", ")) + } + if len(r.SkippedDataObjects) > 0 { + fmt.Fprintf(tw, " Skipped Data Objects:\t%d (failed to load as referential data)\n", + len(r.SkippedDataObjects)) + // Show first few objects if not too many + if len(r.SkippedDataObjects) <= 5 { + fmt.Fprintf(tw, " Objects:\t%s\n", strings.Join(r.SkippedDataObjects, ", ")) + } else { + fmt.Fprintf(tw, " Objects:\t%s, ... (and %d more)\n", + strings.Join(r.SkippedDataObjects[:5], ", "), len(r.SkippedDataObjects)-5) + } + } + fmt.Fprintln(tw) + } + + // Informational note about engine limitations (not a warning) + if !r.ReferentialDataSupported { + fmt.Fprintln(tw, "Note:") + fmt.Fprintf(tw, " Referential Data:\tNot supported by %s engine\n", r.Engine) + fmt.Fprintln(tw, " \t(Referential constraints cannot be exercised with this engine)") + fmt.Fprintln(tw) + } + + // Timing section with breakdown + fmt.Fprintln(tw, "Timing:") + fmt.Fprintf(tw, " Setup Duration:\t%s\n", formatDuration(r.SetupDuration)) + if r.SetupBreakdown.ClientCreation > 0 { + fmt.Fprintf(tw, " └─ Client Creation:\t%s\n", formatDuration(r.SetupBreakdown.ClientCreation)) + fmt.Fprintf(tw, " └─ Template Compilation:\t%s\n", formatDuration(r.SetupBreakdown.TemplateCompilation)) + fmt.Fprintf(tw, " └─ Constraint Loading:\t%s\n", formatDuration(r.SetupBreakdown.ConstraintLoading)) + fmt.Fprintf(tw, " └─ Data Loading:\t%s\n", formatDuration(r.SetupBreakdown.DataLoading)) + } + fmt.Fprintf(tw, " Total Duration:\t%s\n", formatDuration(r.TotalDuration)) + fmt.Fprintf(tw, " Throughput:\t%.2f reviews/sec\n", r.ReviewsPerSecond) + fmt.Fprintln(tw) + + // Latency section + fmt.Fprintln(tw, "Latency (per review):") + fmt.Fprintf(tw, " Min:\t%s\n", formatDuration(r.Latencies.Min)) + fmt.Fprintf(tw, " Max:\t%s\n", formatDuration(r.Latencies.Max)) + fmt.Fprintf(tw, " Mean:\t%s\n", formatDuration(r.Latencies.Mean)) + fmt.Fprintf(tw, " P50:\t%s\n", formatDuration(r.Latencies.P50)) + fmt.Fprintf(tw, " P95:\t%s\n", formatDuration(r.Latencies.P95)) + fmt.Fprintf(tw, " P99:\t%s\n", formatDuration(r.Latencies.P99)) + fmt.Fprintln(tw) + + // Results section + fmt.Fprintln(tw, "Results:") + fmt.Fprintf(tw, " Violations Found:\t%d\n", r.ViolationCount) + + // Memory section (if available) + if r.MemoryStats != nil { + fmt.Fprintln(tw) + fmt.Fprintln(tw, "Memory:") + fmt.Fprintf(tw, " Allocs/Review:\t%d\n", r.MemoryStats.AllocsPerReview) + fmt.Fprintf(tw, " Bytes/Review:\t%s\n", formatBytes(r.MemoryStats.BytesPerReview)) + fmt.Fprintf(tw, " Total Allocs:\t%d\n", r.MemoryStats.TotalAllocs) + fmt.Fprintf(tw, " Total Bytes:\t%s\n", formatBytes(r.MemoryStats.TotalBytes)) + } + + // Stats section (if available) + if len(r.StatsEntries) > 0 { + fmt.Fprintln(tw) + fmt.Fprintln(tw, "Per-Constraint Statistics (from first iteration):") + for _, entry := range r.StatsEntries { + if entry == nil { + continue + } + // Include StatsFor to identify which constraint/template produced the stat + if entry.StatsFor != "" { + fmt.Fprintf(tw, " Constraint: %s (Scope: %s)\n", entry.StatsFor, entry.Scope) + } else { + fmt.Fprintf(tw, " Scope: %s\n", entry.Scope) + } + for _, stat := range entry.Stats { + if stat == nil 
{ + continue + } + fmt.Fprintf(tw, " %s:\t%v %s\n", stat.Name, stat.Value, stat.Source.Type) + } + } + } + + tw.Flush() +} + +// writeComparisonTable writes a side-by-side comparison of engine results. +func writeComparisonTable(w io.Writer, results []Results) { + fmt.Fprintln(w, "=== Engine Comparison ===") + fmt.Fprintln(w) + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + + // Header row + fmt.Fprint(tw, "Metric") + for i := range results { + fmt.Fprintf(tw, "\t%s", strings.ToUpper(string(results[i].Engine))) + } + fmt.Fprintln(tw) + + // Separator + fmt.Fprint(tw, "------") + for range results { + fmt.Fprint(tw, "\t------") + } + fmt.Fprintln(tw) + + // Templates + fmt.Fprint(tw, "Templates") + for i := range results { + fmt.Fprintf(tw, "\t%d", results[i].TemplateCount) + } + fmt.Fprintln(tw) + + // Constraints + fmt.Fprint(tw, "Constraints") + for i := range results { + fmt.Fprintf(tw, "\t%d", results[i].ConstraintCount) + } + fmt.Fprintln(tw) + + // Setup Duration + fmt.Fprint(tw, "Setup Time") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].SetupDuration)) + } + fmt.Fprintln(tw) + + // Throughput + fmt.Fprint(tw, "Throughput") + for i := range results { + fmt.Fprintf(tw, "\t%.2f/sec", results[i].ReviewsPerSecond) + } + fmt.Fprintln(tw) + + // Mean Latency + fmt.Fprint(tw, "Mean Latency") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].Latencies.Mean)) + } + fmt.Fprintln(tw) + + // P95 Latency + fmt.Fprint(tw, "P95 Latency") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].Latencies.P95)) + } + fmt.Fprintln(tw) + + // P99 Latency + fmt.Fprint(tw, "P99 Latency") + for i := range results { + fmt.Fprintf(tw, "\t%s", formatDuration(results[i].Latencies.P99)) + } + fmt.Fprintln(tw) + + // Violations + fmt.Fprint(tw, "Violations") + for i := range results { + fmt.Fprintf(tw, "\t%d", results[i].ViolationCount) + } + fmt.Fprintln(tw) + + // Memory stats (if available) + hasMemory := false + for i := range results { + if results[i].MemoryStats != nil { + hasMemory = true + break + } + } + if hasMemory { + fmt.Fprint(tw, "Allocs/Review") + for i := range results { + if results[i].MemoryStats != nil { + fmt.Fprintf(tw, "\t%d", results[i].MemoryStats.AllocsPerReview) + } else { + fmt.Fprint(tw, "\t-") + } + } + fmt.Fprintln(tw) + + fmt.Fprint(tw, "Bytes/Review") + for i := range results { + if results[i].MemoryStats != nil { + fmt.Fprintf(tw, "\t%s", formatBytes(results[i].MemoryStats.BytesPerReview)) + } else { + fmt.Fprint(tw, "\t-") + } + } + fmt.Fprintln(tw) + } + + tw.Flush() + + // Show performance difference if exactly 2 engines + if len(results) == 2 { + fmt.Fprintln(w) + writePerfDiff(w, &results[0], &results[1]) + } +} + +// writePerfDiff writes a performance comparison between two engines. 
+func writePerfDiff(w io.Writer, r1, r2 *Results) { + // Calculate throughput ratio + if r1.ReviewsPerSecond <= 0 || r2.ReviewsPerSecond <= 0 { + return + } + + switch { + case r1.ReviewsPerSecond > r2.ReviewsPerSecond: + ratio := r1.ReviewsPerSecond / r2.ReviewsPerSecond + fmt.Fprintf(w, "Performance: %s is %.2fx faster than %s\n", + strings.ToUpper(string(r1.Engine)), ratio, strings.ToUpper(string(r2.Engine))) + case r2.ReviewsPerSecond > r1.ReviewsPerSecond: + ratio := r2.ReviewsPerSecond / r1.ReviewsPerSecond + fmt.Fprintf(w, "Performance: %s is %.2fx faster than %s\n", + strings.ToUpper(string(r2.Engine)), ratio, strings.ToUpper(string(r1.Engine))) + default: + fmt.Fprintln(w, "Performance: Both engines have similar throughput") + } +} + +// formatDuration formats a duration in a human-readable way. +func formatDuration(d time.Duration) string { + if d < time.Microsecond { + return fmt.Sprintf("%dns", d.Nanoseconds()) + } + if d < time.Millisecond { + return fmt.Sprintf("%.2fµs", float64(d.Nanoseconds())/1000) + } + if d < time.Second { + return fmt.Sprintf("%.2fms", float64(d.Nanoseconds())/1000000) + } + return fmt.Sprintf("%.3fs", d.Seconds()) +} + +// formatBytes formats bytes in a human-readable way. +func formatBytes(b uint64) string { + const ( + KB = 1024 + MB = KB * 1024 + GB = MB * 1024 + ) + switch { + case b >= GB: + return fmt.Sprintf("%.2f GB", float64(b)/GB) + case b >= MB: + return fmt.Sprintf("%.2f MB", float64(b)/MB) + case b >= KB: + return fmt.Sprintf("%.2f KB", float64(b)/KB) + default: + return fmt.Sprintf("%d B", b) + } +} + +// JSONResults is a JSON/YAML-friendly version of Results with string durations. +type JSONResults struct { + Engine string `json:"engine" yaml:"engine"` + TemplateCount int `json:"templateCount" yaml:"templateCount"` + ConstraintCount int `json:"constraintCount" yaml:"constraintCount"` + ObjectCount int `json:"objectCount" yaml:"objectCount"` + Iterations int `json:"iterations" yaml:"iterations"` + Concurrency int `json:"concurrency,omitempty" yaml:"concurrency,omitempty"` + TotalReviews int `json:"totalReviews" yaml:"totalReviews"` + SetupDuration string `json:"setupDuration" yaml:"setupDuration"` + SetupBreakdown JSONSetupBreakdown `json:"setupBreakdown" yaml:"setupBreakdown"` + TotalDuration string `json:"totalDuration" yaml:"totalDuration"` + Latencies JSONLatency `json:"latencies" yaml:"latencies"` + ViolationCount int `json:"violationCount" yaml:"violationCount"` + ReviewsPerSecond float64 `json:"reviewsPerSecond" yaml:"reviewsPerSecond"` + MemoryStats *JSONMemoryStats `json:"memoryStats,omitempty" yaml:"memoryStats,omitempty"` + StatsEntries []JSONStatsEntry `json:"statsEntries,omitempty" yaml:"statsEntries,omitempty"` + SkippedTemplates []string `json:"skippedTemplates,omitempty" yaml:"skippedTemplates,omitempty"` + SkippedConstraints []string `json:"skippedConstraints,omitempty" yaml:"skippedConstraints,omitempty"` + SkippedDataObjects []string `json:"skippedDataObjects,omitempty" yaml:"skippedDataObjects,omitempty"` + ReferentialDataSupported bool `json:"referentialDataSupported" yaml:"referentialDataSupported"` +} + +// JSONSetupBreakdown is a JSON/YAML-friendly version of SetupBreakdown with string durations. 
+type JSONSetupBreakdown struct { + ClientCreation string `json:"clientCreation" yaml:"clientCreation"` + TemplateCompilation string `json:"templateCompilation" yaml:"templateCompilation"` + ConstraintLoading string `json:"constraintLoading" yaml:"constraintLoading"` + DataLoading string `json:"dataLoading" yaml:"dataLoading"` +} + +// JSONLatency is a JSON/YAML-friendly version of Latencies with string durations. +type JSONLatency struct { + Min string `json:"min" yaml:"min"` + Max string `json:"max" yaml:"max"` + Mean string `json:"mean" yaml:"mean"` + P50 string `json:"p50" yaml:"p50"` + P95 string `json:"p95" yaml:"p95"` + P99 string `json:"p99" yaml:"p99"` +} + +// JSONMemoryStats is a JSON/YAML-friendly version of MemoryStats. +type JSONMemoryStats struct { + AllocsPerReview uint64 `json:"allocsPerReview" yaml:"allocsPerReview"` + BytesPerReview string `json:"bytesPerReview" yaml:"bytesPerReview"` + TotalAllocs uint64 `json:"totalAllocs" yaml:"totalAllocs"` + TotalBytes string `json:"totalBytes" yaml:"totalBytes"` +} + +// JSONStatsEntry is a JSON/YAML-friendly version of StatsEntry. +type JSONStatsEntry struct { + Scope string `json:"scope" yaml:"scope"` + StatsFor string `json:"statsFor,omitempty" yaml:"statsFor,omitempty"` + Stats []JSONStat `json:"stats" yaml:"stats"` + Labels []JSONStatLabel `json:"labels,omitempty" yaml:"labels,omitempty"` +} + +// JSONStat is a JSON/YAML-friendly version of instrumentation.Stat. +type JSONStat struct { + Name string `json:"name" yaml:"name"` + Value interface{} `json:"value" yaml:"value"` + Source string `json:"source" yaml:"source"` +} + +// JSONStatLabel is a JSON/YAML-friendly version of instrumentation.Label. +type JSONStatLabel struct { + Name string `json:"name" yaml:"name"` + Value interface{} `json:"value" yaml:"value"` +} + +func toJSONResults(results []Results) []JSONResults { + jsonResults := make([]JSONResults, len(results)) + for i := range results { + r := &results[i] + jr := JSONResults{ + Engine: string(r.Engine), + TemplateCount: r.TemplateCount, + ConstraintCount: r.ConstraintCount, + ObjectCount: r.ObjectCount, + Iterations: r.Iterations, + Concurrency: r.Concurrency, + TotalReviews: r.Iterations * r.ObjectCount, + SetupDuration: r.SetupDuration.String(), + SetupBreakdown: JSONSetupBreakdown{ + ClientCreation: r.SetupBreakdown.ClientCreation.String(), + TemplateCompilation: r.SetupBreakdown.TemplateCompilation.String(), + ConstraintLoading: r.SetupBreakdown.ConstraintLoading.String(), + DataLoading: r.SetupBreakdown.DataLoading.String(), + }, + TotalDuration: r.TotalDuration.String(), + Latencies: JSONLatency{ + Min: r.Latencies.Min.String(), + Max: r.Latencies.Max.String(), + Mean: r.Latencies.Mean.String(), + P50: r.Latencies.P50.String(), + P95: r.Latencies.P95.String(), + P99: r.Latencies.P99.String(), + }, + ViolationCount: r.ViolationCount, + ReviewsPerSecond: r.ReviewsPerSecond, + SkippedTemplates: r.SkippedTemplates, + SkippedConstraints: r.SkippedConstraints, + SkippedDataObjects: r.SkippedDataObjects, + ReferentialDataSupported: r.ReferentialDataSupported, + } + + // Add memory stats if available + if r.MemoryStats != nil { + jr.MemoryStats = &JSONMemoryStats{ + AllocsPerReview: r.MemoryStats.AllocsPerReview, + BytesPerReview: formatBytes(r.MemoryStats.BytesPerReview), + TotalAllocs: r.MemoryStats.TotalAllocs, + TotalBytes: formatBytes(r.MemoryStats.TotalBytes), + } + } + + // Add stats entries if available + if len(r.StatsEntries) > 0 { + jr.StatsEntries = make([]JSONStatsEntry, 0, len(r.StatsEntries)) + for _, 
entry := range r.StatsEntries { + if entry == nil { + continue + } + jsonEntry := JSONStatsEntry{ + Scope: entry.Scope, + StatsFor: entry.StatsFor, + } + // Convert stats + for _, stat := range entry.Stats { + if stat == nil { + continue + } + jsonEntry.Stats = append(jsonEntry.Stats, JSONStat{ + Name: stat.Name, + Value: stat.Value, + Source: fmt.Sprintf("%s/%s", stat.Source.Type, stat.Source.Value), + }) + } + // Convert labels + for _, label := range entry.Labels { + if label == nil { + continue + } + jsonEntry.Labels = append(jsonEntry.Labels, JSONStatLabel{ + Name: label.Name, + Value: label.Value, + }) + } + jr.StatsEntries = append(jr.StatsEntries, jsonEntry) + } + } + + jsonResults[i] = jr + } + return jsonResults +} diff --git a/pkg/gator/bench/output_test.go b/pkg/gator/bench/output_test.go new file mode 100644 index 00000000000..5d77dd31333 --- /dev/null +++ b/pkg/gator/bench/output_test.go @@ -0,0 +1,771 @@ +package bench + +import ( + "bytes" + "strings" + "testing" + "time" +) + +func TestParseOutputFormat(t *testing.T) { + tests := []struct { + input string + want OutputFormat + wantErr bool + }{ + {"", OutputFormatTable, false}, + {"table", OutputFormatTable, false}, + {"TABLE", OutputFormatTable, false}, + {"json", OutputFormatJSON, false}, + {"JSON", OutputFormatJSON, false}, + {"yaml", OutputFormatYAML, false}, + {"YAML", OutputFormatYAML, false}, + {"invalid", "", true}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := ParseOutputFormat(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("ParseOutputFormat(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("ParseOutputFormat(%q) = %v, want %v", tt.input, got, tt.want) + } + }) + } +} + +func TestFormatResults(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 3, + ObjectCount: 10, + Iterations: 100, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{ + Min: 500 * time.Microsecond, + Max: 5 * time.Millisecond, + Mean: 1 * time.Millisecond, + P50: 900 * time.Microsecond, + P95: 3 * time.Millisecond, + P99: 4 * time.Millisecond, + }, + ViolationCount: 50, + ReviewsPerSecond: 1000, + }, + } + + t.Run("table format", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for expected content + expectedStrings := []string{ + "REGO Engine", + "Templates:", + "Constraints:", + "Latency", + "Min:", + "P99:", + "Violations Found:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing expected string %q", s) + } + } + }) + + t.Run("json format", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for expected JSON keys + expectedStrings := []string{ + `"engine": "rego"`, + `"templateCount": 2`, + `"constraintCount": 3`, + `"latencies"`, + `"min"`, + `"p99"`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing expected string %q", s) + } + } + }) + + t.Run("yaml format", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatYAML) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for expected YAML keys + expectedStrings := []string{ + "engine: rego", + "templateCount: 
2", + "constraintCount: 3", + "latencies:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("yaml output missing expected string %q", s) + } + } + }) +} + +func TestFormatDuration(t *testing.T) { + tests := []struct { + d time.Duration + want string + }{ + {500 * time.Nanosecond, "500ns"}, + {1500 * time.Nanosecond, "1.50µs"}, + {500 * time.Microsecond, "500.00µs"}, + {1500 * time.Microsecond, "1.50ms"}, + {500 * time.Millisecond, "500.00ms"}, + {1500 * time.Millisecond, "1.500s"}, + {2 * time.Second, "2.000s"}, + } + + for _, tt := range tests { + t.Run(tt.d.String(), func(t *testing.T) { + got := formatDuration(tt.d) + if got != tt.want { + t.Errorf("formatDuration(%v) = %q, want %q", tt.d, got, tt.want) + } + }) + } +} + +func TestFormatResults_SetupBreakdown(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 100 * time.Millisecond, + SetupBreakdown: SetupBreakdown{ + ClientCreation: 10 * time.Millisecond, + TemplateCompilation: 50 * time.Millisecond, + ConstraintLoading: 30 * time.Millisecond, + DataLoading: 10 * time.Millisecond, + }, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for setup breakdown content + expectedStrings := []string{ + "Client Creation:", + "Template Compilation:", + "Constraint Loading:", + "Data Loading:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing setup breakdown: %q", s) + } + } +} + +func TestFormatResults_SkippedTemplates(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + SkippedTemplates: []string{"template1", "template2"}, + SkippedConstraints: []string{"constraint1"}, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for warnings section + expectedStrings := []string{ + "Warnings:", + "Skipped Templates:", + "template1", + "template2", + "Skipped Constraints:", + "constraint1", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing skipped warning: %q", s) + } + } +} + +func TestFormatResults_SkippedDataObjects(t *testing.T) { + // Test skipped data objects - this tests actual failures during data loading, + // not CEL engine limitations (which use ReferentialDataSupported flag) + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 5, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 50, + ReferentialDataSupported: true, + SkippedDataObjects: []string{"default/pod1", "default/pod2", "kube-system/configmap1"}, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != 
nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for warnings section with skipped data objects + expectedStrings := []string{ + "Warnings:", + "Skipped Data Objects:", + "failed to load as referential data", + "default/pod1", + "default/pod2", + "kube-system/configmap1", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing skipped data warning: %q\nOutput:\n%s", s, output) + } + } +} + +func TestFormatResults_SkippedDataObjectsTruncated(t *testing.T) { + // Test with more than 5 objects to verify truncation + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 10, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 100, + ReferentialDataSupported: true, + SkippedDataObjects: []string{ + "obj1", "obj2", "obj3", "obj4", "obj5", "obj6", "obj7", + }, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should show truncation message + expectedStrings := []string{ + "Skipped Data Objects:", + "and 2 more", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing truncation message: %q\nOutput:\n%s", s, output) + } + } +} + +func TestFormatResults_ReferentialDataNotSupported(t *testing.T) { + // Test that CEL engine shows informational note (not warning) about referential data + results := []Results{ + { + Engine: EngineCEL, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 5, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 50, + ReferentialDataSupported: false, // CEL doesn't support referential data + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should show informational note, not warning + expectedStrings := []string{ + "Note:", + "Referential Data:", + "Not supported by", + "CEL", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing referential data note: %q\nOutput:\n%s", s, output) + } + } + + // Should NOT show "Warnings:" for referential data (that's for actual failures) + if strings.Contains(output, "Warnings:") { + t.Errorf("table output should not show Warnings for CEL referential data limitation\nOutput:\n%s", output) + } +} + +func TestFormatResults_ComparisonTable(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 10, + Iterations: 100, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: 2 * time.Millisecond, P99: 3 * time.Millisecond}, + ViolationCount: 10, + ReviewsPerSecond: 1000, + }, + { + Engine: EngineCEL, + TemplateCount: 2, + ConstraintCount: 2, + ObjectCount: 10, + Iterations: 100, + SetupDuration: 30 * time.Millisecond, + TotalDuration: 500 * time.Millisecond, + Latencies: Latencies{Mean: 500 * time.Microsecond, P95: time.Millisecond, P99: 2 * time.Millisecond}, + ViolationCount: 10, + ReviewsPerSecond: 2000, + }, + } + + output, err := 
FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for comparison table content + expectedStrings := []string{ + "Engine Comparison", + "Metric", + "REGO", + "CEL", + "Throughput", + "Mean Latency", + "P95 Latency", + "P99 Latency", + "Performance:", // Performance comparison line + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing comparison content: %q", s) + } + } +} + +func TestFormatResults_SetupBreakdownJSON(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 100 * time.Millisecond, + SetupBreakdown: SetupBreakdown{ + ClientCreation: 10 * time.Millisecond, + TemplateCompilation: 50 * time.Millisecond, + ConstraintLoading: 30 * time.Millisecond, + DataLoading: 10 * time.Millisecond, + }, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + }, + } + + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for setup breakdown in JSON + expectedStrings := []string{ + `"setupBreakdown"`, + `"clientCreation"`, + `"templateCompilation"`, + `"constraintLoading"`, + `"dataLoading"`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing setup breakdown: %q", s) + } + } +} + +func TestFormatResults_SkippedInJSON(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + SkippedTemplates: []string{"skipped-template"}, + SkippedConstraints: []string{"skipped-constraint"}, + }, + } + + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for skipped items in JSON + expectedStrings := []string{ + `"skippedTemplates"`, + `"skipped-template"`, + `"skippedConstraints"`, + `"skipped-constraint"`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing skipped items: %q", s) + } + } +} + +func TestFormatResults_EqualThroughput(t *testing.T) { + // Test the case where both engines have identical throughput + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, // Same throughput + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, // Same throughput + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should contain the "similar throughput" message + if 
!strings.Contains(output, "similar throughput") { + t.Error("expected 'similar throughput' message for equal performance") + } +} + +func TestFormatResults_ZeroThroughput(t *testing.T) { + // Test the case where one engine has zero throughput + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 0, // Zero throughput + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should NOT contain a performance comparison when one has zero throughput + if strings.Contains(output, "faster than") { + t.Error("should not show performance comparison when throughput is zero") + } +} + +func TestFormatResults_RegoFasterThanCEL(t *testing.T) { + // Test case where Rego is faster than CEL (reversed from normal) + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 2000, // Rego faster + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Should show REGO is faster + if !strings.Contains(output, "REGO is") || !strings.Contains(output, "faster than CEL") { + t.Error("expected performance comparison showing REGO faster than CEL") + } +} + +func TestWritePerfDiff_NegativeThroughput(t *testing.T) { + var buf bytes.Buffer + r1 := &Results{Engine: EngineRego, ReviewsPerSecond: -1} + r2 := &Results{Engine: EngineCEL, ReviewsPerSecond: 1000} + + writePerfDiff(&buf, r1, r2) + + // Should not output anything when throughput is negative + if buf.String() != "" { + t.Error("expected no output for negative throughput") + } +} + +func TestFormatBytes(t *testing.T) { + tests := []struct { + bytes uint64 + want string + }{ + {0, "0 B"}, + {512, "512 B"}, + {1024, "1.00 KB"}, + {1536, "1.50 KB"}, + {1048576, "1.00 MB"}, + {1572864, "1.50 MB"}, + {1073741824, "1.00 GB"}, + } + + for _, tt := range tests { + t.Run(tt.want, func(t *testing.T) { + got := formatBytes(tt.bytes) + if got != tt.want { + t.Errorf("formatBytes(%d) = %q, want %q", tt.bytes, got, tt.want) + } + }) + } +} + +func TestFormatResults_WithMemoryStats(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Min: time.Millisecond, 
Max: time.Millisecond, Mean: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 10, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + TotalAllocs: 5000, + TotalBytes: 102400, + }, + }, + } + + t.Run("table format with memory", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + expectedStrings := []string{ + "Memory:", + "Allocs/Review:", + "500", + "Bytes/Review:", + "10.00 KB", + "Total Allocs:", + "Total Bytes:", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("table output missing memory stat: %q", s) + } + } + }) + + t.Run("json format with memory", func(t *testing.T) { + output, err := FormatResults(results, OutputFormatJSON) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + expectedStrings := []string{ + `"memoryStats"`, + `"allocsPerReview": 500`, + `"bytesPerReview": "10.00 KB"`, + `"totalAllocs": 5000`, + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("json output missing memory stat: %q", s) + } + } + }) +} + +func TestFormatResults_ComparisonTableWithMemory(t *testing.T) { + results := []Results{ + { + Engine: EngineRego, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 1000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 500, + BytesPerReview: 10240, + }, + }, + { + Engine: EngineCEL, + TemplateCount: 1, + ConstraintCount: 1, + ObjectCount: 1, + Iterations: 10, + SetupDuration: 50 * time.Millisecond, + TotalDuration: time.Second, + Latencies: Latencies{Mean: time.Millisecond, P95: time.Millisecond, P99: time.Millisecond}, + ViolationCount: 0, + ReviewsPerSecond: 2000, + MemoryStats: &MemoryStats{ + AllocsPerReview: 200, + BytesPerReview: 4096, + }, + }, + } + + output, err := FormatResults(results, OutputFormatTable) + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + // Check for memory in comparison table + expectedStrings := []string{ + "Allocs/Review", + "Bytes/Review", + } + + for _, s := range expectedStrings { + if !strings.Contains(output, s) { + t.Errorf("comparison table missing memory row: %q", s) + } + } +} diff --git a/pkg/gator/bench/types.go b/pkg/gator/bench/types.go new file mode 100644 index 00000000000..8527216e3e1 --- /dev/null +++ b/pkg/gator/bench/types.go @@ -0,0 +1,219 @@ +package bench + +import ( + "io" + "time" + + "github.com/open-policy-agent/frameworks/constraint/pkg/instrumentation" +) + +// Engine represents the policy evaluation engine to benchmark. +type Engine string + +const ( + // EngineRego benchmarks the Rego/OPA policy engine. + EngineRego Engine = "rego" + // EngineCEL benchmarks the Kubernetes CEL policy engine. + EngineCEL Engine = "cel" + // EngineAll benchmarks both Rego and CEL engines. + EngineAll Engine = "all" +) + +// Opts configures the benchmark run. +type Opts struct { + // Filenames are the paths to files or directories containing + // ConstraintTemplates, Constraints, and objects to review. + Filenames []string + + // Images are OCI image URLs containing policies. + Images []string + + // TempDir is the directory for unpacking OCI images. + TempDir string + + // Engine specifies which policy engine(s) to benchmark. 
+ Engine Engine + + // Iterations is the number of review cycles to run. + Iterations int + + // Warmup is the number of warmup iterations before measurement. + Warmup int + + // GatherStats enables collection of per-constraint statistics + // from the constraint framework. + GatherStats bool + + // Memory enables memory profiling during benchmark. + Memory bool + + // Baseline is the path to a baseline results file for comparison. + Baseline string + + // Save is the path to save benchmark results for future comparison. + Save string + + // Threshold is the regression threshold percentage for comparison. + // If a metric regresses more than this percentage, the benchmark fails. + Threshold float64 + + // MinThreshold is the minimum absolute latency difference (in duration) to consider + // a regression. This prevents false positives on very fast policies where small + // absolute changes appear as large percentage changes. + MinThreshold time.Duration + + // Concurrency is the number of concurrent goroutines to use for reviews. + // Default is 1 (sequential). Higher values simulate realistic webhook load. + Concurrency int + + // Writer is where warnings and informational messages are written. + // If nil, warnings are not printed. + Writer io.Writer +} + +// Results contains benchmark metrics for a single engine. +type Results struct { + // Engine is the policy engine that was benchmarked. + Engine Engine `json:"engine" yaml:"engine"` + + // TemplateCount is the number of ConstraintTemplates loaded. + TemplateCount int `json:"templateCount" yaml:"templateCount"` + + // SkippedTemplates contains names of templates skipped due to engine incompatibility. + SkippedTemplates []string `json:"skippedTemplates,omitempty" yaml:"skippedTemplates,omitempty"` + + // ConstraintCount is the number of Constraints loaded. + ConstraintCount int `json:"constraintCount" yaml:"constraintCount"` + + // SkippedConstraints contains names of constraints skipped due to missing templates. + SkippedConstraints []string `json:"skippedConstraints,omitempty" yaml:"skippedConstraints,omitempty"` + + // SkippedDataObjects contains names of objects that failed to load as referential data. + // This is populated only when actual errors occur during data loading, not for expected + // engine limitations (use ReferentialDataSupported for that). + SkippedDataObjects []string `json:"skippedDataObjects,omitempty" yaml:"skippedDataObjects,omitempty"` + + // ReferentialDataSupported indicates whether the engine supports referential data. + // When false, referential constraints cannot be exercised (e.g., CEL engine). + ReferentialDataSupported bool `json:"referentialDataSupported" yaml:"referentialDataSupported"` + + // ObjectCount is the number of objects reviewed. + ObjectCount int `json:"objectCount" yaml:"objectCount"` + + // Iterations is the number of review cycles run. + Iterations int `json:"iterations" yaml:"iterations"` + + // Concurrency is the number of concurrent goroutines used. + Concurrency int `json:"concurrency" yaml:"concurrency"` + + // SetupDuration is the total time taken to load templates, constraints, and data. + SetupDuration time.Duration `json:"setupDuration" yaml:"setupDuration"` + + // SetupBreakdown contains detailed timing for each setup phase. + SetupBreakdown SetupBreakdown `json:"setupBreakdown" yaml:"setupBreakdown"` + + // TotalDuration is the total time for all review iterations. 
+ TotalDuration time.Duration `json:"totalDuration" yaml:"totalDuration"` + + // Latencies contains timing for each review operation. + Latencies Latencies `json:"latencies" yaml:"latencies"` + + // ViolationCount is the total number of violations found. + ViolationCount int `json:"violationCount" yaml:"violationCount"` + + // ReviewsPerSecond is the throughput metric (reviews/second). + ReviewsPerSecond float64 `json:"reviewsPerSecond" yaml:"reviewsPerSecond"` + + // MemoryStats contains memory allocation statistics (only populated with --memory). + MemoryStats *MemoryStats `json:"memoryStats,omitempty" yaml:"memoryStats,omitempty"` + + // StatsEntries contains per-constraint statistics from the policy engine (only populated with --stats). + StatsEntries []*instrumentation.StatsEntry `json:"statsEntries,omitempty" yaml:"statsEntries,omitempty"` +} + +// SetupBreakdown contains detailed timing for setup phases. +type SetupBreakdown struct { + // ClientCreation is the time to create the constraint client. + ClientCreation time.Duration `json:"clientCreation" yaml:"clientCreation"` + + // TemplateCompilation is the time to compile all templates. + TemplateCompilation time.Duration `json:"templateCompilation" yaml:"templateCompilation"` + + // ConstraintLoading is the time to load all constraints. + ConstraintLoading time.Duration `json:"constraintLoading" yaml:"constraintLoading"` + + // DataLoading is the time to load reference data. + DataLoading time.Duration `json:"dataLoading" yaml:"dataLoading"` +} + +// Latencies contains latency statistics. +type Latencies struct { + // Min is the minimum latency observed. + Min time.Duration `json:"min" yaml:"min"` + + // Max is the maximum latency observed. + Max time.Duration `json:"max" yaml:"max"` + + // Mean is the average latency. + Mean time.Duration `json:"mean" yaml:"mean"` + + // P50 is the 50th percentile (median) latency. + P50 time.Duration `json:"p50" yaml:"p50"` + + // P95 is the 95th percentile latency. + P95 time.Duration `json:"p95" yaml:"p95"` + + // P99 is the 99th percentile latency. + P99 time.Duration `json:"p99" yaml:"p99"` +} + +// MemoryStats contains memory allocation statistics from benchmark runs. +type MemoryStats struct { + // AllocsPerReview is the average number of allocations per review. + AllocsPerReview uint64 `json:"allocsPerReview" yaml:"allocsPerReview"` + + // BytesPerReview is the average bytes allocated per review. + BytesPerReview uint64 `json:"bytesPerReview" yaml:"bytesPerReview"` + + // TotalAllocs is the total number of allocations during measurement. + TotalAllocs uint64 `json:"totalAllocs" yaml:"totalAllocs"` + + // TotalBytes is the total bytes allocated during measurement. + TotalBytes uint64 `json:"totalBytes" yaml:"totalBytes"` +} + +// ComparisonResult contains the result of comparing current results against a baseline. +type ComparisonResult struct { + // BaselineEngine is the engine from the baseline. + BaselineEngine Engine `json:"baselineEngine" yaml:"baselineEngine"` + + // CurrentEngine is the engine from the current run. + CurrentEngine Engine `json:"currentEngine" yaml:"currentEngine"` + + // Metrics contains the comparison for each metric. + Metrics []MetricComparison `json:"metrics" yaml:"metrics"` + + // Passed indicates whether all metrics are within threshold. + Passed bool `json:"passed" yaml:"passed"` + + // FailedMetrics contains names of metrics that exceeded threshold. 
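+ // It is empty when Passed is true.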
+ FailedMetrics []string `json:"failedMetrics,omitempty" yaml:"failedMetrics,omitempty"` +} + +// MetricComparison contains comparison data for a single metric. +type MetricComparison struct { + // Name is the metric name. + Name string `json:"name" yaml:"name"` + + // Baseline is the baseline value. + Baseline float64 `json:"baseline" yaml:"baseline"` + + // Current is the current value. + Current float64 `json:"current" yaml:"current"` + + // Delta is the percentage change (positive = regression for latency, negative = improvement). + Delta float64 `json:"delta" yaml:"delta"` + + // Passed indicates whether this metric is within threshold. + Passed bool `json:"passed" yaml:"passed"` +} diff --git a/pkg/gator/fileext.go b/pkg/gator/fileext.go new file mode 100644 index 00000000000..b703ba1d7df --- /dev/null +++ b/pkg/gator/fileext.go @@ -0,0 +1,21 @@ +package gator + +// File extension constants for supported file formats. +const ( + // ExtYAML is the standard YAML file extension. + ExtYAML = ".yaml" + // ExtYML is the alternative YAML file extension. + ExtYML = ".yml" + // ExtJSON is the JSON file extension. + ExtJSON = ".json" +) + +// IsYAMLExtension returns true if the extension is a valid YAML extension. +func IsYAMLExtension(ext string) bool { + return ext == ExtYAML || ext == ExtYML +} + +// IsSupportedExtension returns true if the extension is supported (YAML or JSON). +func IsSupportedExtension(ext string) bool { + return ext == ExtYAML || ext == ExtYML || ext == ExtJSON +} diff --git a/pkg/gator/fileext_test.go b/pkg/gator/fileext_test.go new file mode 100644 index 00000000000..7e07fdb8deb --- /dev/null +++ b/pkg/gator/fileext_test.go @@ -0,0 +1,45 @@ +package gator + +import "testing" + +func TestIsYAMLExtension(t *testing.T) { + tests := []struct { + ext string + expected bool + }{ + {ExtYAML, true}, + {ExtYML, true}, + {ExtJSON, false}, + {".txt", false}, + {"", false}, + } + + for _, tt := range tests { + t.Run(tt.ext, func(t *testing.T) { + if got := IsYAMLExtension(tt.ext); got != tt.expected { + t.Errorf("IsYAMLExtension(%q) = %v, want %v", tt.ext, got, tt.expected) + } + }) + } +} + +func TestIsSupportedExtension(t *testing.T) { + tests := []struct { + ext string + expected bool + }{ + {ExtYAML, true}, + {ExtYML, true}, + {ExtJSON, true}, + {".txt", false}, + {"", false}, + } + + for _, tt := range tests { + t.Run(tt.ext, func(t *testing.T) { + if got := IsSupportedExtension(tt.ext); got != tt.expected { + t.Errorf("IsSupportedExtension(%q) = %v, want %v", tt.ext, got, tt.expected) + } + }) + } +} diff --git a/pkg/gator/reader/filereader.go b/pkg/gator/reader/filereader.go index 6c1f1a7d78e..3d07262a693 100644 --- a/pkg/gator/reader/filereader.go +++ b/pkg/gator/reader/filereader.go @@ -6,11 +6,12 @@ import ( "os" "path/filepath" + "github.com/open-policy-agent/gatekeeper/v3/pkg/gator" "github.com/open-policy-agent/gatekeeper/v3/pkg/oci" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -var allowedExtensions = []string{".yaml", ".yml", ".json"} +var allowedExtensions = []string{gator.ExtYAML, gator.ExtYML, gator.ExtJSON} func ReadSources(filenames []string, images []string, tempDir string) ([]*unstructured.Unstructured, error) { var sources []*source diff --git a/pkg/gator/verify/read_suites.go b/pkg/gator/verify/read_suites.go index ce72e3578b8..3d939ef0bcc 100644 --- a/pkg/gator/verify/read_suites.go +++ b/pkg/gator/verify/read_suites.go @@ -135,7 +135,7 @@ type fileList []string func (l *fileList) addFile(target string) error { // target is a file. 
ext := path.Ext(target) - if ext != ".yaml" && ext != ".yml" { + if !gator.IsYAMLExtension(ext) { return fmt.Errorf("%w: %q", ErrUnsupportedExtension, ext) } *l = append(*l, target) @@ -172,7 +172,7 @@ func isYAMLFile(d fs.DirEntry) bool { return false } ext := path.Ext(d.Name()) - return ext == ".yaml" || ext == ".yml" + return gator.IsYAMLExtension(ext) } func readSuite(f fs.FS, path string) (*Suite, error) { diff --git a/test/gator/bench/basic/constraint.yaml b/test/gator/bench/basic/constraint.yaml new file mode 100644 index 00000000000..d845b242643 --- /dev/null +++ b/test/gator/bench/basic/constraint.yaml @@ -0,0 +1,11 @@ +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sRequiredLabels +metadata: + name: require-team-label +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + labels: ["team"] diff --git a/test/gator/bench/basic/resources.yaml b/test/gator/bench/basic/resources.yaml new file mode 100644 index 00000000000..3fd85fbbb11 --- /dev/null +++ b/test/gator/bench/basic/resources.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: valid-pod + labels: + team: platform +spec: + containers: + - name: nginx + image: nginx +--- +apiVersion: v1 +kind: Pod +metadata: + name: invalid-pod +spec: + containers: + - name: nginx + image: nginx diff --git a/test/gator/bench/basic/template.yaml b/test/gator/bench/basic/template.yaml new file mode 100644 index 00000000000..fe36b5a67de --- /dev/null +++ b/test/gator/bench/basic/template.yaml @@ -0,0 +1,28 @@ +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8srequiredlabels +spec: + crd: + spec: + names: + kind: K8sRequiredLabels + validation: + openAPIV3Schema: + type: object + properties: + labels: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredlabels + violation[{"msg": msg}] { + provided := {label | input.review.object.metadata.labels[label]} + required := {label | label := input.parameters.labels[_]} + missing := required - provided + count(missing) > 0 + msg := sprintf("Missing required labels: %v", [missing]) + } diff --git a/test/gator/bench/both/constraint.yaml b/test/gator/bench/both/constraint.yaml new file mode 100644 index 00000000000..c331ee3c4a4 --- /dev/null +++ b/test/gator/bench/both/constraint.yaml @@ -0,0 +1,13 @@ +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sAllowedRepos +metadata: + name: allowed-repos +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] + parameters: + repos: + - "gcr.io/myproject/" + - "docker.io/library/" diff --git a/test/gator/bench/both/resources.yaml b/test/gator/bench/both/resources.yaml new file mode 100644 index 00000000000..f4112c7eca1 --- /dev/null +++ b/test/gator/bench/both/resources.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: allowed-image + namespace: default +spec: + containers: + - name: app + image: gcr.io/myproject/myapp:v1.0 +--- +apiVersion: v1 +kind: Pod +metadata: + name: disallowed-image + namespace: default +spec: + containers: + - name: app + image: quay.io/unauthorized/app:latest diff --git a/test/gator/bench/both/template.yaml b/test/gator/bench/both/template.yaml new file mode 100644 index 00000000000..55708544651 --- /dev/null +++ b/test/gator/bench/both/template.yaml @@ -0,0 +1,44 @@ +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8sallowedrepos +spec: + crd: + spec: + names: + kind: K8sAllowedRepos + validation: + openAPIV3Schema: + type: 
object + properties: + repos: + type: array + items: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8sallowedrepos + + violation[{"msg": msg}] { + container := input.review.object.spec.containers[_] + not strings.any_prefix_match(container.image, input.parameters.repos) + msg := sprintf("container <%v> has an invalid image repo <%v>, allowed repos are %v", [container.name, container.image, input.parameters.repos]) + } + + violation[{"msg": msg}] { + container := input.review.object.spec.initContainers[_] + not strings.any_prefix_match(container.image, input.parameters.repos) + msg := sprintf("initContainer <%v> has an invalid image repo <%v>, allowed repos are %v", [container.name, container.image, input.parameters.repos]) + } + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "object.spec.containers.all(c, variables.repos.exists(repo, c.image.startsWith(repo)))" + messageExpression: "'container ' + variables.failedContainer + ' has an invalid image repo, allowed repos are ' + variables.repos.join(', ')" + variables: + - name: repos + expression: "has(variables.params.repos) ? variables.params.repos : []" + - name: failedContainer + expression: "object.spec.containers.filter(c, !variables.repos.exists(repo, c.image.startsWith(repo))).map(c, c.name).join(', ')" diff --git a/test/gator/bench/cel/constraint.yaml b/test/gator/bench/cel/constraint.yaml new file mode 100644 index 00000000000..3704bfa3b08 --- /dev/null +++ b/test/gator/bench/cel/constraint.yaml @@ -0,0 +1,9 @@ +apiVersion: constraints.gatekeeper.sh/v1beta1 +kind: K8sContainerLimits +metadata: + name: require-limits +spec: + match: + kinds: + - apiGroups: [""] + kinds: ["Pod"] diff --git a/test/gator/bench/cel/resources.yaml b/test/gator/bench/cel/resources.yaml new file mode 100644 index 00000000000..12637bb8483 --- /dev/null +++ b/test/gator/bench/cel/resources.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: Pod +metadata: + name: pod-with-limits + namespace: default +spec: + containers: + - name: nginx + image: nginx:latest + resources: + limits: + cpu: "500m" + memory: "128Mi" + requests: + cpu: "250m" + memory: "64Mi" +--- +apiVersion: v1 +kind: Pod +metadata: + name: pod-without-limits + namespace: default +spec: + containers: + - name: nginx + image: nginx:latest diff --git a/test/gator/bench/cel/template.yaml b/test/gator/bench/cel/template.yaml new file mode 100644 index 00000000000..d37ef8e9216 --- /dev/null +++ b/test/gator/bench/cel/template.yaml @@ -0,0 +1,17 @@ +apiVersion: templates.gatekeeper.sh/v1 +kind: ConstraintTemplate +metadata: + name: k8scontainerlimits +spec: + crd: + spec: + names: + kind: K8sContainerLimits + targets: + - target: admission.k8s.gatekeeper.sh + code: + - engine: K8sNativeValidation + source: + validations: + - expression: "has(object.spec.containers) && object.spec.containers.all(c, has(c.resources) && has(c.resources.limits))" + message: "All containers must have resource limits" diff --git a/test/gator/bench/scripts/analyze-data.sh b/test/gator/bench/scripts/analyze-data.sh new file mode 100755 index 00000000000..1ad8a1ea25a --- /dev/null +++ b/test/gator/bench/scripts/analyze-data.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# Analysis script for gator bench data + +OUTPUT_DIR="/tmp/gator-bench-data" + +if [ ! -d "$OUTPUT_DIR" ]; then + echo "Error: No data found. Run gather-data.sh first." 
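+ # Nothing to analyze until gather-data.sh has written its JSON results to $OUTPUT_DIR.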
+ exit 1 +fi + +echo "=== Gator Bench Data Analysis ===" +echo "" + +############################################################################### +# Test 1: CEL vs Rego Comparison +############################################################################### +echo "=== Test 1: CEL vs Rego Comparison ===" +echo "" + +if [ -f "$OUTPUT_DIR/test1_rego.json" ] && [ -f "$OUTPUT_DIR/test1_cel.json" ]; then + REGO_THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$OUTPUT_DIR/test1_rego.json") + CEL_THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$OUTPUT_DIR/test1_cel.json") + + REGO_MEAN=$(jq -r '.[0].latencies.mean' "$OUTPUT_DIR/test1_rego.json") + CEL_MEAN=$(jq -r '.[0].latencies.mean' "$OUTPUT_DIR/test1_cel.json") + + REGO_P99=$(jq -r '.[0].latencies.p99' "$OUTPUT_DIR/test1_rego.json") + CEL_P99=$(jq -r '.[0].latencies.p99' "$OUTPUT_DIR/test1_cel.json") + + REGO_SETUP=$(jq -r '.[0].setupDuration' "$OUTPUT_DIR/test1_rego.json") + CEL_SETUP=$(jq -r '.[0].setupDuration' "$OUTPUT_DIR/test1_cel.json") + + echo "Metric Rego CEL Ratio (CEL/Rego)" + echo "------ ---- --- ----------------" + printf "Throughput %-17.2f %-17.2f %.2fx\n" "$REGO_THROUGHPUT" "$CEL_THROUGHPUT" "$(echo "scale=2; $CEL_THROUGHPUT / $REGO_THROUGHPUT" | bc)" + printf "Mean Latency (ns) %-17.0f %-17.0f %.2fx\n" "$REGO_MEAN" "$CEL_MEAN" "$(echo "scale=2; $REGO_MEAN / $CEL_MEAN" | bc)" + printf "P99 Latency (ns) %-17.0f %-17.0f %.2fx\n" "$REGO_P99" "$CEL_P99" "$(echo "scale=2; $REGO_P99 / $CEL_P99" | bc)" + printf "Setup Time (ns) %-17.0f %-17.0f %.2fx\n" "$REGO_SETUP" "$CEL_SETUP" "$(echo "scale=2; $REGO_SETUP / $CEL_SETUP" | bc)" + echo "" +fi + +############################################################################### +# Test 2: Concurrency Scaling +############################################################################### +echo "=== Test 2: Concurrency Scaling ===" +echo "" + +echo "Concurrency Throughput P99 Latency Efficiency" +echo "----------- ---------- ----------- ----------" + +BASELINE_THROUGHPUT="" +for CONC in 1 2 4 8 16; do + FILE="$OUTPUT_DIR/test2_conc_${CONC}.json" + if [ -f "$FILE" ]; then + THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + + if [ -z "$BASELINE_THROUGHPUT" ]; then + BASELINE_THROUGHPUT=$THROUGHPUT + EFFICIENCY="100%" + else + # Expected linear scaling + EXPECTED=$(echo "scale=2; $BASELINE_THROUGHPUT * $CONC" | bc) + EFF=$(echo "scale=0; ($THROUGHPUT / $EXPECTED) * 100" | bc) + EFFICIENCY="${EFF}%" + fi + + P99_MS=$(echo "scale=3; $P99 / 1000000" | bc) + printf "%-12d %-14.2f %-14.3fms %s\n" "$CONC" "$THROUGHPUT" "$P99_MS" "$EFFICIENCY" + fi +done +echo "" + +############################################################################### +# Test 3: P99 Stability +############################################################################### +echo "=== Test 3: P99 Stability vs Iteration Count ===" +echo "" + +echo "Iterations P50 (µs) P95 (µs) P99 (µs) Mean (µs)" +echo "---------- -------- -------- -------- ---------" + +for ITER in 50 100 500 1000 5000; do + FILE="$OUTPUT_DIR/test3_iter_${ITER}.json" + if [ -f "$FILE" ]; then + P50=$(jq -r '.[0].latencies.p50' "$FILE") + P95=$(jq -r '.[0].latencies.p95' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + MEAN=$(jq -r '.[0].latencies.mean' "$FILE") + + P50_US=$(echo "scale=2; $P50 / 1000" | bc) + P95_US=$(echo "scale=2; $P95 / 1000" | bc) + P99_US=$(echo "scale=2; $P99 / 1000" | bc) + MEAN_US=$(echo "scale=2; $MEAN / 1000" | bc) + + printf "%-12d %-11.2f %-11.2f %-11.2f %.2f\n" "$ITER" 
"$P50_US" "$P95_US" "$P99_US" "$MEAN_US" + fi +done +echo "" + +############################################################################### +# Test 4: Memory Comparison +############################################################################### +echo "=== Test 4: Memory Profiling ===" +echo "" + +if [ -f "$OUTPUT_DIR/test4_rego_memory.json" ] && [ -f "$OUTPUT_DIR/test4_cel_memory.json" ]; then + REGO_ALLOCS=$(jq -r '.[0].memoryStats.allocsPerReview // "N/A"' "$OUTPUT_DIR/test4_rego_memory.json") + CEL_ALLOCS=$(jq -r '.[0].memoryStats.allocsPerReview // "N/A"' "$OUTPUT_DIR/test4_cel_memory.json") + + REGO_BYTES=$(jq -r '.[0].memoryStats.bytesPerReview // "N/A"' "$OUTPUT_DIR/test4_rego_memory.json") + CEL_BYTES=$(jq -r '.[0].memoryStats.bytesPerReview // "N/A"' "$OUTPUT_DIR/test4_cel_memory.json") + + echo "Metric Rego CEL" + echo "------ ---- ---" + printf "Allocs/Review %-17s %s\n" "$REGO_ALLOCS" "$CEL_ALLOCS" + printf "Bytes/Review %-17s %s\n" "$REGO_BYTES" "$CEL_BYTES" + echo "" +fi + +############################################################################### +# Test 5: Warmup Impact +############################################################################### +echo "=== Test 5: Warmup Impact ===" +echo "" + +echo "Warmup Mean (µs) P99 (µs)" +echo "------ --------- --------" + +for WARMUP in 0 5 10 50 100; do + FILE="$OUTPUT_DIR/test5_warmup_${WARMUP}.json" + if [ -f "$FILE" ]; then + MEAN=$(jq -r '.[0].latencies.mean' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + + MEAN_US=$(echo "scale=2; $MEAN / 1000" | bc) + P99_US=$(echo "scale=2; $P99 / 1000" | bc) + + printf "%-12d %-11.2f %.2f\n" "$WARMUP" "$MEAN_US" "$P99_US" + fi +done +echo "" + +############################################################################### +# Test 6: Variance Analysis +############################################################################### +echo "=== Test 6: Variance Analysis ===" +echo "" + +echo "Run Throughput Mean (µs) P99 (µs)" +echo "--- ---------- --------- --------" + +SUM_THROUGHPUT=0 +SUM_MEAN=0 +SUM_P99=0 +COUNT=0 + +for RUN in 1 2 3 4 5; do + FILE="$OUTPUT_DIR/test6_run_${RUN}.json" + if [ -f "$FILE" ]; then + THROUGHPUT=$(jq -r '.[0].reviewsPerSecond' "$FILE") + MEAN=$(jq -r '.[0].latencies.mean' "$FILE") + P99=$(jq -r '.[0].latencies.p99' "$FILE") + + MEAN_US=$(echo "scale=2; $MEAN / 1000" | bc) + P99_US=$(echo "scale=2; $P99 / 1000" | bc) + + printf "%-5d %-14.2f %-12.2f %.2f\n" "$RUN" "$THROUGHPUT" "$MEAN_US" "$P99_US" + + SUM_THROUGHPUT=$(echo "$SUM_THROUGHPUT + $THROUGHPUT" | bc) + SUM_MEAN=$(echo "$SUM_MEAN + $MEAN_US" | bc) + SUM_P99=$(echo "$SUM_P99 + $P99_US" | bc) + COUNT=$((COUNT + 1)) + fi +done + +if [ $COUNT -gt 0 ]; then + AVG_THROUGHPUT=$(echo "scale=2; $SUM_THROUGHPUT / $COUNT" | bc) + AVG_MEAN=$(echo "scale=2; $SUM_MEAN / $COUNT" | bc) + AVG_P99=$(echo "scale=2; $SUM_P99 / $COUNT" | bc) + + echo "--- ---------- --------- --------" + printf "AVG %-14.2f %-12.2f %.2f\n" "$AVG_THROUGHPUT" "$AVG_MEAN" "$AVG_P99" +fi +echo "" + +echo "=== Analysis Complete ===" diff --git a/test/gator/bench/scripts/gather-data.sh b/test/gator/bench/scripts/gather-data.sh new file mode 100755 index 00000000000..66eb445f6d3 --- /dev/null +++ b/test/gator/bench/scripts/gather-data.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Performance data gathering script for gator bench +# This script collects data to understand performance characteristics + +set -e + +GATOR="./bin/gator" +OUTPUT_DIR="/tmp/gator-bench-data" +ITERATIONS=1000 + +mkdir -p "$OUTPUT_DIR" + +echo "=== Gator 
Bench Data Collection ===" +echo "Output directory: $OUTPUT_DIR" +echo "Iterations per test: $ITERATIONS" +echo "" + +# Build gator first +echo "Building gator..." +make gator > /dev/null 2>&1 +echo "Done." +echo "" + +############################################################################### +# Test 1: CEL vs Rego - Same Policy (K8sAllowedRepos supports both) +############################################################################### +echo "=== Test 1: CEL vs Rego Comparison ===" + +echo "Running Rego engine..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine rego \ + --iterations $ITERATIONS \ + --output json > "$OUTPUT_DIR/test1_rego.json" + +echo "Running CEL engine..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine cel \ + --iterations $ITERATIONS \ + --output json > "$OUTPUT_DIR/test1_cel.json" + +echo "Results saved to test1_rego.json and test1_cel.json" +echo "" + +############################################################################### +# Test 2: Concurrency Scaling +############################################################################### +echo "=== Test 2: Concurrency Scaling ===" + +for CONC in 1 2 4 8 16; do + echo "Running with concurrency=$CONC..." + $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations $ITERATIONS \ + --concurrency $CONC \ + --output json > "$OUTPUT_DIR/test2_conc_${CONC}.json" +done + +echo "Results saved to test2_conc_*.json" +echo "" + +############################################################################### +# Test 3: Iteration Count Impact on P99 Stability +############################################################################### +echo "=== Test 3: P99 Stability vs Iteration Count ===" + +for ITER in 50 100 500 1000 5000; do + echo "Running with iterations=$ITER..." + $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations $ITER \ + --output json > "$OUTPUT_DIR/test3_iter_${ITER}.json" +done + +echo "Results saved to test3_iter_*.json" +echo "" + +############################################################################### +# Test 4: Memory Profiling Comparison +############################################################################### +echo "=== Test 4: Memory Profiling ===" + +echo "Running Rego with memory profiling..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine rego \ + --iterations $ITERATIONS \ + --memory \ + --output json > "$OUTPUT_DIR/test4_rego_memory.json" + +echo "Running CEL with memory profiling..." +$GATOR bench \ + --filename test/gator/bench/both/ \ + --engine cel \ + --iterations $ITERATIONS \ + --memory \ + --output json > "$OUTPUT_DIR/test4_cel_memory.json" + +echo "Results saved to test4_*_memory.json" +echo "" + +############################################################################### +# Test 5: Warmup Impact +############################################################################### +echo "=== Test 5: Warmup Impact ===" + +for WARMUP in 0 5 10 50 100; do + echo "Running with warmup=$WARMUP..." 
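+ # Only --warmup varies here; each run still measures 500 iterations after the warmup phase.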
+ $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations 500 \ + --warmup $WARMUP \ + --output json > "$OUTPUT_DIR/test5_warmup_${WARMUP}.json" +done + +echo "Results saved to test5_warmup_*.json" +echo "" + +############################################################################### +# Test 6: Multiple Runs for Variance Analysis +############################################################################### +echo "=== Test 6: Variance Analysis (5 runs) ===" + +for RUN in 1 2 3 4 5; do + echo "Run $RUN/5..." + $GATOR bench \ + --filename test/gator/bench/basic/ \ + --iterations $ITERATIONS \ + --output json > "$OUTPUT_DIR/test6_run_${RUN}.json" +done + +echo "Results saved to test6_run_*.json" +echo "" + +############################################################################### +# Summary +############################################################################### +echo "=== Data Collection Complete ===" +echo "" +echo "All data saved to: $OUTPUT_DIR" +echo "" +echo "To analyze, run: ./test/gator/bench/scripts/analyze-data.sh" + diff --git a/website/docs/gator.md b/website/docs/gator.md index 9f9946556aa..80ef6106ab4 100644 --- a/website/docs/gator.md +++ b/website/docs/gator.md @@ -629,6 +629,371 @@ templatename3: +## The `gator bench` subcommand + +`gator bench` measures the performance of Gatekeeper policy evaluation. It loads ConstraintTemplates, Constraints, and Kubernetes resources, then repeatedly evaluates the resources against the constraints to gather latency and throughput metrics. + +:::note +`gator bench` measures **compute-only** policy evaluation latency, which does not include network round-trip time, TLS overhead, or Kubernetes API server processing. Real-world webhook latency will be higher. Use these metrics for relative comparisons between policy versions, not as absolute production latency predictions. +::: + +This command is useful for: +- **Policy developers**: Testing policy performance before deployment +- **Platform teams**: Comparing Rego vs CEL engine performance +- **CI/CD pipelines**: Detecting performance regressions between releases + +### Usage + +```shell +gator bench --filename=policies/ +``` + +#### Flags + +| Flag | Short | Default | Description | +|------|-------|---------|-------------| +| `--filename` | `-f` | | File or directory containing ConstraintTemplates, Constraints, and resources. Repeatable. | +| `--image` | `-i` | | OCI image URL containing policies. Repeatable. | +| `--engine` | `-e` | `cel` | Policy engine to benchmark: `rego`, `cel`, or `all` | +| `--iterations` | `-n` | `1000` | Number of benchmark iterations. Use ≥1000 for reliable P99 percentiles. | +| `--warmup` | | `10` | Warmup iterations before measurement | +| `--concurrency` | `-c` | `1` | Number of concurrent goroutines for parallel evaluation | +| `--output` | `-o` | `table` | Output format: `table`, `json`, or `yaml` | +| `--memory` | | `false` | Enable memory profiling (estimates only, not GC-cycle accurate) | +| `--save` | | | Save results to file for future comparison | +| `--compare` | | | Compare against a baseline file | +| `--threshold` | | `10` | Regression threshold percentage (for CI/CD) | +| `--min-threshold` | | `0` | Minimum absolute latency difference to consider (e.g., `100µs`). Useful for fast policies where percentage changes may be noise. 
| +| `--stats` | | `false` | Gather detailed statistics from constraint framework | + +### Examples + +#### Basic Benchmark + +```shell +gator bench --filename=policies/ +``` + +Output: +``` +=== Benchmark Results: Rego Engine === + +Configuration: + Templates: 5 + Constraints: 10 + Objects: 50 + Iterations: 1000 + Total Reviews: 50000 + +Timing: + Setup Duration: 25.00ms + └─ Client Creation: 0.05ms + └─ Template Compilation: 20.00ms + └─ Constraint Loading: 3.00ms + └─ Data Loading: 1.95ms + Total Duration: 25.00s + Throughput: 2000.00 reviews/sec + +Latency (per review): + Min: 200.00µs + Max: 5.00ms + Mean: 500.00µs + P50: 450.00µs + P95: 1.20ms + P99: 2.50ms + +Results: + Violations Found: 1500 +``` + +#### Concurrent Benchmarking + +Simulate parallel load to test contention behavior: + +```shell +gator bench --filename=policies/ --concurrency=4 +``` + +This runs 4 parallel goroutines each executing reviews concurrently. + +``` +=== Benchmark Results: Rego Engine === + +Configuration: + Templates: 5 + Constraints: 10 + Objects: 50 + Iterations: 1000 + Concurrency: 4 + Total Reviews: 50000 +... +``` + +#### Compare Rego vs CEL Engines + +```shell +gator bench --filename=policies/ --engine=all +``` + +This runs benchmarks for both engines and displays a comparison table: + +``` +=== Engine Comparison === + +Metric Rego CEL +------ ------ ------ +Templates 5 5 +Constraints 10 10 +Setup Time 25.00ms 15.00ms +Throughput 2000/sec 3500/sec +Mean Latency 500.00µs 285.00µs +P95 Latency 1.20ms 600.00µs +P99 Latency 2.50ms 900.00µs +Violations 150 150 + +Performance: CEL is 1.75x faster than Rego +``` + +:::note +Templates without CEL code will be skipped when benchmarking the CEL engine. +A warning will be displayed indicating which templates were skipped. +::: + +:::caution +The CEL engine does not support referential constraints. Referential data loading +is skipped entirely when benchmarking with CEL—this is expected behavior, not an error. +If you have policies that rely on referential data (e.g., checking if a namespace exists), +those constraints will not be fully exercised during CEL benchmarks. An informational note +will be displayed indicating that referential data is not supported by the CEL engine. +::: + +#### Memory Profiling + +```shell +gator bench --filename=policies/ --memory +``` + +Adds memory statistics to the output: + +``` +Memory (estimated): + Allocs/Review: 3000 + Bytes/Review: 150.00 KB + Total Allocs: 15000000 + Total Bytes: 732.42 MB +``` + +:::caution +Memory statistics are estimates based on `runtime.MemStats` captured before and after benchmark runs. They do not account for garbage collection cycles that may occur during benchmarking. For production memory analysis, use Go's pprof profiler. 
+::: + +#### Save and Compare Baselines + +Save benchmark results as a baseline: + +```shell +gator bench --filename=policies/ --memory --save=baseline.json +``` + +Compare future runs against the baseline: + +```shell +gator bench --filename=policies/ --memory --compare=baseline.json +``` + +Output includes a comparison table: + +``` +=== Baseline Comparison: Rego Engine === + +Metric Baseline Current Delta Status +------ -------- ------- ----- ------ +P50 Latency 450.00µs 460.00µs +2.2% ✓ +P95 Latency 1.20ms 1.25ms +4.2% ✓ +P99 Latency 2.50ms 2.60ms +4.0% ✓ +Mean Latency 500.00µs 510.00µs +2.0% ✓ +Throughput 2000/sec 1960/sec -2.0% ✓ +Allocs/Review 3000 3050 +1.7% ✓ +Bytes/Review 150.00 KB 152.00 KB +1.3% ✓ + +✓ No significant regressions (threshold: 10.0%) +``` + +For fast policies (< 1ms), small percentage changes may be noise. Use `--min-threshold` to set an absolute minimum difference: + +```shell +gator bench --filename=policies/ --compare=baseline.json --threshold=10 --min-threshold=100µs +``` + +This marks a metric as passing if either: +- The percentage change is within the threshold (10%), OR +- The absolute difference is less than the min-threshold (100µs) + +### CI/CD Integration + +Use `gator bench` in CI/CD pipelines to detect performance regressions automatically. + +#### GitHub Actions Example + +```yaml +name: Policy Benchmark + +on: + pull_request: + paths: + - 'policies/**' + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Download baseline + uses: actions/download-artifact@v4 + with: + name: benchmark-baseline + path: . + continue-on-error: true # First run won't have baseline + + - name: Install gator + run: | + go install github.com/open-policy-agent/gatekeeper/v3/cmd/gator@latest + + - name: Run benchmark + run: | + if [ -f baseline.json ]; then + # Use min-threshold to avoid flaky failures on fast policies + gator bench -f policies/ --memory \ + --compare=baseline.json \ + --threshold=10 \ + --min-threshold=100µs + else + gator bench -f policies/ --memory --save=baseline.json + fi + + - name: Upload baseline + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: benchmark-baseline + path: baseline.json +``` + +:::tip +Use `--min-threshold` in CI to prevent flaky failures. For policies that evaluate in under 1ms, a 10% regression might only be 50µs of noise from system jitter. +::: + +#### Exit Codes + +| Exit Code | Meaning | +|-----------|---------| +| `0` | Benchmark completed successfully, no regressions detected | +| `1` | Error occurred, or regression threshold exceeded (when using `--compare`) | + +When `--compare` is used with `--threshold`, the command exits with code `1` if any metric regresses beyond the threshold. This enables CI/CD pipelines to fail builds that introduce performance regressions. + +### Understanding Metrics + +| Metric | Description | +|--------|-------------| +| **P50/P95/P99 Latency** | Percentile latencies per review. P99 of 2ms means 99% of reviews complete in ≤2ms. Use ≥1000 iterations for reliable P99. | +| **Mean Latency** | Average time per review | +| **Throughput** | Reviews processed per second | +| **Allocs/Review** | Memory allocations per review (with `--memory`). Estimate only. | +| **Bytes/Review** | Bytes allocated per review (with `--memory`). Estimate only. 
| +| **Setup Duration** | Time to load templates, constraints, and data | + +#### Setup Duration Breakdown + +Setup duration includes: +- **Client Creation**: Initializing the constraint client +- **Template Compilation**: Compiling Rego/CEL code in ConstraintTemplates +- **Constraint Loading**: Adding constraints to the client +- **Data Loading**: Loading all Kubernetes resources into the data cache + +:::note +Data loading adds all provided resources to the constraint client's cache. This is intentional behavior that matches how Gatekeeper evaluates referential constraints—policies that reference other cluster resources (e.g., checking if a namespace exists) need this cached data available during evaluation. +::: + +#### Performance Guidance + +- **P99 latency < 100ms** is recommended for production admission webhooks +- **CEL is typically faster than Rego** for equivalent policies +- **High memory allocations** may indicate inefficient policy patterns +- **Setup time** matters for cold starts; consider template compilation cost +- **Concurrency testing** (`--concurrency=N`) reveals contention issues not visible in sequential runs + +### Performance Characteristics + +The following characteristics are based on architectural differences between policy engines and general benchmarking principles. Actual numbers will vary based on policy complexity, hardware, and workload. + +:::tip +These insights were generated using the data gathering scripts in the Gatekeeper repository: +- [`test/gator/bench/scripts/gather-data.sh`](https://github.com/open-policy-agent/gatekeeper/blob/master/test/gator/bench/scripts/gather-data.sh) - Collects benchmark data across different scenarios +- [`test/gator/bench/scripts/analyze-data.sh`](https://github.com/open-policy-agent/gatekeeper/blob/master/test/gator/bench/scripts/analyze-data.sh) - Analyzes and summarizes the collected data + +You can run these scripts locally to validate these characteristics on your own hardware. +::: + +#### CEL vs Rego + +| Characteristic | CEL | Rego | +|----------------|-----|------| +| **Evaluation Speed** | 1.5-3x faster | Baseline | +| **Memory per Review** | 20-30% less | Baseline | +| **Setup/Compilation** | 2-3x slower | Faster | +| **Best For** | Long-running processes | Cold starts | + +**Why the difference?** +- CEL compiles to more efficient bytecode, resulting in faster evaluation +- Rego has lighter upfront compilation cost but slower per-evaluation overhead +- For admission webhooks (long-running), CEL's evaluation speed advantage compounds over time + +#### Concurrency Scaling + +:::note +The `--concurrency` flag simulates parallel policy evaluation similar to how Kubernetes admission webhooks handle concurrent requests. In production, Gatekeeper processes multiple admission requests simultaneously, making concurrent benchmarking essential for realistic performance testing. 
+::: + +- **Linear scaling** up to 4-8 concurrent workers +- **Diminishing returns** beyond CPU core count +- **Increased P99 variance** at high concurrency due to contention +- **Recommendation**: Use 4-8 workers for load testing; match production replica count + +``` +Concurrency Typical Efficiency +1 100% (baseline) +2 85-95% +4 70-85% +8 50-70% +16+ <50% (diminishing returns) +``` + +#### Benchmarking Best Practices + +| Practice | Recommendation | Why | +|----------|----------------|-----| +| **Iterations** | ≥1000 | Required for statistically meaningful P99 percentiles | +| **Warmup** | 10 iterations | Go runtime stabilizes quickly; more warmup has minimal impact | +| **Multiple Runs** | 3-5 runs | Expect 2-8% variance between identical runs | +| **P99 vs Mean** | Focus on P99 for SLAs | P99 has higher variance (~8%) than mean (~2%) | +| **CI Thresholds** | Use `--min-threshold` | Prevents flaky failures from natural variance | + +#### Interpreting Results + +**Healthy patterns:** +- P95/P99 within 2-5x of P50 (consistent performance) +- Memory allocations stable across runs +- Throughput scales with concurrency up to core count + +**Warning signs:** +- P99 > 10x P50 (high tail latency, possible GC pressure) +- Memory growing with iteration count (potential leak) +- Throughput decreasing at low concurrency (contention issue) +- Large variance between runs (noisy environment or unstable policy) + + ## Bundling Policy into OCI Artifacts It may be useful to bundle policy files into OCI Artifacts for ingestion during